├── .circleci
└── config.yml
├── .gitattributes
├── .gitignore
├── .travis.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── data
├── linnerud_exercise.csv
├── linnerud_physio.csv
└── wine.csv
├── docs
├── .gitignore
├── Makefile
├── _static
│ └── theme_overrides.css
├── _templates
│ ├── class.rst
│ └── function.rst
├── api.rst
├── conf.py
├── index.rst
├── requirements.txt
├── usage.rst
└── user_guide
│ ├── behavioral.rst
│ ├── meancentered.rst
│ └── results.rst
├── pyls
├── __init__.py
├── _version.py
├── base.py
├── compute.py
├── examples
│ ├── __init__.py
│ ├── datasets.json
│ └── datasets.py
├── io.py
├── matlab
│ ├── __init__.py
│ └── io.py
├── plotting
│ └── meancentered.py
├── structures.py
├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── data
│ │ ├── bpls_onegroup_onecond_nosplit.mat
│ │ ├── bpls_onegroup_onecond_split.mat
│ │ ├── empty.mat
│ │ ├── mpls_multigroup_onecond_nosplit.mat
│ │ ├── mpls_multigroup_onecond_split.mat
│ │ └── resultonly.mat
│ ├── matlab.py
│ ├── test_base.py
│ ├── test_compute.py
│ ├── test_examples.py
│ ├── test_io.py
│ ├── test_matlab.py
│ ├── test_structures.py
│ ├── test_utils.py
│ └── types
│ │ ├── __init__.py
│ │ ├── test_regression.py
│ │ └── test_svd.py
├── types
│ ├── __init__.py
│ ├── behavioral.py
│ ├── meancentered.py
│ └── regression.py
└── utils.py
├── requirements.txt
├── setup.cfg
├── setup.py
└── versioneer.py
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2.1
2 |
3 | executors:
4 | exeggutor:
5 | docker:
6 | - image: circleci/python:3.6
7 | working_directory: ~/pyls
8 | environment:
9 | PYTHON_VERSION: "3.6"
10 | OPENBLAS_NUM_THREADS: "1"
11 | MKL_NUM_THREADS: "1"
12 | DROPBOX_URL: "https://www.dropbox.com/s/e6jfvekw6habeud/matlab_pls.tar.gz?dl=1"
13 |
14 | jobs:
15 | setup:
16 | executor: exeggutor
17 | steps:
18 | - checkout
19 | - run:
20 | name: Generating checksum to cache Matlab PLS results
21 | command: echo "${DROPBOX_URL}" > checksum.txt
22 | - restore_cache:
23 | name: Checking for cached Matlab PLS results
24 | keys:
25 | - data-v2-{{ checksum "checksum.txt" }}
26 | - data-v2-
27 | - run:
28 | name: Preparing Matlab PLS results
29 | command: |
30 | if [[ -e /tmp/data/matlab ]]; then
31 | echo "Restoring Matlab PLS results from cache"
32 | else
33 | mkdir -p /tmp/data/matlab
34 | curl -L "${DROPBOX_URL}" | tar xz -C /tmp/data/matlab
35 | fi
36 | - save_cache:
37 | name: Caching Matlab PLS results
38 | key: data-v2-{{ checksum "checksum.txt" }}
39 | paths:
40 | - /tmp/data
41 | - restore_cache:
42 | name: Restoring cached dependencies
43 | keys:
44 | - dependencies-v3-{{ checksum "requirements.txt" }}
45 | - dependencies-v3-
46 | - run:
47 | name: Creating test environment
48 | command: |
49 | python3 -m venv venv
50 | . venv/bin/activate
51 | pip install .[tests]
52 | pip install joblib
53 | - save_cache:
54 | name: Caching dependencies
55 | key: dependencies-v3-{{ checksum "requirements.txt" }}
56 | paths:
57 | - ./venv
58 | - persist_to_workspace:
59 | name: Persisting workspace
60 | root: ./
61 | paths:
62 | - requirements.txt
63 | - checksum.txt
64 |
65 |
66 | behavioral_pls:
67 | executor: exeggutor
68 | steps:
69 | - checkout
70 | - attach_workspace:
71 | at: ./
72 | - restore_cache:
73 | name: Loading Matlab PLS results
74 | keys:
75 | - data-v2-{{ checksum "checksum.txt" }}
76 | - data-v2-
77 | - restore_cache:
78 | name: Loading dependencies
79 | keys:
80 | - dependencies-v3-{{ checksum "requirements.txt" }}
81 | - dependencies-v3-
82 | - run:
83 | name: Running Matlab-Python comparison
84 | command: |
85 | . venv/bin/activate
86 | for mat in /tmp/data/matlab/bpls*mat; do
87 | echo $( date +%H:%M:%S ) "${mat}"
88 | python -c "import pyls.tests; pyls.tests.assert_matlab_equivalence('${mat}', n_proc='max', n_perm=2500, n_split=100);"
89 | done
90 |
91 | meancentered_pls:
92 | executor: exeggutor
93 | steps:
94 | - checkout
95 | - attach_workspace:
96 | at: ./
97 | - restore_cache:
98 | name: Loading Matlab PLS results
99 | keys:
100 | - data-v2-{{ checksum "checksum.txt" }}
101 | - data-v2-
102 | - restore_cache:
103 | name: Loading dependencies
104 | keys:
105 | - dependencies-v3-{{ checksum "requirements.txt" }}
106 | - dependencies-v3-
107 | - run:
108 | name: Running Matlab-Python comparison
109 | command: |
110 | . venv/bin/activate
111 | for mat in /tmp/data/matlab/mpls*mat; do
112 | echo $( date +%H:%M:%S ) "${mat}"
113 | python -c "import pyls.tests; pyls.tests.assert_matlab_equivalence('${mat}', n_proc='max', n_perm=2500, n_split=250);"
114 | done
115 |
116 | workflows:
117 | version: 2.1
118 | regression_tests:
119 | jobs:
120 | - setup
121 | - behavioral_pls:
122 | requires:
123 | - setup
124 | - meancentered_pls:
125 | requires:
126 | - setup
127 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | pyls/_version.py export-subst
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | plsc/
2 | .vscode/
3 | docs/generated/
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | .pytest_cache/
8 | *.py[cod]
9 | *$py.class
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | env/
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *,cover
51 | .hypothesis/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # IPython Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # dotenv
84 | .env
85 |
86 | # virtualenv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 |
93 | # Rope project settings
94 | .ropeproject
95 |
96 | .imdone
97 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | sudo: false
3 | dist: xenial
4 | notifications:
5 | email: change
6 |
7 | branches:
8 | only:
9 | - master
10 |
11 | python:
12 | - 3.5
13 | - 3.6
14 | - 3.7
15 |
16 | env:
17 | matrix:
18 | - CHECK_TYPE=linting
19 | - CHECK_TYPE=docdoctest INSTALL_PANDAS=true
20 | - CHECK_TYPE=test
21 | global:
22 | - OPENBLAS_NUM_THREADS=1
23 | - MKL_NUM_THREADS=1
24 | - INSTALL_TYPE=setup
25 |
26 | matrix:
27 | include:
28 | - python: 3.6
29 | env:
30 | - INSTALL_TYPE=sdist
31 | - CHECK_TYPE=test
32 | - python: 3.6
33 | env:
34 | - INSTALL_TYPE=wheel
35 | - CHECK_TYPE=test
36 | - python: 3.6
37 | env:
38 | - INSTALL_JOBLIB=true
39 | - INSTALL_PANDAS=true
40 | - CHECK_TYPE=test
41 |
42 | before_install:
43 | - python -m pip install --upgrade pip
44 | - if [ "${CHECK_TYPE}" == "linting" ]; then
45 | pip install flake8;
46 | fi
47 | - if [ "${CHECK_TYPE}" == "test" ]; then
48 | pip install "pytest>=3.6" pytest-cov coverage coveralls codecov;
49 | fi
50 | - if [ ! -z "${INSTALL_JOBLIB}" ]; then
51 | pip install joblib;
52 | fi
53 | - if [ ! -z "${INSTALL_PANDAS}" ]; then
54 | pip install pandas;
55 | fi
56 |
57 | install:
58 | - |
59 | if [ "${INSTALL_TYPE}" == "setup" ]; then
60 | python setup.py install;
61 | elif [ "${INSTALL_TYPE}" == "sdist" ]; then
62 | python setup.py sdist;
63 | pip install dist/*.tar.gz;
64 | elif [ "${INSTALL_TYPE}" == "wheel" ]; then
65 | python setup.py bdist_wheel;
66 | pip install dist/*.whl;
67 | else
68 | false;
69 | fi
70 |
71 | script:
72 | - |
73 | if [ "${CHECK_TYPE}" == "linting" ]; then
74 | flake8 pyls;
75 | elif [ "${CHECK_TYPE}" == "docdoctest" ]; then
76 | cd docs;
77 | pip install -r ./requirements.txt;
78 | make html;
79 | make doctest;
80 | elif [ "${CHECK_TYPE}" == "test" ]; then
81 | mkdir for_testing && cd for_testing;
82 | cp ../setup.cfg .;
83 | args="--cov-report term-missing --cov=pyls --doctest-modules --pyargs";
84 | python -m pytest ${args} pyls;
85 | else
86 | false;
87 | fi
88 |
89 | after_success:
90 | - if [ "${CHECK_TYPE}" == "test" ]; then
91 | codecov;
92 | fi
93 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 |
3 | In the interest of fostering an open and welcoming environment we want participation in our project and our community to be a harassment-free experience for everyone.
4 |
5 | Although no list can hope to be all-encompassing, we explicitly honor diversity in age, body size, disability, ethnicity, gender identity and expression, level of experience, native language, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | We aim to promote behavior that contributes to a positive and welcoming environment.
10 | Examples of such behavior include:
11 |
12 | * Using inclusive language
13 | * Being respectful of differing viewpoints and experiences
14 | * Showing empathy towards other community members
15 |
16 | We do not tolerate harassment or other, inappropriate behavior in our community.
17 | Examples of unacceptable behavior by participants include:
18 |
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Personal or political attacks on contributors, and insulting or derogatory comments on contributed code with the intent to undermine contributions
21 | * Public or private harassment
22 |
23 | ## Our Responsibilities
24 |
25 | The maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
26 |
27 | The maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
28 |
29 | ## Scope
30 |
31 | This Code of Conduct applies both within our online GitHub repository and in public spaces when an individual is representing the project or its community.
32 | Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.
33 |
34 | ## Enforcement
35 |
36 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting Ross Markello at ross.markello@mail.mcgill.ca.
37 | Confidentiality will be respected in reporting.
38 |
39 | Enforcement of this Code can include (but is not limited to):
40 |
41 | * Facilitating a converstaion between the two parties involved in the violation of the Code of Conduct
42 | * Requesting a community member apologize for their behavior
43 | * Asking a community member (or multiple members) to enter a cooling off period that puts a time-limited pause on a particular discussion topic
44 | * Asking a community member to no longer participate in the `pyls` community, including making contributions or commenting on issues/pull requests
45 |
46 | ## Attribution
47 |
48 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
49 | available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct.html](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html).
50 |
51 | [homepage]: https://www.contributor-covenant.org
52 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.* setup* MANIFEST.in LICENSE requirements.txt
2 | include pyls/examples/datasets.json
3 | recursive-include data *
4 | recursive-include pyls/tests/data *
5 | include versioneer.py
6 | include pyls/_version.py
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pyls
2 |
3 | This package provides a Python interface for partial least squares (PLS) analysis, a multivariate statistical technique used to relate two sets of variables.
4 |
5 | [](https://travis-ci.org/rmarkello/pyls)
6 | [](https://circleci.com/gh/rmarkello/pyls)
7 | [](https://codecov.io/gh/rmarkello/pyls)
8 | [](http://pyls.readthedocs.io/en/latest/?badge=latest)
9 | [](https://opensource.org/licenses/GPL-2.0)
10 |
11 | ## Table of Contents
12 |
13 | If you know where you're going, feel free to jump ahead:
14 |
15 | * [Installation and setup](#requirements-and-installation)
16 | * [Purpose](#purpose)
17 | * [Overview](#overview)
18 | * [Background](#background)
19 | * [Usage](#usage)
20 | * [PLS correlation methods](#pls-correlation-methods)
21 | * [Behavioral PLS](#behavioral-pls)
22 | * [Mean-centered PLS](#mean-centered-pls)
23 | * [PLS regression methods](#pls-regression-methods)
24 | * [Regression with SIMPLS](#regression-with-simpls)
25 | * [PLS results](#results)
26 |
27 | ## Installation and setup
28 |
29 | This package requires Python >= 3.5. Assuming you have the correct version of Python installed, you can install this package by opening a terminal and running the following:
30 |
31 | ```bash
32 | git clone https://github.com/netneurolab/pypyls.git
33 | cd pyls
34 | python setup.py install
35 | ```
36 |
37 | There are plans (hopes?) to get this set up on PyPI for an easier installation process, but that is a long-term goal!
38 |
39 | ## Purpose
40 |
41 | ### Overview
42 |
43 | Partial least squares (PLS) is a statistical technique that aims to find shared information between two sets of variables.
44 | If you're unfamiliar with PLS and are interested in a thorough (albeit quite technical) treatment of it [Abdi et al., 2013](https://doi.org/10.1007/978-1-62703-059-5_23) is a good resource.
45 | There are multiple "flavors" of PLS that are tailored to different use cases; this package implements two functions that fall within the category typically referred to as **PLS-C** (PLS correlation) or **PLS-SVD** (PLS singular value decomposition) and one function that falls within the category typically referred to as **PLS-R** (PLS regression).
46 |
47 | ### Background
48 |
49 | The functionality of the current package largely mirrors that originally introduced by [McIntosh et al., (1996)](https://www.ncbi.nlm.nih.gov/pubmed/9345485) in their [Matlab toolbox](https://www.rotman-baycrest.on.ca/index.php?section=84).
50 | However, while the Matlab toolbox has a significant number of tools dedicated to integrating neuroimaging-specific paradigms (i.e., loading M/EEG and fMRI data), the current Python package aims to implement and expand on only the core _statistical_ functions of that toolbox.
51 |
52 | While the core algorithms of PLS implemented in this package are present (to a degree) in [`scikit-learn`](`https://scikit-learn.org/stable/modules/classes.html#module-sklearn.cross_decomposition`), this package provides a different API and includes some additional functionality.
53 | Namely, `pyls`:
54 |
55 | 1. Has integrated significance and reliability testing via built-in permutation testing and bootstrap resampling,
56 | 2. Implements [mean-centered PLS](https://www.ncbi.nlm.nih.gov/pubmed/20656037) for multivariate group/condition comparisons,
57 | 3. Uses the [SIMPLS](https://doi.org/10.1016%2F0169-7439%2893%2985002-X) instead of the [NIPALS algorithm](https://doi.org/10.1016/B978-0-12-426653-7.50032-6) for PLS regression
58 |
59 | ## Usage
60 |
61 | `pyls` implement two subtypes of PLS-C: a more traditional form that we call "behavioral PLS" (`pyls.behavioral_pls`) and a somewhat newer form that we call "mean-centered PLS" (`pyls.meancentered_pls`).
62 | It also implements one type of PLS-R, which uses the SIMPLS algorithm (`pyls.pls_regression`); this is, in principle, very similar to "behavioral PLS."
63 |
64 | ### PLS correlation methods
65 |
66 | #### Behavioral PLS
67 |
68 | As the more "traditional" form of PLS-C, `pyls.behavioral_pls` looks to find relationships between two sets of variables.
69 | To run a behavioral PLS we would do the following:
70 |
71 | ```python
72 | >>> import numpy as np
73 |
74 | # let's create two data arrays with 80 observations
75 | >>> X = np.random.rand(80, 10000) # a 10000-feature (e.g., neural) data array
76 | >>> Y = np.random.rand(80, 10) # a 10-feature (e.g., behavioral) data array
77 |
78 | # we're going to pretend that this data is from 2 groups of 20 subjects each,
79 | # and that each subject participated in 2 task conditions
80 | >>> groups = [20, 20] # a list with the number of subjects in each group
81 | >>> n_cond = 2 # the number of tasks or conditions
82 |
83 | # run the analysis and look at the results structure
84 | >>> from pyls import behavioral_pls
85 | >>> bpls = behavioral_pls(X, Y, groups=groups, n_cond=n_cond)
86 | >>> bpls
87 | PLSResults(x_weights, y_weights, x_scores, y_scores, y_loadings, singvals, varexp, permres,
88 | bootres, splitres, cvres, inputs)
89 | ```
90 |
91 | #### Mean-centered PLS
92 |
93 | In contrast to behavioral PLS, `pyls.meancentered_pls` doesn't look to find relationships between two sets of variables, but rather tries to find relationships between _groupings_ in a single set of variables. As such, we will only provide it with _one_ of our created data arrays (`X`) and it will attempt to examine how the features of that array differ between groups and/or conditions. To run a mean-centered PLS we would do the following:
94 |
95 | ```python
96 | >>> from pyls import meancentered_pls
97 | >>> mpls = meancentered_pls(X, groups=groups, n_cond=n_cond)
98 | >>> mpls
99 | PLSResults(x_weights, y_weights, x_scores, y_scores, singvals, varexp, permres, bootres, splitres,
100 | inputs)
101 | ```
102 |
103 | ### PLS regression methods
104 |
105 | #### Regression with SIMPLS
106 |
107 | Whereas `pyls.behavioral_pls` aims to maximize the symmetric relationship between `X` and `Y`, `pyls.pls_regression` performs a directed decomposition.
108 | That is, it aims to find components in `X` that explain the most variance in `Y` (but not necessarily vice versa).
109 | To run a PLS regression analysis we would do the following:
110 |
111 | ```python
112 | >>> from pyls import pls_regression
113 | >>> plsr = pls_regression(X, Y, n_components=5)
114 | >>> plsr
115 | PLSResults(x_weights, x_scores, y_scores, y_loadings, varexp, permres, bootres, inputs)
116 | ```
117 |
118 | Currently `pyls.pls_regression()` does not support groups or conditions.
119 |
120 | ### PLS Results
121 |
122 | The docstrings of the results objects (`bpls`, `plsr`, and `mpls` in the above example) have some information describing what each output represents, so while we work on improving our documentation you can rely on those for some insight! Try typing `help(bpls)`, `help(plsr)`, or `help(mpls)` to get more information on what the different values represent.
123 |
124 | If you are at all familiar with the Matlab PLS toolbox you might notice that the results structures have a dramatically different naming convention; despite this all the same information should be present!
125 |
--------------------------------------------------------------------------------
/data/linnerud_exercise.csv:
--------------------------------------------------------------------------------
1 | ,Chins,Situps,Jumps
2 | 0,5.0,162.0,60.0
3 | 1,2.0,110.0,60.0
4 | 2,12.0,101.0,101.0
5 | 3,12.0,105.0,37.0
6 | 4,13.0,155.0,58.0
7 | 5,4.0,101.0,42.0
8 | 6,8.0,101.0,38.0
9 | 7,6.0,125.0,40.0
10 | 8,15.0,200.0,40.0
11 | 9,17.0,251.0,250.0
12 | 10,17.0,120.0,38.0
13 | 11,13.0,210.0,115.0
14 | 12,14.0,215.0,105.0
15 | 13,1.0,50.0,50.0
16 | 14,6.0,70.0,31.0
17 | 15,12.0,210.0,120.0
18 | 16,4.0,60.0,25.0
19 | 17,11.0,230.0,80.0
20 | 18,15.0,225.0,73.0
21 | 19,2.0,110.0,43.0
22 |
--------------------------------------------------------------------------------
/data/linnerud_physio.csv:
--------------------------------------------------------------------------------
1 | ,Weight,Waist,Pulse
2 | 0,191.0,36.0,50.0
3 | 1,189.0,37.0,52.0
4 | 2,193.0,38.0,58.0
5 | 3,162.0,35.0,62.0
6 | 4,189.0,35.0,46.0
7 | 5,182.0,36.0,56.0
8 | 6,211.0,38.0,56.0
9 | 7,167.0,34.0,60.0
10 | 8,176.0,31.0,74.0
11 | 9,154.0,33.0,56.0
12 | 10,169.0,34.0,50.0
13 | 11,166.0,33.0,52.0
14 | 12,154.0,34.0,64.0
15 | 13,247.0,46.0,50.0
16 | 14,193.0,36.0,46.0
17 | 15,202.0,37.0,62.0
18 | 16,176.0,37.0,54.0
19 | 17,157.0,32.0,52.0
20 | 18,156.0,33.0,54.0
21 | 19,138.0,33.0,68.0
22 |
--------------------------------------------------------------------------------
/data/wine.csv:
--------------------------------------------------------------------------------
1 | ,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
2 | 0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
3 | 1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
4 | 2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
5 | 3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
6 | 4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0
7 | 5,14.2,1.76,2.45,15.2,112.0,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450.0
8 | 6,14.39,1.87,2.45,14.6,96.0,2.5,2.52,0.3,1.98,5.25,1.02,3.58,1290.0
9 | 7,14.06,2.15,2.61,17.6,121.0,2.6,2.51,0.31,1.25,5.05,1.06,3.58,1295.0
10 | 8,14.83,1.64,2.17,14.0,97.0,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045.0
11 | 9,13.86,1.35,2.27,16.0,98.0,2.98,3.15,0.22,1.85,7.22,1.01,3.55,1045.0
12 | 10,14.1,2.16,2.3,18.0,105.0,2.95,3.32,0.22,2.38,5.75,1.25,3.17,1510.0
13 | 11,14.12,1.48,2.32,16.8,95.0,2.2,2.43,0.26,1.57,5.0,1.17,2.82,1280.0
14 | 12,13.75,1.73,2.41,16.0,89.0,2.6,2.76,0.29,1.81,5.6,1.15,2.9,1320.0
15 | 13,14.75,1.73,2.39,11.4,91.0,3.1,3.69,0.43,2.81,5.4,1.25,2.73,1150.0
16 | 14,14.38,1.87,2.38,12.0,102.0,3.3,3.64,0.29,2.96,7.5,1.2,3.0,1547.0
17 | 15,13.63,1.81,2.7,17.2,112.0,2.85,2.91,0.3,1.46,7.3,1.28,2.88,1310.0
18 | 16,14.3,1.92,2.72,20.0,120.0,2.8,3.14,0.33,1.97,6.2,1.07,2.65,1280.0
19 | 17,13.83,1.57,2.62,20.0,115.0,2.95,3.4,0.4,1.72,6.6,1.13,2.57,1130.0
20 | 18,14.19,1.59,2.48,16.5,108.0,3.3,3.93,0.32,1.86,8.7,1.23,2.82,1680.0
21 | 19,13.64,3.1,2.56,15.2,116.0,2.7,3.03,0.17,1.66,5.1,0.96,3.36,845.0
22 | 20,14.06,1.63,2.28,16.0,126.0,3.0,3.17,0.24,2.1,5.65,1.09,3.71,780.0
23 | 21,12.93,3.8,2.65,18.6,102.0,2.41,2.41,0.25,1.98,4.5,1.03,3.52,770.0
24 | 22,13.71,1.86,2.36,16.6,101.0,2.61,2.88,0.27,1.69,3.8,1.11,4.0,1035.0
25 | 23,12.85,1.6,2.52,17.8,95.0,2.48,2.37,0.26,1.46,3.93,1.09,3.63,1015.0
26 | 24,13.5,1.81,2.61,20.0,96.0,2.53,2.61,0.28,1.66,3.52,1.12,3.82,845.0
27 | 25,13.05,2.05,3.22,25.0,124.0,2.63,2.68,0.47,1.92,3.58,1.13,3.2,830.0
28 | 26,13.39,1.77,2.62,16.1,93.0,2.85,2.94,0.34,1.45,4.8,0.92,3.22,1195.0
29 | 27,13.3,1.72,2.14,17.0,94.0,2.4,2.19,0.27,1.35,3.95,1.02,2.77,1285.0
30 | 28,13.87,1.9,2.8,19.4,107.0,2.95,2.97,0.37,1.76,4.5,1.25,3.4,915.0
31 | 29,14.02,1.68,2.21,16.0,96.0,2.65,2.33,0.26,1.98,4.7,1.04,3.59,1035.0
32 | 30,13.73,1.5,2.7,22.5,101.0,3.0,3.25,0.29,2.38,5.7,1.19,2.71,1285.0
33 | 31,13.58,1.66,2.36,19.1,106.0,2.86,3.19,0.22,1.95,6.9,1.09,2.88,1515.0
34 | 32,13.68,1.83,2.36,17.2,104.0,2.42,2.69,0.42,1.97,3.84,1.23,2.87,990.0
35 | 33,13.76,1.53,2.7,19.5,132.0,2.95,2.74,0.5,1.35,5.4,1.25,3.0,1235.0
36 | 34,13.51,1.8,2.65,19.0,110.0,2.35,2.53,0.29,1.54,4.2,1.1,2.87,1095.0
37 | 35,13.48,1.81,2.41,20.5,100.0,2.7,2.98,0.26,1.86,5.1,1.04,3.47,920.0
38 | 36,13.28,1.64,2.84,15.5,110.0,2.6,2.68,0.34,1.36,4.6,1.09,2.78,880.0
39 | 37,13.05,1.65,2.55,18.0,98.0,2.45,2.43,0.29,1.44,4.25,1.12,2.51,1105.0
40 | 38,13.07,1.5,2.1,15.5,98.0,2.4,2.64,0.28,1.37,3.7,1.18,2.69,1020.0
41 | 39,14.22,3.99,2.51,13.2,128.0,3.0,3.04,0.2,2.08,5.1,0.89,3.53,760.0
42 | 40,13.56,1.71,2.31,16.2,117.0,3.15,3.29,0.34,2.34,6.13,0.95,3.38,795.0
43 | 41,13.41,3.84,2.12,18.8,90.0,2.45,2.68,0.27,1.48,4.28,0.91,3.0,1035.0
44 | 42,13.88,1.89,2.59,15.0,101.0,3.25,3.56,0.17,1.7,5.43,0.88,3.56,1095.0
45 | 43,13.24,3.98,2.29,17.5,103.0,2.64,2.63,0.32,1.66,4.36,0.82,3.0,680.0
46 | 44,13.05,1.77,2.1,17.0,107.0,3.0,3.0,0.28,2.03,5.04,0.88,3.35,885.0
47 | 45,14.21,4.04,2.44,18.9,111.0,2.85,2.65,0.3,1.25,5.24,0.87,3.33,1080.0
48 | 46,14.38,3.59,2.28,16.0,102.0,3.25,3.17,0.27,2.19,4.9,1.04,3.44,1065.0
49 | 47,13.9,1.68,2.12,16.0,101.0,3.1,3.39,0.21,2.14,6.1,0.91,3.33,985.0
50 | 48,14.1,2.02,2.4,18.8,103.0,2.75,2.92,0.32,2.38,6.2,1.07,2.75,1060.0
51 | 49,13.94,1.73,2.27,17.4,108.0,2.88,3.54,0.32,2.08,8.9,1.12,3.1,1260.0
52 | 50,13.05,1.73,2.04,12.4,92.0,2.72,3.27,0.17,2.91,7.2,1.12,2.91,1150.0
53 | 51,13.83,1.65,2.6,17.2,94.0,2.45,2.99,0.22,2.29,5.6,1.24,3.37,1265.0
54 | 52,13.82,1.75,2.42,14.0,111.0,3.88,3.74,0.32,1.87,7.05,1.01,3.26,1190.0
55 | 53,13.77,1.9,2.68,17.1,115.0,3.0,2.79,0.39,1.68,6.3,1.13,2.93,1375.0
56 | 54,13.74,1.67,2.25,16.4,118.0,2.6,2.9,0.21,1.62,5.85,0.92,3.2,1060.0
57 | 55,13.56,1.73,2.46,20.5,116.0,2.96,2.78,0.2,2.45,6.25,0.98,3.03,1120.0
58 | 56,14.22,1.7,2.3,16.3,118.0,3.2,3.0,0.26,2.03,6.38,0.94,3.31,970.0
59 | 57,13.29,1.97,2.68,16.8,102.0,3.0,3.23,0.31,1.66,6.0,1.07,2.84,1270.0
60 | 58,13.72,1.43,2.5,16.7,108.0,3.4,3.67,0.19,2.04,6.8,0.89,2.87,1285.0
61 | 59,12.37,0.94,1.36,10.6,88.0,1.98,0.57,0.28,0.42,1.95,1.05,1.82,520.0
62 | 60,12.33,1.1,2.28,16.0,101.0,2.05,1.09,0.63,0.41,3.27,1.25,1.67,680.0
63 | 61,12.64,1.36,2.02,16.8,100.0,2.02,1.41,0.53,0.62,5.75,0.98,1.59,450.0
64 | 62,13.67,1.25,1.92,18.0,94.0,2.1,1.79,0.32,0.73,3.8,1.23,2.46,630.0
65 | 63,12.37,1.13,2.16,19.0,87.0,3.5,3.1,0.19,1.87,4.45,1.22,2.87,420.0
66 | 64,12.17,1.45,2.53,19.0,104.0,1.89,1.75,0.45,1.03,2.95,1.45,2.23,355.0
67 | 65,12.37,1.21,2.56,18.1,98.0,2.42,2.65,0.37,2.08,4.6,1.19,2.3,678.0
68 | 66,13.11,1.01,1.7,15.0,78.0,2.98,3.18,0.26,2.28,5.3,1.12,3.18,502.0
69 | 67,12.37,1.17,1.92,19.6,78.0,2.11,2.0,0.27,1.04,4.68,1.12,3.48,510.0
70 | 68,13.34,0.94,2.36,17.0,110.0,2.53,1.3,0.55,0.42,3.17,1.02,1.93,750.0
71 | 69,12.21,1.19,1.75,16.8,151.0,1.85,1.28,0.14,2.5,2.85,1.28,3.07,718.0
72 | 70,12.29,1.61,2.21,20.4,103.0,1.1,1.02,0.37,1.46,3.05,0.906,1.82,870.0
73 | 71,13.86,1.51,2.67,25.0,86.0,2.95,2.86,0.21,1.87,3.38,1.36,3.16,410.0
74 | 72,13.49,1.66,2.24,24.0,87.0,1.88,1.84,0.27,1.03,3.74,0.98,2.78,472.0
75 | 73,12.99,1.67,2.6,30.0,139.0,3.3,2.89,0.21,1.96,3.35,1.31,3.5,985.0
76 | 74,11.96,1.09,2.3,21.0,101.0,3.38,2.14,0.13,1.65,3.21,0.99,3.13,886.0
77 | 75,11.66,1.88,1.92,16.0,97.0,1.61,1.57,0.34,1.15,3.8,1.23,2.14,428.0
78 | 76,13.03,0.9,1.71,16.0,86.0,1.95,2.03,0.24,1.46,4.6,1.19,2.48,392.0
79 | 77,11.84,2.89,2.23,18.0,112.0,1.72,1.32,0.43,0.95,2.65,0.96,2.52,500.0
80 | 78,12.33,0.99,1.95,14.8,136.0,1.9,1.85,0.35,2.76,3.4,1.06,2.31,750.0
81 | 79,12.7,3.87,2.4,23.0,101.0,2.83,2.55,0.43,1.95,2.57,1.19,3.13,463.0
82 | 80,12.0,0.92,2.0,19.0,86.0,2.42,2.26,0.3,1.43,2.5,1.38,3.12,278.0
83 | 81,12.72,1.81,2.2,18.8,86.0,2.2,2.53,0.26,1.77,3.9,1.16,3.14,714.0
84 | 82,12.08,1.13,2.51,24.0,78.0,2.0,1.58,0.4,1.4,2.2,1.31,2.72,630.0
85 | 83,13.05,3.86,2.32,22.5,85.0,1.65,1.59,0.61,1.62,4.8,0.84,2.01,515.0
86 | 84,11.84,0.89,2.58,18.0,94.0,2.2,2.21,0.22,2.35,3.05,0.79,3.08,520.0
87 | 85,12.67,0.98,2.24,18.0,99.0,2.2,1.94,0.3,1.46,2.62,1.23,3.16,450.0
88 | 86,12.16,1.61,2.31,22.8,90.0,1.78,1.69,0.43,1.56,2.45,1.33,2.26,495.0
89 | 87,11.65,1.67,2.62,26.0,88.0,1.92,1.61,0.4,1.34,2.6,1.36,3.21,562.0
90 | 88,11.64,2.06,2.46,21.6,84.0,1.95,1.69,0.48,1.35,2.8,1.0,2.75,680.0
91 | 89,12.08,1.33,2.3,23.6,70.0,2.2,1.59,0.42,1.38,1.74,1.07,3.21,625.0
92 | 90,12.08,1.83,2.32,18.5,81.0,1.6,1.5,0.52,1.64,2.4,1.08,2.27,480.0
93 | 91,12.0,1.51,2.42,22.0,86.0,1.45,1.25,0.5,1.63,3.6,1.05,2.65,450.0
94 | 92,12.69,1.53,2.26,20.7,80.0,1.38,1.46,0.58,1.62,3.05,0.96,2.06,495.0
95 | 93,12.29,2.83,2.22,18.0,88.0,2.45,2.25,0.25,1.99,2.15,1.15,3.3,290.0
96 | 94,11.62,1.99,2.28,18.0,98.0,3.02,2.26,0.17,1.35,3.25,1.16,2.96,345.0
97 | 95,12.47,1.52,2.2,19.0,162.0,2.5,2.27,0.32,3.28,2.6,1.16,2.63,937.0
98 | 96,11.81,2.12,2.74,21.5,134.0,1.6,0.99,0.14,1.56,2.5,0.95,2.26,625.0
99 | 97,12.29,1.41,1.98,16.0,85.0,2.55,2.5,0.29,1.77,2.9,1.23,2.74,428.0
100 | 98,12.37,1.07,2.1,18.5,88.0,3.52,3.75,0.24,1.95,4.5,1.04,2.77,660.0
101 | 99,12.29,3.17,2.21,18.0,88.0,2.85,2.99,0.45,2.81,2.3,1.42,2.83,406.0
102 | 100,12.08,2.08,1.7,17.5,97.0,2.23,2.17,0.26,1.4,3.3,1.27,2.96,710.0
103 | 101,12.6,1.34,1.9,18.5,88.0,1.45,1.36,0.29,1.35,2.45,1.04,2.77,562.0
104 | 102,12.34,2.45,2.46,21.0,98.0,2.56,2.11,0.34,1.31,2.8,0.8,3.38,438.0
105 | 103,11.82,1.72,1.88,19.5,86.0,2.5,1.64,0.37,1.42,2.06,0.94,2.44,415.0
106 | 104,12.51,1.73,1.98,20.5,85.0,2.2,1.92,0.32,1.48,2.94,1.04,3.57,672.0
107 | 105,12.42,2.55,2.27,22.0,90.0,1.68,1.84,0.66,1.42,2.7,0.86,3.3,315.0
108 | 106,12.25,1.73,2.12,19.0,80.0,1.65,2.03,0.37,1.63,3.4,1.0,3.17,510.0
109 | 107,12.72,1.75,2.28,22.5,84.0,1.38,1.76,0.48,1.63,3.3,0.88,2.42,488.0
110 | 108,12.22,1.29,1.94,19.0,92.0,2.36,2.04,0.39,2.08,2.7,0.86,3.02,312.0
111 | 109,11.61,1.35,2.7,20.0,94.0,2.74,2.92,0.29,2.49,2.65,0.96,3.26,680.0
112 | 110,11.46,3.74,1.82,19.5,107.0,3.18,2.58,0.24,3.58,2.9,0.75,2.81,562.0
113 | 111,12.52,2.43,2.17,21.0,88.0,2.55,2.27,0.26,1.22,2.0,0.9,2.78,325.0
114 | 112,11.76,2.68,2.92,20.0,103.0,1.75,2.03,0.6,1.05,3.8,1.23,2.5,607.0
115 | 113,11.41,0.74,2.5,21.0,88.0,2.48,2.01,0.42,1.44,3.08,1.1,2.31,434.0
116 | 114,12.08,1.39,2.5,22.5,84.0,2.56,2.29,0.43,1.04,2.9,0.93,3.19,385.0
117 | 115,11.03,1.51,2.2,21.5,85.0,2.46,2.17,0.52,2.01,1.9,1.71,2.87,407.0
118 | 116,11.82,1.47,1.99,20.8,86.0,1.98,1.6,0.3,1.53,1.95,0.95,3.33,495.0
119 | 117,12.42,1.61,2.19,22.5,108.0,2.0,2.09,0.34,1.61,2.06,1.06,2.96,345.0
120 | 118,12.77,3.43,1.98,16.0,80.0,1.63,1.25,0.43,0.83,3.4,0.7,2.12,372.0
121 | 119,12.0,3.43,2.0,19.0,87.0,2.0,1.64,0.37,1.87,1.28,0.93,3.05,564.0
122 | 120,11.45,2.4,2.42,20.0,96.0,2.9,2.79,0.32,1.83,3.25,0.8,3.39,625.0
123 | 121,11.56,2.05,3.23,28.5,119.0,3.18,5.08,0.47,1.87,6.0,0.93,3.69,465.0
124 | 122,12.42,4.43,2.73,26.5,102.0,2.2,2.13,0.43,1.71,2.08,0.92,3.12,365.0
125 | 123,13.05,5.8,2.13,21.5,86.0,2.62,2.65,0.3,2.01,2.6,0.73,3.1,380.0
126 | 124,11.87,4.31,2.39,21.0,82.0,2.86,3.03,0.21,2.91,2.8,0.75,3.64,380.0
127 | 125,12.07,2.16,2.17,21.0,85.0,2.6,2.65,0.37,1.35,2.76,0.86,3.28,378.0
128 | 126,12.43,1.53,2.29,21.5,86.0,2.74,3.15,0.39,1.77,3.94,0.69,2.84,352.0
129 | 127,11.79,2.13,2.78,28.5,92.0,2.13,2.24,0.58,1.76,3.0,0.97,2.44,466.0
130 | 128,12.37,1.63,2.3,24.5,88.0,2.22,2.45,0.4,1.9,2.12,0.89,2.78,342.0
131 | 129,12.04,4.3,2.38,22.0,80.0,2.1,1.75,0.42,1.35,2.6,0.79,2.57,580.0
132 | 130,12.86,1.35,2.32,18.0,122.0,1.51,1.25,0.21,0.94,4.1,0.76,1.29,630.0
133 | 131,12.88,2.99,2.4,20.0,104.0,1.3,1.22,0.24,0.83,5.4,0.74,1.42,530.0
134 | 132,12.81,2.31,2.4,24.0,98.0,1.15,1.09,0.27,0.83,5.7,0.66,1.36,560.0
135 | 133,12.7,3.55,2.36,21.5,106.0,1.7,1.2,0.17,0.84,5.0,0.78,1.29,600.0
136 | 134,12.51,1.24,2.25,17.5,85.0,2.0,0.58,0.6,1.25,5.45,0.75,1.51,650.0
137 | 135,12.6,2.46,2.2,18.5,94.0,1.62,0.66,0.63,0.94,7.1,0.73,1.58,695.0
138 | 136,12.25,4.72,2.54,21.0,89.0,1.38,0.47,0.53,0.8,3.85,0.75,1.27,720.0
139 | 137,12.53,5.51,2.64,25.0,96.0,1.79,0.6,0.63,1.1,5.0,0.82,1.69,515.0
140 | 138,13.49,3.59,2.19,19.5,88.0,1.62,0.48,0.58,0.88,5.7,0.81,1.82,580.0
141 | 139,12.84,2.96,2.61,24.0,101.0,2.32,0.6,0.53,0.81,4.92,0.89,2.15,590.0
142 | 140,12.93,2.81,2.7,21.0,96.0,1.54,0.5,0.53,0.75,4.6,0.77,2.31,600.0
143 | 141,13.36,2.56,2.35,20.0,89.0,1.4,0.5,0.37,0.64,5.6,0.7,2.47,780.0
144 | 142,13.52,3.17,2.72,23.5,97.0,1.55,0.52,0.5,0.55,4.35,0.89,2.06,520.0
145 | 143,13.62,4.95,2.35,20.0,92.0,2.0,0.8,0.47,1.02,4.4,0.91,2.05,550.0
146 | 144,12.25,3.88,2.2,18.5,112.0,1.38,0.78,0.29,1.14,8.21,0.65,2.0,855.0
147 | 145,13.16,3.57,2.15,21.0,102.0,1.5,0.55,0.43,1.3,4.0,0.6,1.68,830.0
148 | 146,13.88,5.04,2.23,20.0,80.0,0.98,0.34,0.4,0.68,4.9,0.58,1.33,415.0
149 | 147,12.87,4.61,2.48,21.5,86.0,1.7,0.65,0.47,0.86,7.65,0.54,1.86,625.0
150 | 148,13.32,3.24,2.38,21.5,92.0,1.93,0.76,0.45,1.25,8.42,0.55,1.62,650.0
151 | 149,13.08,3.9,2.36,21.5,113.0,1.41,1.39,0.34,1.14,9.4,0.57,1.33,550.0
152 | 150,13.5,3.12,2.62,24.0,123.0,1.4,1.57,0.22,1.25,8.6,0.59,1.3,500.0
153 | 151,12.79,2.67,2.48,22.0,112.0,1.48,1.36,0.24,1.26,10.8,0.48,1.47,480.0
154 | 152,13.11,1.9,2.75,25.5,116.0,2.2,1.28,0.26,1.56,7.1,0.61,1.33,425.0
155 | 153,13.23,3.3,2.28,18.5,98.0,1.8,0.83,0.61,1.87,10.52,0.56,1.51,675.0
156 | 154,12.58,1.29,2.1,20.0,103.0,1.48,0.58,0.53,1.4,7.6,0.58,1.55,640.0
157 | 155,13.17,5.19,2.32,22.0,93.0,1.74,0.63,0.61,1.55,7.9,0.6,1.48,725.0
158 | 156,13.84,4.12,2.38,19.5,89.0,1.8,0.83,0.48,1.56,9.01,0.57,1.64,480.0
159 | 157,12.45,3.03,2.64,27.0,97.0,1.9,0.58,0.63,1.14,7.5,0.67,1.73,880.0
160 | 158,14.34,1.68,2.7,25.0,98.0,2.8,1.31,0.53,2.7,13.0,0.57,1.96,660.0
161 | 159,13.48,1.67,2.64,22.5,89.0,2.6,1.1,0.52,2.29,11.75,0.57,1.78,620.0
162 | 160,12.36,3.83,2.38,21.0,88.0,2.3,0.92,0.5,1.04,7.65,0.56,1.58,520.0
163 | 161,13.69,3.26,2.54,20.0,107.0,1.83,0.56,0.5,0.8,5.88,0.96,1.82,680.0
164 | 162,12.85,3.27,2.58,22.0,106.0,1.65,0.6,0.6,0.96,5.58,0.87,2.11,570.0
165 | 163,12.96,3.45,2.35,18.5,106.0,1.39,0.7,0.4,0.94,5.28,0.68,1.75,675.0
166 | 164,13.78,2.76,2.3,22.0,90.0,1.35,0.68,0.41,1.03,9.58,0.7,1.68,615.0
167 | 165,13.73,4.36,2.26,22.5,88.0,1.28,0.47,0.52,1.15,6.62,0.78,1.75,520.0
168 | 166,13.45,3.7,2.6,23.0,111.0,1.7,0.92,0.43,1.46,10.68,0.85,1.56,695.0
169 | 167,12.82,3.37,2.3,19.5,88.0,1.48,0.66,0.4,0.97,10.26,0.72,1.75,685.0
170 | 168,13.58,2.58,2.69,24.5,105.0,1.55,0.84,0.39,1.54,8.66,0.74,1.8,750.0
171 | 169,13.4,4.6,2.86,25.0,112.0,1.98,0.96,0.27,1.11,8.5,0.67,1.92,630.0
172 | 170,12.2,3.03,2.32,19.0,96.0,1.25,0.49,0.4,0.73,5.5,0.66,1.83,510.0
173 | 171,12.77,2.39,2.28,19.5,86.0,1.39,0.51,0.48,0.64,9.899999,0.57,1.63,470.0
174 | 172,14.16,2.51,2.48,20.0,91.0,1.68,0.7,0.44,1.24,9.7,0.62,1.71,660.0
175 | 173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.7,0.64,1.74,740.0
176 | 174,13.4,3.91,2.48,23.0,102.0,1.8,0.75,0.43,1.41,7.3,0.7,1.56,750.0
177 | 175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.2,0.59,1.56,835.0
178 | 176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.3,0.6,1.62,840.0
179 | 177,14.13,4.1,2.74,24.5,96.0,2.05,0.76,0.56,1.35,9.2,0.61,1.6,560.0
180 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = pyls
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # For getting rid of generated docs before re-building
18 | clean:
19 | rm -rf $(BUILDDIR)/* auto_examples/ generated/
20 |
21 | .PHONY: clean
22 |
23 | # Catch-all target: route all unknown targets to Sphinx using the new
24 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
25 | %: Makefile
26 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
27 |
--------------------------------------------------------------------------------
/docs/_static/theme_overrides.css:
--------------------------------------------------------------------------------
1 | /* override table width restrictions */
2 | @media screen and (min-width: 767px) {
3 |
4 | .wy-table-responsive table td {
5 | /* !important prevents the common CSS stylesheets from overriding
6 | this as on RTD they are loaded after this stylesheet */
7 | white-space: normal !important;
8 | }
9 |
10 | .wy-table-responsive {
11 | overflow: visible !important;
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/docs/_templates/class.rst:
--------------------------------------------------------------------------------
1 | {{ fullname }}
2 | {{ underline }}
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 | :no-members:
8 | :no-inherited-members:
9 |
10 | .. raw:: html
11 |
12 |
13 |
--------------------------------------------------------------------------------
/docs/_templates/function.rst:
--------------------------------------------------------------------------------
1 | {{ fullname }}
2 | {{ underline }}
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autofunction:: {{ objname }}
7 |
8 | .. raw:: html
9 |
10 |
11 |
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | .. _ref_api:
2 |
3 | .. currentmodule:: pyls
4 |
5 | -------------
6 | Reference API
7 | -------------
8 |
9 | This is the primary reference of ``pyls``. Please refer to the :ref:`user guide
10 | ` for more information on how to best implement these functions in your
11 | own workflows.
12 |
13 | .. contents:: **List of modules**
14 | :local:
15 |
16 | .. _ref_decomp:
17 |
18 | :mod:`pyls` - PLS decompositions
19 | --------------------------------------
20 |
21 | .. automodule:: pyls.types
22 | :no-members:
23 | :no-inherited-members:
24 |
25 | .. currentmodule:: pyls
26 |
27 | .. autosummary::
28 | :template: function.rst
29 | :toctree: generated/
30 |
31 | pyls.behavioral_pls
32 | pyls.meancentered_pls
33 |
34 | .. _ref_results:
35 |
36 | :mod:`pyls.structures` - PLS data structures
37 | --------------------------------------------
38 |
39 | .. automodule:: pyls.structures
40 | :no-members:
41 | :no-inherited-members:
42 |
43 | .. currentmodule:: pyls.structures
44 |
45 | .. autosummary::
46 | :template: class.rst
47 | :toctree: generated/
48 |
49 | pyls.structures.PLSResults
50 | pyls.structures.PLSPermResults
51 | pyls.structures.PLSBootResults
52 | pyls.structures.PLSSplitHalfResults
53 | pyls.structures.PLSCrossValidationResults
54 | pyls.structures.PLSInputs
55 |
56 | .. _ref_io:
57 |
58 | :mod:`pyls.io` - Data I/O functionality
59 | ---------------------------------------
60 |
61 | .. automodule:: pyls.io
62 | :no-members:
63 | :no-inherited-members:
64 |
65 | .. currentmodule:: pyls
66 |
67 | .. autosummary::
68 | :template: function.rst
69 | :toctree: generated/
70 |
71 | pyls.save_results
72 | pyls.load_results
73 |
74 | .. _ref_matlab:
75 |
76 | :mod:`pyls.matlab` - Matlab compatibility
77 | -----------------------------------------
78 |
79 | .. automodule:: pyls.matlab
80 | :no-members:
81 | :no-inherited-members:
82 |
83 | .. currentmodule:: pyls
84 |
85 | .. autosummary::
86 | :template: function.rst
87 | :toctree: generated/
88 |
89 | pyls.import_matlab_result
90 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Configuration file for the Sphinx documentation builder.
5 |
6 | # -- Path setup --------------------------------------------------------------
7 |
8 | # If extensions (or modules to document with autodoc) are in another directory,
9 | # add these directories to sys.path here. If the directory is relative to the
10 | # documentation root, use os.path.abspath to make it absolute, like shown here.
11 | #
12 | import os
13 | import sys
14 |
15 | # -- Project information -----------------------------------------------------
16 |
17 | project = 'pyls'
18 | copyright = '2018, pyls developers'
19 | author = 'pyls developers'
20 |
21 | # Import project to get version info
22 | sys.path.insert(0, os.path.abspath(os.path.pardir))
23 | import pyls # noqa
24 | # The short X.Y version
25 | version = pyls.__version__
26 | # The full version, including alpha/beta/rc tags
27 | release = pyls.__version__
28 |
29 | # -- General configuration ---------------------------------------------------
30 |
31 | # Add any Sphinx extension module names here, as strings. They can be
32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
33 | # ones.
34 | extensions = [
35 | 'sphinx.ext.autodoc',
36 | 'sphinx.ext.autosummary',
37 | 'sphinx.ext.doctest',
38 | 'sphinx.ext.intersphinx',
39 | 'sphinx.ext.mathjax',
40 | 'sphinx.ext.napoleon',
41 | 'sphinx.ext.viewcode',
42 | ]
43 |
44 | # Generate the API documentation when building
45 | autosummary_generate = True
46 | autodoc_default_flags = ['members', 'inherited-members']
47 | numpydoc_show_class_members = False
48 | autoclass_content = "class"
49 |
50 | # Add any paths that contain templates here, relative to this directory.
51 | templates_path = ['_templates']
52 |
53 | # The suffix(es) of source filenames.
54 | source_suffix = '.rst'
55 |
56 | # The master toctree document.
57 | master_doc = 'index'
58 |
59 | # The language for content autogenerated by Sphinx. Refer to documentation
60 | # for a list of supported languages.
61 | #
62 | # This is also used if you do content translation via gettext catalogs.
63 | # Usually you set "language" from the command line for these cases.
64 | language = None
65 |
66 | # List of patterns, relative to source directory, that match files and
67 | # directories to ignore when looking for source files.
68 | # This pattern also affects html_static_path and html_extra_path .
69 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
70 |
71 | # The name of the Pygments (syntax highlighting) style to use.
72 | pygments_style = 'sphinx'
73 | highlight_language = 'python3'
74 |
75 | # -- Options for HTML output -------------------------------------------------
76 |
77 | # The theme to use for HTML and HTML Help pages. See the documentation for
78 | # a list of builtin themes.
79 | import sphinx_rtd_theme # noqa
80 | html_theme = 'sphinx_rtd_theme'
81 | html_show_sourcelink = False
82 |
83 | # Theme options are theme-specific and customize the look and feel of a theme
84 | # further. For a list of options available for each theme, see the
85 | # documentation.
86 | #
87 | # html_theme_options = {}
88 |
89 | html_context = {
90 | 'css_files': [
91 | '_static/theme_overrides.css'
92 | ]
93 | }
94 |
95 | # Add any paths that contain custom static files (such as style sheets) here,
96 | # relative to this directory. They are copied after the builtin static files,
97 | # so a file named "default.css" will overwrite the builtin "default.css".
98 | html_static_path = ['_static']
99 |
100 | # -- Options for HTMLHelp output ---------------------------------------------
101 |
102 | # Output file base name for HTML help builder.
103 | htmlhelp_basename = 'pylsdoc'
104 |
105 | # -- Extension configuration -------------------------------------------------
106 | intersphinx_mapping = {
107 | 'numpy': ('https://docs.scipy.org/doc/numpy', None),
108 | 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None),
109 | 'sklearn': ('http://scikit-learn.org/stable', None),
110 | }
111 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | pyls: Partial Least Squares in Python
2 | =====================================
3 |
4 | This package provides a Python interface for performing partial least squares
5 | (PLS) analyses.
6 |
7 | .. image:: https://travis-ci.org/rmarkello/pyls.svg?branch=master
8 | :target: https://travis-ci.org/rmarkello/pyls
9 | .. image:: https://circleci.com/gh/rmarkello/pyls.svg?style=shield
10 | :target: https://circleci.com/gh/rmarkello/pyls
11 | .. image:: https://codecov.io/gh/rmarkello/pyls/branch/master/graph/badge.svg
12 | :target: https://codecov.io/gh/rmarkello/pyls
13 | .. image:: https://readthedocs.org/projects/pyls/badge/?version=latest
14 | :target: http://pyls.readthedocs.io/en/latest
15 | .. image:: http://img.shields.io/badge/License-GPL%202.0-blue.svg
16 | :target: https://opensource.org/licenses/GPL-2.0
17 |
18 | .. _readme_installation:
19 |
20 | Installation requirements
21 | -------------------------
22 |
23 | Currently, ``pyls`` works with Python 3.5+ and requires a few dependencies:
24 |
25 | - h5py
26 | - numpy
27 | - scikit-learn
28 | - scipy, and
29 | - tqdm
30 |
31 | Assuming you have the correct version of Python installed, you can install
32 | ``pyls`` by opening a terminal and running the following:
33 |
34 | .. code-block:: bash
35 |
36 | git clone https://github.com/rmarkello/pyls.git
37 | cd pyls
38 | python setup.py install
39 |
40 | All relevant dependencies will be installed alongside the ``pyls`` module.
41 |
42 | .. _readme_quickstart:
43 |
44 | Quickstart
45 | ----------
46 |
47 | There are a number of ways to use ``pyls``, depending on the type of analysis
48 | you would like to perform. Assuming you have two matrices ``X`` and ``Y``
49 | representing different observations from a set of samples (i.e., subjects,
50 | neurons, brain regions), you can run a simple analysis with:
51 |
52 | .. code-block:: python
53 |
54 | >>> import pyls
55 | >>> results = pyls.behavioral_pls(X, Y)
56 |
57 | For detailed information on the different methods available and how to
58 | interpret the results object, please refer to our :ref:`user guide `.
59 |
60 | .. _readme_development:
61 |
62 | Development and getting involved
63 | --------------------------------
64 |
65 | If you've found a bug, are experiencing a problem, or have a question about
66 | using the package, please head on over to our `GitHub issues`_ and make a new
67 | issue with some information about it! Someone will try and get back to you
68 | as quickly as possible, though please note that the primary developer for
69 | ``pyls`` (@rmarkello) is a graduate student so responses make take some time!
70 |
71 | If you're interested in getting involved in the project: welcome |sparkles|!
72 | We're thrilled to welcome new contributors. You should start by reading our
73 | `code of conduct`_; all activity on ``pyls`` should adhere to the CoC. After
74 | that, take a look at our `contributing guidelines`_ so you're familiar with the
75 | processes we (generally) try to follow when making changes to the repository!
76 | Once you're ready to jump in head on over to our issues to see if there's
77 | anything you might like to work on.
78 |
79 | .. _readme_licensing:
80 |
81 | License Information
82 | -------------------
83 |
84 | This codebase is licensed under the GNU General Public License, version 2. The
85 | full license can be found in the `LICENSE`_ file in the ``pyls`` distribution.
86 |
87 | All trademarks referenced herein are property of their respective holders.
88 |
89 | .. toctree::
90 | :maxdepth: 2
91 |
92 | usage
93 | api
94 |
95 | .. |sparkles| replace:: ✨
96 | .. _code of conduct: https://github.com/rmarkello/pyls/blob/master/CODE_OF_CONDUCT.md
97 | .. _contributing guidelines: https://github.com/rmarkello/pyls/blob/master/CONTRIBUTING.md
98 | .. _GitHub issues: https://github.com/rmarkello/pyls/issues
99 | .. _LICENSE: https://github.com/rmarkello/pyls/blob/master/LICENSE
100 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | -r ../requirements.txt
2 | sphinx>=2.0
3 | sphinx_rtd_theme
4 |
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | .. _usage:
2 |
3 | ----------
4 | User guide
5 | ----------
6 |
7 | Partial least squares (PLS) is a multivariate statistical technique that aims
8 | to find shared information between two sets of variables. If you're unfamiliar
9 | with PLS and are interested in a thorough (albeit quite technical) treatment,
10 | `Abdi et al., 2013 `_ is a good
11 | resource.
12 |
13 | This user guide will go through the basic statistical concepts of the two types
14 | of PLS implemented in the current package (:ref:`usage_behavioral` and
15 | :ref:`usage_meancentered`) and demonstrate how to interpret and use the results
16 | of a PLS analysis (:ref:`usage_results`). If you still have questions after
17 | going through this guide then you can refer to the :ref:`ref_api`!
18 |
19 | .. toctree::
20 | :caption: Table of Contents
21 | :numbered:
22 | :maxdepth: 2
23 |
24 | user_guide/behavioral.rst
25 | user_guide/meancentered.rst
26 | user_guide/results.rst
27 |
--------------------------------------------------------------------------------
/docs/user_guide/behavioral.rst:
--------------------------------------------------------------------------------
1 | .. testsetup::
2 |
3 | import numpy as np
4 | np.set_printoptions(suppress=True)
5 |
6 | .. _usage_behavioral:
7 |
8 | Behavioral PLS
9 | ==============
10 |
11 | Running a behavioral PLS using ``pyls`` is as simple as:
12 |
13 | .. code-block::
14 |
15 | >>> import pyls
16 | >>> out = pyls.behavioral_pls(X, Y)
17 |
18 | What we call behavioral PLS in the ``pyls`` package is actually the more
19 | traditional form of PLS (and is generally not prefixed with "behavioral"). This
20 | form of PLS, at its core, attempts to find shared information between two sets
21 | of features derived from a common set of samples. However, as with all things,
22 | there are a number of ever-so-slightly different kinds of PLS that exist in the
23 | wild, so to be thorough we're going to briefly explain the exact flavor
24 | implemented here before diving into a more illustrative example.
25 |
26 | What *exactly* do we mean by "behavioral PLS"?
27 | ----------------------------------------------
28 |
29 | **Technical answer**: :py:func:`pyls.behavioral_pls` employs a symmetrical,
30 | singular value decomposition (SVD) based form of PLS, and is sometimes referred
31 | to as PLS-correlation (PLS-C), PLS-SVD, or, infrequently, EZ-PLS. Notably, it
32 | is **not** the same as PLS regression (PLS-R).
33 |
34 | **Less technical answer**: :py:func:`pyls.behavioral_pls` is like performing a
35 | principal components analysis (PCA) but when you have two related datasets,
36 | each with multiple features.
37 |
38 | Differences from PLS regression (PLS-R)
39 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
40 |
41 | You can think of the differences between PLS-C and PLS-R similar to how you
42 | might consider the differences between a Pearson correlation and a simple
43 | linear regression. Though this analogy is an over-simplification, the primary
44 | difference to take away is that behavioral PLS (PLS-C) does *not assess*
45 | *directional relationships between sets of data* (e.g., X → Y), but rather
46 | looks at how the two sets generally covary (e.g., X ↔ Y).
47 |
48 | To understand this a bit more we can walk through a detailed example.
49 |
50 | An exercise in calisthenics
51 | ---------------------------
52 |
53 | .. note::
54 | Descriptions of PLS are almost always accompanied by a litany of equations,
55 | and for good reason: understanding how to interpret the results of a PLS
56 | requires at least a cursory understanding of the math behind it. As such,
57 | this example is going to rely on these equations, but will always do so in
58 | the context of real data. The hope is that this approach will help make the
59 | more abstract mathematical concepts a bit more concrete (and easier to
60 | apply to new data sets!).
61 |
62 | We'll start by loading the example dataset [1]_:
63 |
64 | .. doctest::
65 |
66 | >>> from pyls.examples import load_dataset
67 | >>> data = load_dataset('linnerud')
68 |
69 | This is the same dataset as in :py:func:`sklearn.datasets.load_linnerud`; the
70 | formatting has just been lightly modified to better suit our purposes.
71 |
72 | Our ``data`` object can be treated as a dictionary, containing all the
73 | information necessary to run a PLS analysis. The keys can be accessed as
74 | attributes, so we can take a quick look at our input matrices
75 | :math:`\textbf{X}` and :math:`\textbf{Y}`:
76 |
77 | .. doctest::
78 |
79 | >>> sorted(data.keys())
80 | ['X', 'Y', 'n_boot', 'n_perm']
81 | >>> data.X.shape
82 | (20, 3)
83 | >>> data.X.head()
84 | Chins Situps Jumps
85 | 0 5.0 162.0 60.0
86 | 1 2.0 110.0 60.0
87 | 2 12.0 101.0 101.0
88 | 3 12.0 105.0 37.0
89 | 4 13.0 155.0 58.0
90 |
91 | The rows of our :math:`\textbf{X}_{n \times p}` matrix here represent *n*
92 | subjects, and the columns indicate *p* different types of exercises these
93 | subjects were able to perform. So the first subject was able to do 5 chin-ups,
94 | 162 situps, and 60 jumping jacks.
95 |
96 | .. doctest::
97 |
98 | >>> data.Y.shape
99 | (20, 3)
100 | >>> data.Y.head()
101 | Weight Waist Pulse
102 | 0 191.0 36.0 50.0
103 | 1 189.0 37.0 52.0
104 | 2 193.0 38.0 58.0
105 | 3 162.0 35.0 62.0
106 | 4 189.0 35.0 46.0
107 |
108 | The rows of our :math:`\textbf{Y}_{n \times q}` matrix *also* represent *n*
109 | subjects (critically, the same subjects as in :math:`\textbf{X}`), and the
110 | columns indicate *q* physiological measurements taken for each subject. That
111 | same subject referenced above thus has a weight of 191 pounds, a 36 inch waist,
112 | and a pulse of 50 beats per minute.
113 |
114 | Behavioral PLS will attempt to establish whether a relationship exists between
115 | the exercises performed and these physiological variables. If we wanted to run
116 | the full analysis right away, we could do so with:
117 |
118 | .. doctest::
119 |
120 | >>> from pyls import behavioral_pls
121 | >>> results = behavioral_pls(**data)
122 |
123 | If you're comfortable with the down-and-dirty of PLS and want to go ahead and
124 | start understanding the ``results`` object, feel free to jump ahead to
125 | :ref:`usage_results`. Otherwise, read on for more about what's happening behind
126 | the scenes of :py:func:`~.behavioral_pls`
127 |
128 | The cross-covariance matrix
129 | ---------------------------
130 |
131 | Behavioral PLS works by decomposing the cross-covariance matrix
132 | :math:`\textbf{R}_{q \times p}` generated from the input matrices, where
133 | :math:`\textbf{R} = \textbf{Y}^{T} \textbf{X}`. The results of PLS are a
134 | bit easier to interpret when :math:`\textbf{R}` is the cross-correlation matrix
135 | instead of the cross-covariance matrix, which means that we should z-score each
136 | feature in :math:`\textbf{X}` and :math:`\textbf{Y}` before multiplying them;
137 | this is done automatically by the :py:func:`~.behavioral_pls` function.
138 |
139 | In our example, :math:`\textbf{R}` ends up being a 3 x 3 matrix:
140 |
141 | .. doctest::
142 |
143 | >>> from pyls.compute import xcorr
144 | >>> R = xcorr(data.X, data.Y)
145 | >>> R
146 | Chins Situps Jumps
147 | Weight -0.389694 -0.493084 -0.226296
148 | Waist -0.552232 -0.645598 -0.191499
149 | Pulse 0.150648 0.225038 0.034933
150 |
151 | The :math:`q` rows of this matrix correspond to the physiological measurements
152 | and the :math:`p` columns to the exercises. Examining the first row, we can see
153 | that ``-0.389694`` is the correlation between ``Weight`` and ``Chins`` across
154 | all the subjects, ``-0.493084`` the correlation between ``Weight`` and
155 | ``Situps``, and so on.
156 |
157 | Singular value decomposition
158 | ----------------------------
159 |
160 | Once we have generated our correlation matrix :math:`\textbf{R}` we subject it
161 | to a singular value decomposition, where :math:`\textbf{R} = \textbf{USV}^{T}`:
162 |
163 | .. doctest::
164 |
165 | >>> from pyls.compute import svd
166 | >>> U, S, V = svd(R)
167 | >>> U.shape, S.shape, V.shape
168 | ((3, 3), (3, 3), (3, 3))
169 |
170 | The outputs of this decomposition are two arrays of left and right singular
171 | vectors (:math:`\textbf{U}_{p \times l}` and :math:`\textbf{V}_{q \times l}`)
172 | and a diagonal matrix of singular values (:math:`\textbf{S}_{l \times l}`). The
173 | rows of :math:`\textbf{U}` correspond to the exercises from our input matrix
174 | :math:`\textbf{X}`, and the rows of :math:`\textbf{V}` correspond to the
175 | physiological measurements from our input matrix :math:`\textbf{Y}`. The
176 | columns of :math:`\textbf{U}` and :math:`\textbf{V}`, on the other hand,
177 | represent new dimensions or components that have been "discovered" in the data.
178 |
179 | ..
180 |
181 | The :math:`i^{th}` columns of :math:`\textbf{U}` and :math:`\textbf{V}` weigh
182 | the contributions of these exercises and physiological measurements,
183 | respectively. Taken together, the :math:`i^{th}` left and right singular
184 | vectors and singular value represent a *latent variable*, a multivariate
185 | pattern that weighs the original exercise and physiological measurements such
186 | that they maximally covary with each other.
187 |
188 | The :math:`i^{th}` singular value is proportional to the total
189 | exercise-physiology covariance accounted for by the latent variable. The
190 | effect size (:math:`\eta`) associated with a particular latent variable can be
191 | estimated as the ratio of the squared singular value (:math:`\sigma`) to the
192 | sum of all the squared singular values:
193 |
194 | .. math::
195 |
196 | \eta_{i} = \sigma_{i}^{2} \big/ \sum \limits_{j=1}^{l} \sigma_{j}^{2}
197 |
198 | We can use the helper function :py:func:`pyls.compute.varexp` to calculate this
199 | for us:
200 |
201 | .. doctest::
202 |
203 | >>> from pyls.compute import varexp
204 | >>> pctvar = varexp(S)[0, 0]
205 | >>> print('{:.4f}'.format(pctvar))
206 | 0.9947
207 |
208 | Taking a look at the variance explained, we see that a whopping ~99.5% of the
209 | covariance between the exercises and physiological measurements in
210 | :math:`\textbf{X}` and :math:`\textbf{Y}` are explained by this latent
211 | variable, suggesting that the relationship between these variable can be
212 | effectively explained by a single dimension.
213 |
214 | Examining the weights from the singular vectors:
215 |
216 | .. doctest::
217 |
218 | >>> U[:, 0]
219 | array([0.61330742, 0.7469717 , 0.25668519])
220 | >>> V[:, 0]
221 | array([-0.58989118, -0.77134059, 0.23887675])
222 |
223 | we see that all the exercises (``U[:, 0]``) are positively weighted, but that
224 | the physiological measurements (``V[:, 0]``) are split, with ``Weight`` and
225 | ``Waist`` measurements negatively weighted and ``Pulse`` positively weighted.
226 | (Note that the order of the weights is the same as the order of the original
227 | columns in our :math:`\textbf{X}` and :math:`\textbf{Y}` matrices.) Taken
228 | together this suggests that, for the subjects in this dataset, individuals who
229 | completed more of a given exercise tended to:
230 |
231 | 1. Complete more of the other exercises, and
232 | 2. Have a lower weight, smaller waist, and higher heart rate.
233 |
234 | It is also worth examining how correlated the projections of the original
235 | variables on this latent variable are. To do that, we can multiply the original
236 | data matrices by the relevant singular vectors and then correlate the results:
237 |
238 | .. doctest::
239 |
240 | >>> from scipy.stats import pearsonr
241 | >>> XU = np.dot(data.X, U)
242 | >>> YV = np.dot(data.Y, V)
243 | >>> r, p = pearsonr(XU[:, 0], YV[:, 0])
244 | >>> print('r = {:.4f}, p = {:.4f}'.format(r, p))
245 | r = 0.4900, p = 0.0283
246 |
247 | The correlation value of this latent variable (~ ``0.49``) suggests that our
248 | interpretation of the singular vectors weights, above, is only *somewhat*
249 | accurate. We can think of this correlation (ranging from -1 to 1) as a proxy
250 | for the question: "how often is this interpretation of the singular vectors
251 | true?" Correlations closer to -1 indicate that the interpretation is largely
252 | inaccurate across subjects, whereas correlations closer to 1 indicate the
253 | interpretation is largely accurate across subjects.
254 |
255 | Latent variable significance testing
256 | ------------------------------------
257 |
258 | Scientists love null-hypothesis significance testing, so there's a strong urge
259 | for researchers doing these sorts of analyses to want to find a way to
260 | determine whether observed latent variables are significant(ly different from a
261 | specified null model). The issue comes in determining what aspect of the latent
262 | variables to test!
263 |
264 | With behavioral PLS we assess whether the **variance explained** by a given
265 | latent variable is significantly different than would be expected by a null.
266 | Importantly, that null is generated by re-computing the latent variables from
267 | random permutations of the original data, generating a non-parametric
268 | distribution of explained variances by which to measure "significance."
269 |
270 | ..
271 |
272 | Reliability of the singular vectors
273 | -----------------------------------
274 |
275 |
276 |
277 | .. [1] Tenenhaus, M. (1998). La régression PLS: théorie et pratique. Editions
278 | technip.
279 |
--------------------------------------------------------------------------------
/docs/user_guide/meancentered.rst:
--------------------------------------------------------------------------------
1 | .. _usage_meancentered:
2 |
3 | Mean-centered PLS
4 | =================
5 |
6 | In contrast to behavioral PLS, mean-centered PLS doesn't aim to find
7 | relationships between two sets of variables. Instead, it tries to find
8 | relationships between *groupings* in a single set of variables. Indeed, you can
9 | think of it almost like a multivariate t-test or ANOVA (depending on how many
10 | groups you have).
11 |
12 | An oenological example
13 | ----------------------
14 |
15 | .. doctest::
16 |
17 | >>> from pyls.examples import load_dataset
18 | >>> data = load_dataset('wine')
19 |
20 | This is the same dataset as in :py:func:`sklearn.datasets.load_wine`; the
21 | formatting has just been lightly modified to better suit our purposes.
22 |
23 | Our ``data`` object can be treated as a dictionary, containing all the
24 | information necessary to run a PLS analysis. The keys can be accessed as
25 | attributes, so we can take a quick look at our input matrix:
26 |
27 | .. doctest::
28 |
29 | >>> sorted(data.keys())
30 | ['X', 'groups', 'n_boot', 'n_perm']
31 | >>> data.X.shape
32 | (178, 13)
33 | >>> data.X.columns
34 | Index(['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium',
35 | 'total_phenols', 'flavanoids', 'nonflavanoid_phenols',
36 | 'proanthocyanins', 'color_intensity', 'hue',
37 | 'od280/od315_of_diluted_wines', 'proline'],
38 | dtype='object')
39 | >>> data.groups
40 | [59, 71, 48]
41 |
--------------------------------------------------------------------------------
/docs/user_guide/results.rst:
--------------------------------------------------------------------------------
1 | .. _usage_results:
2 |
3 | PLS Results
4 | ===========
5 |
6 | So you ran a PLS analysis and got some results. Congratulations! The easy part
7 | is done. 🙃 Interpreting (trying to interpret) the results of a PLS
8 | analysis---similar to interpreting the results of a PCA or factor analysis or
9 | CCA or any other complex decomposition---can be difficult. The ``pyls`` package
10 | contains some functions, tools, and data structures to try and help.
11 |
12 | The :py:class:`~.structures.PLSResults` data structure is, at its core, a
13 | Python dictionary that is designed to contain all possible results from any of
14 | the analyses available in :py:mod:`pyls.types`. Let's generate a small example
15 | results object to play around with. We'll use the dataset from the
16 | :ref:`usage_behavioral` example:
17 |
18 | .. doctest::
19 |
20 | >>> from pyls.examples import load_dataset
21 | >>> data = load_dataset('linnerud')
22 |
23 | We can generate the results file by running the behavioral PLS analysis again.
24 | We pass the ``verbose=False`` flag to suppress the progress bar that would
25 | normally be displayed:
26 |
27 | .. doctest::
28 |
29 | >>> from pyls import behavioral_pls
30 | >>> results = behavioral_pls(**data, verbose=False)
31 | >>> results
32 | PLSResults(x_weights, y_weights, x_scores, y_scores, y_loadings, singvals, varexp, permres, bootres, cvres, inputs)
33 |
34 | Printing the ``results`` object gives us a helpful view of some of the
35 | different outputs available to us. While we won't go into detail about all of
36 | these (see the :ref:`ref_api` for info on those), we'll touch on a few of the
37 | potentially more confusing ones.
38 |
--------------------------------------------------------------------------------
/pyls/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | __all__ = [
4 | '__version__',
5 | 'behavioral_pls', 'meancentered_pls', 'pls_regression',
6 | 'import_matlab_result', 'save_results', 'load_results',
7 | 'examples', 'PLSInputs', 'PLSResults',
8 |
9 | ]
10 |
11 | from ._version import get_versions
12 | __version__ = get_versions()['version']
13 | del get_versions
14 |
15 | from . import examples
16 | from .io import load_results, save_results
17 | from .matlab import import_matlab_result
18 | from .structures import PLSInputs, PLSResults
19 | from .types import (behavioral_pls, meancentered_pls, pls_regression)
20 |
--------------------------------------------------------------------------------
/pyls/compute.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | from scipy.stats import zscore, zmap
5 | from sklearn.utils.extmath import randomized_svd
6 | from sklearn.utils.validation import check_X_y, check_random_state
7 | from pyls import utils
8 |
9 |
10 | def svd(crosscov, n_components=None, seed=None):
11 | """
12 | Calculates the SVD of `crosscov` and returns singular vectors/values
13 |
14 | Parameters
15 | ----------
16 | crosscov : (B, T) array_like
17 | Cross-covariance (or cross-correlation) matrix to be decomposed
18 | n_components : int, optional
19 | Number of components to retain from decomposition
20 | seed : {int, :obj:`numpy.random.RandomState`, None}, optional
21 | Seed for random number generation. Default: None
22 |
23 | Returns
24 | -------
25 | U : (B, L) `numpy.ndarray`
26 | Left singular vectors from singular value decomposition
27 | d : (L, L) `numpy.ndarray`
28 | Diagonal array of singular values from singular value decomposition
29 | V : (J, L) `numpy.ndarray`
30 | Right singular vectors from singular value decomposition
31 | """
32 |
33 | seed = check_random_state(seed)
34 | crosscov = np.asanyarray(crosscov)
35 |
36 | if n_components is None:
37 | n_components = min(crosscov.shape)
38 | elif not isinstance(n_components, int):
39 | raise TypeError('Provided `n_components` {} must be of type int'
40 | .format(n_components))
41 |
42 | # run most computationally efficient SVD
43 | if crosscov.shape[0] <= crosscov.shape[1]:
44 | U, d, V = randomized_svd(crosscov.T, n_components=n_components,
45 | random_state=seed, transpose=False)
46 | V = V.T
47 | else:
48 | V, d, U = randomized_svd(crosscov, n_components=n_components,
49 | random_state=seed, transpose=False)
50 | U = U.T
51 |
52 | return U, np.diag(d), V
53 |
54 |
55 | def xcorr(X, Y, norm=False, covariance=False):
56 | """
57 | Calculates the cross-covariance matrix of `X` and `Y`
58 |
59 | Parameters
60 | ----------
61 | X : (S, B) array_like
62 | Input matrix, where `S` is samples and `B` is features.
63 | Y : (S, T) array_like, optional
64 | Input matrix, where `S` is samples and `T` is features.
65 | norm : bool, optional
66 | Whether to normalize `X` and `Y` (i.e., sum of squares = 1). Default:
67 | False
68 | covariance : bool, optional
69 | Whether to calculate the cross-covariance matrix instead of the cross-
70 | correlation matrix. Default: False
71 |
72 | Returns
73 | -------
74 | xprod : (T, B) `numpy.ndarray`
75 | Cross-covariance of `X` and `Y`
76 | """
77 |
78 | check_X_y(X, Y, multi_output=True)
79 |
80 | # we could just use scipy.stats zscore but if we do this we retain the
81 | # original data structure; if pandas dataframes were given, a dataframe
82 | # will be returned
83 | if not covariance:
84 | Xn = (X - X.mean(axis=0)) / X.std(axis=0, ddof=1)
85 | Yn = (Y - Y.mean(axis=0)) / Y.std(axis=0, ddof=1)
86 | else:
87 | Xn, Yn = X - X.mean(0, keepdims=True), Y - Y.mean(0, keepdims=True)
88 |
89 | if norm:
90 | Xn, Yn = normalize(Xn), normalize(Yn)
91 |
92 | xprod = (Yn.T @ Xn) / (len(Xn) - 1)
93 |
94 | return xprod
95 |
96 |
97 | def normalize(X, axis=0):
98 | """
99 | Normalizes `X` along `axis`
100 |
101 | Utilizes Frobenius norm (or Hilbert-Schmidt norm / `L_{p,q}` norm where
102 | `p=q=2`)
103 |
104 | Parameters
105 | ----------
106 | X : (S, B) array_like
107 | Input array
108 | axis : int, optional
109 | Axis for normalization. Default: 0
110 |
111 | Returns
112 | -------
113 | normed : (S, B) `numpy.ndarray`
114 | Normalized `X`
115 | """
116 |
117 | normed = np.array(X)
118 | normal_base = np.linalg.norm(normed, axis=axis, keepdims=True)
119 | # avoid DivideByZero errors
120 | zero_items = np.where(normal_base == 0)
121 | normal_base[zero_items] = 1
122 | # normalize and re-set zero_items to 0
123 | normed = normed / normal_base
124 | normed[zero_items] = 0
125 |
126 | return normed
127 |
128 |
129 | def rescale_test(X_train, X_test, Y_train, U, V):
130 | """
131 | Generates out-of-sample predicted `Y` values
132 |
133 | Parameters
134 | ----------
135 | X_train : (S1, B) array_like
136 | Data matrix, where `S1` is observations and `B` is features
137 | X_test : (S2, B)
138 | Data matrix, where `S2` is observations and `B` is features
139 | Y_train : (S1, T) array_like
140 | Behavioral matrix, where `S1` is observations and `T` is features
141 |
142 | Returns
143 | -------
144 | Y_pred : (S2, T) `numpy.ndarray`
145 | Behavioral matrix, where `S2` is observations and `T` is features
146 | """
147 |
148 | X_resc = zmap(X_test, compare=X_train, ddof=1)
149 | Y_pred = (X_resc @ U @ V.T) + Y_train.mean(axis=0, keepdims=True)
150 |
151 | return Y_pred
152 |
153 |
154 | def perm_sig(orig, perm):
155 | """
156 | Calculates significance of `orig` values agains `perm` distributions
157 |
158 | Compares amplitude of each singular value to distribution created via
159 | permutation in `perm`
160 |
161 | Parameters
162 | ----------
163 | orig : (L, L) array_like
164 | Diagonal matrix of singular values for `L` latent variables
165 | perm : (L, P) array_like
166 | Distribution of singular values from permutation testing where `P` is
167 | the number of permutations
168 |
169 | Returns
170 | -------
171 | sprob : (L,) `numpy.ndarray`
172 | Number of permutations where singular values exceeded original data
173 | decomposition for each of `L` latent variables normalized by the total
174 | number of permutations. Can be interpreted as the statistical
175 | significance of the latent variables (i.e., non-parametric p-value).
176 | """
177 |
178 | sp = np.sum(perm > np.diag(orig)[:, None], axis=1) + 1
179 | sprob = sp / (perm.shape[-1] + 1)
180 |
181 | return sprob
182 |
183 |
184 | def boot_ci(boot, ci=95):
185 | """
186 | Generates CI for bootstrapped values `boot`
187 |
188 | Parameters
189 | ----------
190 | boot : (G, L, B) array_like
191 | Singular vectors, where `G` is features, `L` is components, and `B` is
192 | bootstraps
193 | ci : (0, 100) float, optional
194 | Confidence interval bounds to be calculated. Default: 95
195 |
196 | Returns
197 | -------
198 | lower : (G, L) `numpy.ndarray`
199 | Lower bound of CI for singular vectors in `boot`
200 | upper : (G, L) `numpy.ndarray`
201 | Upper bound of CI for singular vectors in `boot`
202 | """
203 |
204 | low = (100 - ci) / 2
205 | prc = [low, 100 - low]
206 |
207 | lower, upper = np.percentile(boot, prc, axis=-1)
208 |
209 | return lower, upper
210 |
211 |
212 | def boot_rel(orig, u_sum, u_square, n_boot):
213 | """
214 | Determines bootstrap ratios (BSR) of saliences from bootstrap distributions
215 |
216 | Parameters
217 | ----------
218 | orig : (G, L) array_like
219 | Original singular vectors
220 | u_sum : (G, L) array_like
221 | Sum of bootstrapped singular vectors
222 | u_square : (G, L) array_like
223 | Sum of squared bootstraped singular vectors
224 | n_boot : int
225 | Number of bootstraps used in generating `u_sum` and `u_square`
226 |
227 | Returns
228 | -------
229 | bsr : (G, L) `numpy.ndarray`
230 | Bootstrap ratios for provided singular vectors
231 | """
232 |
233 | u_sum2 = (u_sum ** 2) / n_boot
234 | u_se = np.sqrt(np.abs(u_square - u_sum2) / (n_boot - 1))
235 | bsr = orig / u_se
236 |
237 | return bsr, u_se
238 |
239 |
240 | def procrustes(original, permuted, singular):
241 | """
242 | Performs Procrustes rotation on `permuted` to align with `original`
243 |
244 | `original` and `permuted` should be either left *or* right singular
245 | vector from two SVDs. `singular` should be the diagonal matrix of
246 | singular values from the SVD that generated `original`
247 |
248 | Parameters
249 | ----------
250 | original : array_like
251 | permuted : array_like
252 | singular : array_like
253 |
254 | Returns
255 | -------
256 | resamp : `numpy.ndarray`
257 | Singular values of rotated `permuted` matrix
258 | """
259 |
260 | temp = original.T @ permuted
261 | N, _, P = randomized_svd(temp, n_components=min(temp.shape))
262 | resamp = permuted @ singular @ (P.T @ N.T)
263 |
264 | return resamp
265 |
266 |
267 | def get_group_mean(X, Y, n_cond=1, mean_centering=0):
268 | """
269 | Parameters
270 | ----------
271 | X : (S, B) array_like
272 | Input data matrix, where `S` is observations and `B` is features
273 | Y : (S, T) array_like, optional
274 | Dummy coded input array, where `S` is observations and `T`
275 | corresponds to the number of different groups x conditions. A value
276 | of 1 indicates that an observation belongs to a specific group or
277 | condition.
278 | n_cond : int, optional
279 | Number of conditions in dummy coded `Y` array. Default: 1
280 | mean_centering : {0, 1, 2}, optional
281 | Mean-centering method. Default: 0
282 |
283 | Returns
284 | -------
285 | group_mean : (T, B) `numpy.ndarray`
286 | Means to be removed from `X` during centering
287 | """
288 |
289 | if mean_centering == 0:
290 | # we want means of GROUPS, collapsing across conditions
291 | inds = slice(0, Y.shape[-1], n_cond)
292 | groups = utils.dummy_code(Y[:, inds].sum(axis=0).astype(int) * n_cond)
293 | elif mean_centering == 1:
294 | # we want means of CONDITIONS, collapsing across groups
295 | groups = Y.copy()
296 | elif mean_centering == 2:
297 | # we want the overall mean of the entire dataset
298 | groups = np.ones((len(X), 1))
299 | else:
300 | raise ValueError("Mean centering type must be in [0, 1, 2].")
301 |
302 | # get mean of data over grouping variable
303 | group_mean = np.row_stack([X[grp].mean(axis=0)[None] for grp in
304 | groups.T.astype(bool)])
305 |
306 | # we want group_mean to have the same number of rows as Y does columns
307 | # that way, we can easily subtract it for mean centering the data
308 | # and generating the matrix for SVD
309 | if mean_centering == 0:
310 | group_mean = np.repeat(group_mean, n_cond, axis=0)
311 | elif mean_centering == 1:
312 | group_mean = group_mean.reshape(-1, n_cond, X.shape[-1]).mean(axis=0)
313 | group_mean = np.tile(group_mean.T, int(Y.shape[-1] / n_cond)).T
314 | else:
315 | group_mean = np.repeat(group_mean, Y.shape[-1], axis=0)
316 |
317 | return group_mean
318 |
319 |
320 | def get_mean_center(X, Y, n_cond=1, mean_centering=0, means=True):
321 | """
322 | Parameters
323 | ----------
324 | X : (S, B) array_like
325 | Input data matrix, where `S` is observations and `B` is features
326 | Y : (S, T) array_like, optional
327 | Dummy coded input array, where `S` is observations and `T`
328 | corresponds to the number of different groups x conditions. A value
329 | of 1 indicates that an observation belongs to a specific group or
330 | condition.
331 | n_cond : int, optional
332 | Number of conditions in dummy coded `Y` array. Default: 1
333 | mean_centering : {0, 1, 2}, optional
334 | Mean-centering method. Default: 0
335 | means : bool, optional
336 | Whether to return demeaned averages instead of demeaned data. Default:
337 | True
338 |
339 | Returns
340 | -------
341 | mean_centered : {(T, B), (S, B)} `numpy.ndarray`
342 | If `means` is True, returns array with shape (T, B); otherwise, returns
343 | (S, B)
344 | """
345 |
346 | mc = get_group_mean(X, Y, n_cond=n_cond, mean_centering=mean_centering)
347 |
348 | if means:
349 | # take mean of groups and subtract relevant mean_centering entry
350 | mean_centered = np.row_stack([X[grp].mean(axis=0) - mc[n] for (n, grp)
351 | in enumerate(Y.T.astype(bool))])
352 | else:
353 | # subtract relevant mean_centering entry from each observation
354 | mean_centered = np.row_stack([X[grp] - mc[n][None] for (n, grp)
355 | in enumerate(Y.T.astype(bool))])
356 |
357 | return mean_centered
358 |
359 |
360 | def efficient_corr(x, y):
361 | """
362 | Computes correlation of matching columns in `x` and `y`
363 |
364 | Parameters
365 | ----------
366 | x, y : (N, M) array_like
367 | Input data arrays
368 |
369 | Returns
370 | -------
371 | corr : (M,) numpy.ndarray
372 | Correlations of columns in `x` and `y`
373 | """
374 |
375 | # we need 2D arrays
376 | x, y = np.vstack(x), np.vstack(y)
377 |
378 | # check shapes
379 | if x.shape != y.shape:
380 | if x.shape[-1] != 1 and y.shape[-1] != 1:
381 | raise ValueError('Provided inputs x and y must either have '
382 | 'matching shapes or one must be a column '
383 | 'vector.\nProvided data:\n\tx: {}\n\ty: {}'
384 | .format(x.shape, y.shape))
385 |
386 | corr = np.sum(zscore(x, ddof=1) * zscore(y, ddof=1), axis=0) / (len(x) - 1)
387 |
388 | # fix rounding errors
389 | corr = np.clip(corr, -1, 1)
390 |
391 | return corr
392 |
393 |
394 | def varexp(singular):
395 | """
396 | Calculates the variance explained by values in `singular`
397 |
398 | Parameters
399 | ----------
400 | singular : (L, L) array_like
401 | Singular values from singular value decomposition
402 |
403 | Returns
404 | -------
405 | varexp : (L, L) `numpy.ndarray`
406 | Variance explained
407 | """
408 |
409 | if singular.ndim != 2:
410 | raise ValueError('Provided `singular` array must be a square diagonal '
411 | 'matrix, not array of shape {}'
412 | .format(singular.shape))
413 |
414 | return np.diag(np.diag(singular)**2 / np.sum(np.diag(singular)**2))
415 |
--------------------------------------------------------------------------------
/pyls/examples/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['available_datasets', 'load_dataset', 'query_dataset']
2 |
3 | from .datasets import available_datasets, load_dataset, query_dataset
4 |
--------------------------------------------------------------------------------
/pyls/examples/datasets.json:
--------------------------------------------------------------------------------
1 | {
2 | "linnerud": {
3 | "description": "These data come from a toy example demonstrating the relationship between exercise ability and physiological fitness.",
4 | "reference": "Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Editions Technip.",
5 | "urls": [
6 | "https://raw.githubusercontent.com/rmarkello/pyls/3f5e79227d2f9f80887e80bea107a9c7e6b0e0c2/data/linnerud_exercise.csv",
7 | "https://raw.githubusercontent.com/rmarkello/pyls/3f5e79227d2f9f80887e80bea107a9c7e6b0e0c2/data/linnerud_physio.csv"
8 | ],
9 | "X": "linnerud_exercise.csv",
10 | "Y": "linnerud_physio.csv",
11 | "n_perm": 1000,
12 | "n_boot": 1000
13 | },
14 | "mirchi_2018": {
15 | "description": "Study examining the relationship between changes in functional brain connectivity derived from resting-state functional magnetic resonance imaging (rsfMRI) and behavioral mood scores using the MyConnectome database.",
16 | "reference": "Mirchi, N., Betzel, R. F., Bernhardt, B. C., Dagher, A., & Mišić, B. (2018). Tracking mood fluctuations with functional network patterns. Social Cognitive and Affective Neuroscience.",
17 | "urls": [
18 | "https://www.dropbox.com/s/29pmo4uf19go442/myconnectome_fc.npy?dl=1",
19 | "https://www.dropbox.com/s/w7px20kxwvqx1d1/myconnectome_panas.csv?dl=1",
20 | "http://web.stanford.edu/group/poldracklab/myconnectome-data/base/parcellation/parcel_data.txt"
21 | ],
22 | "X": "myconnectome_fc.npy",
23 | "Y": "myconnectome_panas.csv",
24 | "n_perm": 10000,
25 | "n_boot": 10000,
26 | "test_size": 0.25,
27 | "test_split": 100,
28 | "parcellation": "parcel_data.txt"
29 | },
30 | "wine": {
31 | "description": "These data are the results of a chemical analysis of wines grown in the same region in Italy but derived from three different cultivars. The analysis determined the quantities of 13 constituents found in each of the three types of wines.",
32 | "reference": "Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.",
33 | "urls": [
34 | "https://raw.githubusercontent.com/rmarkello/pyls/3f5e79227d2f9f80887e80bea107a9c7e6b0e0c2/data/wine.csv"
35 | ],
36 | "X": "wine.csv",
37 | "n_perm": 1000,
38 | "n_boot": 1000,
39 | "groups": [
40 | 59,
41 | 71,
42 | 48
43 | ]
44 | },
45 | "whitaker_vertes_2016": {
46 | "description": "Study examining the relationship between developmental brain changes derived from structural magnetic resonance imaging (sMRI) and genetic expression in the brain using the NeuroScience in Psychiatry Network (NSPN) dataset",
47 | "reference": "Whitaker, K. J., Vértes, P. E., Romero-Garcia, R., Váša, F., Moutoussis, M., Prabhu, G., Weiskopf, N., Callaghan, M. F., Wagstyl, K., Rittman, T., Tait, R., Ooi, C., Suckling, J., Inkster, B., Fonagy, P., Dolan, R. J., Jones, P. B., Goodyer, I. M., Bullmore, E. T. (2016). Adolescence is associated with genomically patterned consolidation of the hubs of the human brain connectome. Proceedings of the National Academy of Sciences, 113(32), 9105-9110.",
48 | "urls": [
49 | "https://raw.githubusercontent.com/KirstieJane/NSPN_WhitakerVertes_PNAS2016/master/DATA/PLS_gene_predictor_vars.csv",
50 | "https://raw.githubusercontent.com/KirstieJane/NSPN_WhitakerVertes_PNAS2016/master/CT_MT_ANALYSES/COMPLETE/PLS/COVARS_none/PLS_MRI_response_vars.csv"
51 | ],
52 | "X": "PLS_gene_predictor_vars.csv",
53 | "Y": "PLS_MRI_response_vars.csv",
54 | "n_perm": 1000,
55 | "n_boot": 1000,
56 | "n_components": 2
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/pyls/examples/datasets.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Functions and utilities for getting datasets for PLS examples
4 | """
5 |
6 | import json
7 | import os
8 | from pkg_resources import resource_filename
9 | import urllib
10 |
11 | import numpy as np
12 |
13 | from ..structures import PLSInputs
14 |
15 | try:
16 | import pandas as pd
17 | pandas_avail = True
18 | except ImportError:
19 | pandas_avail = False
20 |
21 | with open(resource_filename('pyls', 'examples/datasets.json'), 'r') as src:
22 | _DATASETS = json.load(src)
23 |
24 |
25 | def available_datasets(name=None):
26 | """
27 | Lists available datasets to download
28 |
29 | Returns
30 | -------
31 | datasets : list
32 | List of available datasets
33 | """
34 |
35 | if name is not None:
36 | if name not in _DATASETS.keys():
37 | raise ValueError('Provided dataset {} is not available. Dataset '
38 | 'must be one of: {}.'
39 | .format(name, available_datasets()))
40 | else:
41 | return name
42 |
43 | return list(_DATASETS.keys())
44 |
45 |
46 | def query_dataset(name, key='description'):
47 | """
48 | Queries dataset `name` for information specified by `key`
49 |
50 | Parameters
51 | ----------
52 | name : str
53 | Name of dataset. Must be in :func:`pyls.examples.available_datasets()`
54 | key : str, optional
55 | Key to query from `name`. If not specified will return a list of
56 | available keys. Default: 'description'
57 |
58 | Returns
59 | -------
60 | value
61 | Value specified by `key` for dataset `name`
62 | """
63 |
64 | name = available_datasets(name)
65 | if key is None:
66 | return list(_DATASETS.get(name).keys())
67 |
68 | value = _DATASETS.get(name).get(key, None)
69 | if value is None:
70 | raise KeyError('Provided key {} not specified for dataset {}. '
71 | 'Available keys are {}'
72 | .format(name, key, list(_DATASETS.get(name).keys())))
73 |
74 | return value
75 |
76 |
77 | def _get_data_dir(data_dir=None):
78 | """
79 | Gets path to pyls data directory
80 |
81 | Parameters
82 | ----------
83 | data_dir : str, optional
84 | Path to use as data directory. If not specified, will check for
85 | environmental variable 'PYLS_DATA'; if that is not set, will use
86 | `~/pyls-data` instead. Default: None
87 |
88 | Returns
89 | -------
90 | data_dir : str
91 | Path to use as data directory
92 | """
93 |
94 | if data_dir is None:
95 | data_dir = os.environ.get('PYLS_DATA', os.path.join('~', 'pyls-data'))
96 | data_dir = os.path.expanduser(data_dir)
97 | if not os.path.exists(data_dir):
98 | os.makedirs(data_dir)
99 |
100 | return data_dir
101 |
102 |
103 | def load_dataset(name, data_dir=None, verbose=1, return_reference=False):
104 | """
105 | Loads dataset provided by `name` into a :obj:`PLSInputs` object
106 |
107 | Parameters
108 | ----------
109 | name : str
110 | Name of dataset. Must be in :func:`pyls.examples.available_datasets()`
111 | data_dir : str, optional
112 | Path to use as data directory to store dataset. If not specified, will
113 | check for environmental variable 'PYLS_DATA'; if that is not set, will
114 | use `~/pyls-data` instead. Default: None
115 | verbose : int, optional
116 | Level of verbosity for status messages about fetching/loading dataset.
117 | Set to 0 for no updates. Default: 1
118 | return_reference : bool, optional
119 | Whether to return APA-style reference for dataset specified by `name`.
120 | Default: False
121 |
122 | Returns
123 | -------
124 | dataset : :obj:`~.structures.PLSInputs`
125 | PLSInputs object containing pre-loaded data ready to run PLS analysis.
126 | Rerun the analysis by calling :func:`pyls.behavioral_pls(**dataset)` or
127 | :func:`pyls.meancentered_pls(**dataset)`, as appropriate
128 | """
129 |
130 | name = available_datasets(name)
131 | data_dir = _get_data_dir(data_dir)
132 | _get_dataset(name, data_dir, verbose=verbose)
133 |
134 | dataset = PLSInputs()
135 | for key, value in _DATASETS.get(name, {}).items():
136 | if isinstance(value, str) and key in PLSInputs.allowed:
137 | fname = os.path.join(data_dir, name, value)
138 | if fname.endswith('.csv') or fname.endswith('.txt'):
139 | if pandas_avail:
140 | value = pd.read_csv(fname, index_col=0)
141 | else:
142 | value = np.genfromtxt(fname, skip_header=True,
143 | delimiter=',')[:, 1:]
144 | elif fname.endswith('.npy'):
145 | value = np.load(fname)
146 | else:
147 | raise ValueError('Cannot recognize datatype of {}. Please '
148 | 'create an issue on GitHub with dataset you '
149 | 'are trying to load ({})'.format(fname, name))
150 | dataset[key] = value
151 |
152 | # make some dataset-specific corrections
153 | if name == 'whitaker_vertes_2016':
154 | dataset.X = dataset.X.T
155 |
156 | if return_reference:
157 | return dataset, query_dataset(name, 'reference')
158 |
159 | return dataset
160 |
161 |
162 | def _get_dataset(name, data_dir=None, verbose=1):
163 | """
164 | Downloads dataset defined by `name`
165 |
166 | Parameters
167 | ----------
168 | name : str
169 | Name of dataset. Must be in :func:`pyls.examples.available_datasets()`
170 | data_dir : str
171 | Path to use as data directory to store dataset
172 | """
173 |
174 | data_dir = os.path.join(_get_data_dir(data_dir), name)
175 | os.makedirs(data_dir, exist_ok=True)
176 |
177 | for url in _DATASETS.get(name, {}).get('urls', []):
178 | parse = urllib.parse.urlparse(url)
179 | fname = os.path.join(data_dir, os.path.basename(parse.path))
180 |
181 | if not os.path.exists(fname):
182 | out = urllib.request.urlopen(url)
183 | with open(fname, 'wb') as dest:
184 | dest.write(out.read())
185 |
--------------------------------------------------------------------------------
/pyls/io.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Functions for saving and loading PLS data objects
4 | """
5 |
6 | import h5py
7 | import numpy as np
8 |
9 | from .structures import PLSResults
10 |
11 |
12 | def save_results(fname, results):
13 | """
14 | Saves PLS `results` to hdf5 file `fname`
15 |
16 | If `fname` does not end with '.hdf5' it will be appended
17 |
18 | Parameters
19 | ----------
20 | fname : str
21 | Filepath to where hdf5 file should be created and `results` stored
22 | results : :obj:`pyls.structures.PLSResults`
23 | PLSResults object to be saved
24 |
25 | Returns
26 | -------
27 | fname : str
28 | Filepath to created file
29 | """
30 |
31 | def _recursive_save(h5file, obj, group='/results'):
32 | """
33 | Recursively saves `obj` to `h5file` in `group`
34 |
35 | Parameters
36 | ----------
37 | h5file : :obj:`h5py.File`
38 | obj : dict
39 | group : str, optional
40 | Group in `h5file` in which to create datasets
41 | """
42 |
43 | grp = h5file.create_group(group)
44 | for key, item in obj.items():
45 | if isinstance(item, dict):
46 | _recursive_save(h5file, item, group=group + '/' + key)
47 | elif isinstance(item, np.ndarray):
48 | grp.create_dataset(key, item.shape, item.dtype)[...] = item
49 | else:
50 | if item is not None:
51 | grp.attrs[key] = item
52 | else:
53 | grp.attrs[key] = 'None'
54 |
55 | if not isinstance(fname, str):
56 | fname = str(fname)
57 |
58 | if not fname.endswith('.hdf5'):
59 | fname += '.hdf5'
60 |
61 | with h5py.File(fname, 'w') as h5:
62 | _recursive_save(h5, results, group='/results')
63 |
64 | return fname
65 |
66 |
67 | def load_results(fname):
68 | """
69 | Load PLS results stored in `fname`, generated by `pyls.save_results()`
70 |
71 | Parameters
72 | ----------
73 | fname : str
74 | Filepath to HDF5 file containing PLS results
75 |
76 | Returns
77 | -------
78 | results : :obj:`pyls.structures.PLSResults`
79 | Loaded PLS results
80 | """
81 |
82 | def _recursive_load(h5file, group='/results'):
83 | """
84 | Recursively loads data from `h5file`
85 |
86 | Parameters
87 | ----------
88 | h5file : :obj:`h5py.File`
89 | group : str, optional
90 | Group in `h5file` from which to load datasets
91 |
92 | Returns
93 | -------
94 | results : dict
95 | Dictionary containing loaded data
96 | """
97 |
98 | results = dict()
99 | for key, item in h5file[group].items():
100 | if isinstance(item, h5py.Dataset):
101 | results[key] = item[()]
102 | elif isinstance(item, h5py.Group):
103 | results[key] = _recursive_load(h5file, group=group + '/' + key)
104 | for key, value in h5file[group].attrs.items():
105 | if isinstance(value, str) and value == 'None':
106 | value = None
107 | results[key] = value
108 |
109 | return results
110 |
111 | if not isinstance(fname, str):
112 | fname = str(fname)
113 |
114 | if not fname.endswith('.hdf5'):
115 | fname += '.hdf5'
116 |
117 | if not h5py.is_hdf5(fname):
118 | raise TypeError('Provided file {} is not valid HDF5 format.'
119 | .format(fname))
120 |
121 | with h5py.File(fname, 'r') as h5file:
122 | return PLSResults(**_recursive_load(h5file, '/results'))
123 |
--------------------------------------------------------------------------------
/pyls/matlab/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Utilities for handling PLS results generated using the Matlab PLS toolbox
4 | """
5 |
6 | __all__ = ['import_matlab_result']
7 |
8 | from .io import import_matlab_result
9 |
--------------------------------------------------------------------------------
/pyls/matlab/io.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from collections.abc import MutableMapping
4 |
5 | import numpy as np
6 | import scipy.io as sio
7 |
8 | from ..structures import PLSResults
9 |
10 | _result_mapping = (
11 | ('u', 'x_weights'),
12 | ('s', 'singvals'),
13 | ('v', 'y_weights'),
14 | ('usc', 'x_scores'),
15 | ('vsc', 'y_scores'),
16 | ('lvcorrs', 'y_loadings'),
17 | # permres
18 | ('perm_result_sprob', 'pvals'),
19 | ('perm_result_permsamp', 'permsamples'),
20 | # bootres
21 | ('boot_result_compare_u', 'x_weights_normed'),
22 | ('boot_result_u_se', 'x_weights_stderr'),
23 | ('boot_result_bootsamp', 'bootsamples'),
24 | # splitres
25 | ('perm_splithalf_orig_ucorr', 'ucorr'),
26 | ('perm_splithalf_orig_vcorr', 'vcorr'),
27 | ('perm_splithalf_ucorr_prob', 'ucorr_pvals'),
28 | ('perm_splithalf_vcorr_prob', 'vcorr_pvals'),
29 | ('perm_splithalf_ucorr_ul', 'ucorr_uplim'),
30 | ('perm_splithalf_vcorr_ul', 'vcorr_lolim'),
31 | ('perm_splithalf_ucorr_ll', 'ucorr_uplim'),
32 | ('perm_splithalf_vcorr_ll', 'vcorr_lolim'),
33 | # inputs
34 | ('inputs_X', 'X'),
35 | ('stacked_behavdata', 'Y'),
36 | ('num_subj_lst', 'groups'),
37 | ('num_conditions', 'n_cond'),
38 | ('perm_result_num_perm', 'n_perm'),
39 | ('boot_result_num_boot', 'n_boot'),
40 | ('perm_splithalf_num_split', 'n_split'),
41 | ('boot_result_clim', 'ci'),
42 | ('other_input_meancentering_type', 'mean_centering'),
43 | ('method', 'method')
44 | )
45 |
46 | _mean_centered_mapping = (
47 | ('boot_result_orig_usc', 'contrast'),
48 | ('boot_result_distrib', 'contrast_boot'),
49 | ('boot_result_ulusc', 'contrast_ci_up'),
50 | ('boot_result_llusc', 'contrast_ci_lo'),
51 | )
52 |
53 | _behavioral_mapping = (
54 | ('boot_result_orig_corr', 'y_loadings'),
55 | ('boot_result_distrib', 'y_loadings_boot'),
56 | ('boot_result_ulcorr', 'y_loadings_ci_up'),
57 | ('boot_result_llcorr', 'y_loadings_ci_lo'),
58 | )
59 |
60 |
61 | def _coerce_void(value):
62 | """
63 | Converts `value` to `value.dtype`
64 |
65 | Parameters
66 | ----------
67 | value : array_like
68 |
69 | Returns
70 | -------
71 | value : dtype
72 | `Value` coerced to `dtype`
73 | """
74 |
75 | if np.squeeze(value).ndim == 0:
76 | return value.dtype.type(value.squeeze())
77 | else:
78 | return np.squeeze(value)
79 |
80 |
81 | def _flatten(d, parent_key='', sep='_'):
82 | """
83 | Flattens nested dictionary `d` into single dictionary with new keyset
84 |
85 | Parameters
86 | ----------
87 | d : dict
88 | Dictionary to be flattened
89 | parent_key : str, optional
90 | Key of parent dictionary of `d`. Default: ''
91 | sep : str, optional
92 | How to join keys of `d` with `parent_key`, if provided. Default: '_'
93 |
94 | Returns
95 | -------
96 | flat : dict
97 | Flattened input dictionary `d`
98 |
99 | Notes
100 | -----
101 | Taken directly from https://stackoverflow.com/a/6027615
102 | """
103 |
104 | items = []
105 | for k, v in d.items():
106 | new_key = parent_key + sep + k if parent_key else k
107 | if isinstance(v, MutableMapping):
108 | items.extend(_flatten(v, new_key, sep=sep).items())
109 | else:
110 | items.append((new_key, v))
111 | return dict(items)
112 |
113 |
114 | def _rename_keys(d, mapping):
115 | """
116 | Renames keys in dictionary `d` based on tuples in `mapping`
117 |
118 | Parameters
119 | ----------
120 | d : dict
121 | Dictionary with keys to be renamed
122 | mapping : list of tuples
123 | List of (oldkey, newkey) pairs to rename entries in `d`
124 |
125 | Returns
126 | -------
127 | renamed : dict
128 | Input dictionary `d` with keys renamed
129 | """
130 |
131 | new_dict = d.copy()
132 | for oldkey, newkey in mapping:
133 | try:
134 | new_dict[newkey] = new_dict.pop(oldkey)
135 | except KeyError:
136 | pass
137 |
138 | return new_dict
139 |
140 |
141 | def import_matlab_result(fname, datamat='datamat_lst'):
142 | """
143 | Imports `fname` PLS result from Matlab
144 |
145 | Parameters
146 | ----------
147 | fname : str
148 | Filepath to output mat file obtained from Matlab PLS toolbox. Should
149 | contain at least a result struct object.
150 | datamat : str, optional
151 | Variable name of datamat ('X' array) provided to original PLS if it
152 | exists `fname`. By default the datamat is not stored in the PLS results
153 | structure, but if it is was saved in `fname` it can be loaded and
154 | cached in the returned results object. Default: 'datamat_lst'
155 |
156 | Returns
157 | -------
158 | results : :obj:`~.structures.PLSResults`
159 | Matlab results in a Python-friendly format
160 | """
161 |
162 | def get_labels(fields):
163 | labels = [k for k, v in sorted(fields.items(),
164 | key=lambda x: x[-1][-1])]
165 | return labels
166 |
167 | # load mat file using scipy.io
168 | matfile = sio.loadmat(fname)
169 |
170 | # if 'result' key is missing then consider this a malformed PLS result mat
171 | try:
172 | result = matfile.get('result')[0, 0]
173 | except (IndexError, TypeError):
174 | raise ValueError('Cannot get result struct from provided mat file')
175 |
176 | # convert result structure to a dictionary using dtypes as keys
177 | labels = get_labels(result.dtype.fields)
178 | result = {labels[n]: value for n, value in enumerate(result)}
179 |
180 | # convert sub-structures to dictionaries using dtypes as keys
181 | struct = ['boot_result', 'perm_result', 'perm_splithalf', 'other_input']
182 | for attr in struct:
183 | if result.get(attr) is not None:
184 | labels = get_labels(result[attr].dtype.fields)
185 | result[attr] = {labels[n]: _coerce_void(value) for n, value
186 | in enumerate(result[attr][0, 0])}
187 |
188 | # get input data from results file, if it exists
189 | X = matfile.get(datamat)
190 | result['inputs'] = dict(X=np.vstack(X[:, 0])) if X is not None else dict()
191 |
192 | # squeeze all the values so they're a bit more interpretable
193 | for key, val in result.items():
194 | if isinstance(val, np.ndarray):
195 | result[key] = _coerce_void(val)
196 |
197 | # flatten the dictionary and rename the keys according to our mapping
198 | result = _rename_keys(_flatten(result), _result_mapping)
199 | if result['method'] == 3:
200 | result = _rename_keys(result, _behavioral_mapping)
201 | if 'y_loadings_ci_up' in result:
202 | result['y_loadings_ci'] = np.stack([
203 | result['y_loadings_ci_lo'], result['y_loadings_ci_up']
204 | ], axis=-1)
205 | else:
206 | result = _rename_keys(result, _mean_centered_mapping)
207 | if 'contrast_ci_up' in result:
208 | result['contrast_ci'] = np.stack([
209 | result['contrast_ci_lo'], result['contrast_ci_up']
210 | ], axis=-1)
211 |
212 | # index arrays - 1 to account for Matlab vs Python 1- vs 0-indexing
213 | for key in ['bootsamples', 'permsamples']:
214 | try:
215 | result[key] -= 1
216 | except KeyError:
217 | continue
218 |
219 | if result.get('n_split', None) is None:
220 | result['n_split'] = None
221 |
222 | # pack it into a `PLSResults` class instance for easy attribute access
223 | results = PLSResults(**result)
224 |
225 | return results
226 |
--------------------------------------------------------------------------------
/pyls/plotting/meancentered.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Functions for plotting results from a mean-centered PLS
4 | """
5 |
6 | import numpy as np
7 | import pandas as pd
8 | import seaborn as sns
9 |
10 |
11 | def _set_group_lvls(n_conds, n_grps, grp_lvls=None):
12 | """
13 | Derives a pandas data series of group labels
14 |
15 | Parameters
16 | ----------
17 | n_conds : int
18 | Number of conditions in the analysis
19 | n_grps : int
20 | Number of groups in the analysis
21 | grp_lvls : list, optional
22 | List of group labels
23 |
24 | Returns
25 | -------
26 | labels : pd.Series
27 | Series of group labels aligned to the input data structure
28 | """
29 |
30 | grping = []
31 | if grp_lvls is None:
32 | for i in range(n_grps):
33 | grping += ["Group" + str(i)] * n_conds
34 | else:
35 | for i in range(n_grps):
36 | grping.extend([grp_lvls[i]] * n_conds)
37 | return pd.Series(grping, name='Group')
38 |
39 |
40 | def _set_cond_lvls(n_conds, n_grps, cond_lvls=None):
41 | """
42 | Derives a pandas series of condition labels
43 |
44 | Parameters
45 | ----------
46 | n_conds : int
47 | Number of conditions in the analysis
48 | n_grps : int
49 | Number of groups in the analysis
50 | cond_lvls : list, optional
51 | List of condition labels
52 |
53 | Returns
54 | -------
55 | labels : pd.Series
56 | Series of condition labels aligned to the input data structure
57 | """
58 |
59 | if cond_lvls is None:
60 | cond_lvls = ["Condition" + str(i) for i in range(n_conds)] * n_grps
61 | else:
62 | cond_lvls = cond_lvls * n_grps
63 |
64 | return pd.Series(cond_lvls, name='Condition')
65 |
66 |
67 | def _define_vars(results, cond_lvls=None, grp_lvls=None):
68 | """
69 | Create a pandas data frame from `results` for easy plotting
70 |
71 | Uses the result dictionary returned by PLS as well as user-supplied
72 | condition and group label(s).
73 |
74 | Parameters
75 | ----------
76 | results : :obj:pyls.PLSResults
77 | The PLS result dictionary
78 | cond_lvls : list, optional
79 | List of condition labels
80 | grp_lvls : list, optional
81 | List of group labels
82 |
83 | Returns
84 | -------
85 | df : pd.DataFrame
86 | A pandas DataFrame with derived estimates (and upper- and lower-
87 | estimated error) for all latent variables
88 | """
89 |
90 | estimate = results.bootres.contrast
91 | ul = results.bootres.contrast_uplim
92 | ll = results.bootres.contrast_lolim
93 |
94 | n_grps = len(results.inputs.groups)
95 | n_conds = estimate.shape[1] // n_grps
96 | cond = _set_cond_lvls(n_conds, n_grps, cond_lvls=cond_lvls)
97 | grp = _set_group_lvls(n_conds, n_grps, grp_lvls=grp_lvls)
98 |
99 | num_est = estimate.shape[1] + 1 # for 1-based indexing in plots
100 | colnames = []
101 | for itm in ['Estimate_LV', 'UL_LV', 'LL_LV']:
102 | for i in range(1, num_est):
103 | colnames.append(itm + str(i))
104 |
105 | df = pd.DataFrame(np.hstack((estimate, ul, ll)), columns=colnames)
106 | df = pd.concat([df, cond, grp], axis=1)
107 | return df
108 |
109 |
110 | def _rearrange_df(df, plot_order):
111 | """
112 | Rearranged `df` according to `plot_order`
113 |
114 | In examining plots, users may wish to rearrange the order in which
115 | conditions are presented in order to ease visual interpretation. This
116 | function reorders the dataframe as desired
117 |
118 | Parameters
119 | ----------
120 | df : pandas.DataFrame
121 | Dataframe containing condition, group labels, and PLS results
122 | plot_order : list
123 | User-defined order in which to plot conditions
124 |
125 | Returns
126 | -------
127 | df : pd.DataFrame
128 | Provided dataframe `df` with re-ordered conditions
129 | """
130 |
131 | sorter_idx = dict(zip(plot_order, range(len(plot_order))))
132 | df['Cond_Arrange'] = df['Condition'].map(sorter_idx)
133 | df = df.sort_values(by=['Group', 'Cond_Arrange'], ascending=[False, True])
134 | return df.drop(columns=['Cond_Arrange'])
135 |
136 |
137 | def plot_contrast(results, lv=0, cond_labels=None, group_labels=None,
138 | cond_order=None, **kwargs):
139 | """
140 | Plots group / condition contrast from `results` for a provided `lv`
141 |
142 | Parameters
143 | ----------
144 | results : :obj:pyls.PLSResults
145 | The PLS result dictionary
146 | lv : int, optional
147 | Index of desired latent variable to plot. Uses zero-indexing, so the
148 | first latent variables is `lv=0`. Default: 0
149 | cond_labels : list, optional
150 | List of condition labels as they were supplied to the original PLS.
151 | If not supplied, uses "ConditionX" as label. Default: None
152 | group_labels : list, optional
153 | List of group labels as they were supplied to the original PLS. If
154 | not supplied, uses "GroupX" as label. Default: None
155 | cond_order : list, optional
156 | Desired order for plotting conditions. If not supplied, plots
157 | conditions in order they were provided to original PLS. Default: None
158 | **kwargs : key, value mappings
159 | Keywords arguments passed to :obj:seaborn.barplot
160 |
161 | Returns
162 | -------
163 | ax : matplotlib.axes.Axis
164 | A matplotlib axes object for saving or modifying
165 | """
166 |
167 | df = _define_vars(results, cond_lvls=cond_labels, grp_lvls=group_labels)
168 | if cond_order is not None:
169 | df = _rearrange_df(df, cond_order)
170 | num_sig = (len(df.columns) - 2) // 3
171 | ax = sns.barplot(x="Group", y=df[df.columns[lv]], hue="Condition",
172 | data=df, capsize=0.1, errwidth=1.25, alpha=0.25, ci=None,
173 | **kwargs)
174 | ax.legend(bbox_to_anchor=(1.1, 1.05))
175 | x = [r.get_x() for r in ax.patches]
176 | nx = np.sort(x)
177 | abs_err = np.abs([df[df.columns[lv + (num_sig * 2)]].get_values(),
178 | df[df.columns[lv + num_sig]].get_values()]
179 | - df[df.columns[lv]].get_values())
180 | ax.errorbar(x=nx + (np.diff(nx).min() / 2),
181 | y=df[df.columns[lv]], fmt='none', yerr=abs_err, ecolor='black')
182 |
183 | return ax
184 |
--------------------------------------------------------------------------------
/pyls/structures.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Data structures to hold PLS inputs and results objects
4 | """
5 |
6 | from multiprocessing import cpu_count
7 | from textwrap import dedent
8 | from .utils import ResDict
9 |
10 | _pls_input_docs = dict(
11 | decomposition_narrative=dedent("""\
12 | The singular value decomposition generates mutually orthogonal latent
13 | variables (LVs), comprised of left and right singular vectors and a
14 | diagonal matrix of singular values. The `i`-th pair of singular vectors
15 | detail the contributions of individual input features to an overall,
16 | multivariate pattern (the `i`-th LV), and the singular values explain the
17 | amount of variance captured by that pattern.
18 |
19 | Statistical significance of the LVs is determined via permutation testing.
20 | Bootstrap resampling is used to examine the contribution and reliability of
21 | the input features to each LV. Split-half resampling can optionally be used
22 | to assess the reliability of the LVs. A cross-validated framework can
23 | optionally be used to examine how accurate the decomposition is when
24 | employed in a predictive framework.\
25 | """),
26 | input_matrix=dedent("""\
27 | X : (S, B) array_like
28 | Input data matrix, where `S` is samples and `B` is features\
29 | """),
30 | groups=dedent("""\
31 | groups : (G,) list of int
32 | List with the number of subjects present in each of `G` groups. Input
33 | data should be organized as subjects within groups (i.e., groups should
34 | be vertically stacked). If there is only one group this can be left
35 | blank.\
36 | """),
37 | conditions=dedent("""\
38 | n_cond : int
39 | Number of conditions observed in data. Note that all subjects must have
40 | the same number of conditions. If both conditions and groups are
41 | present then the input data should be organized as subjects within
42 | conditions within groups (i.e., g1c1s[1-S], g1c2s[1-S], g2c1s[1-S],
43 | g2c2s[1-S]).\
44 | """),
45 | mean_centering=dedent("""\
46 | mean_centering : {0, 1, 2}, optional
47 | Mean-centering method to use. This will determine how the mean-centered
48 | matrix is generated and what effects are "boosted" during the SVD.
49 | Default: 0\
50 | """),
51 | # perms / resampling / crossval
52 | stat_test=dedent("""\
53 | n_perm : int, optional
54 | Number of permutations to use for testing significance of components.
55 | Default: 5000
56 | n_boot : int, optional
57 | Number of bootstraps to use for testing reliability of data features.
58 | Default: 5000\
59 | """),
60 | split_half=dedent("""\
61 | n_split : int, optional
62 | Number of split-half resamples to assess during permutation testing.
63 | Default: 0\
64 | """),
65 | cross_val=dedent("""\
66 | test_split : int, optional
67 | Number of splits for generating test sets during cross-validation.
68 | Default: 100
69 | test_size : [0, 1) float, optional
70 | Proportion of data to partition to test set during cross-validation.
71 | Default: 0.25\
72 | """),
73 | covariance=dedent("""\
74 | covariance : bool, optional
75 | Whether to use the cross-covariance matrix instead of the cross-
76 | correlation during the decomposition. Only set if you are sure this is
77 | what you want as many of the results may become more difficult to
78 | interpret (i.e., :py:attr:`~.structures.PLSResults.behavcorr` will no
79 | longer be intepretable as Pearson correlation values). Default: False\
80 | """),
81 | rotate=dedent("""\
82 | rotate : bool, optional
83 | Whether to perform Procrustes rotations during permutation testing. Can
84 | inflate false-positive rates; see Kovacevic et al., (2013) for more
85 | information. Default: True\
86 | """),
87 | ci=dedent("""\
88 | ci : [0, 100] float, optional
89 | Confidence interval to use for assessing bootstrap results. This
90 | roughly corresponds to an alpha rate; e.g., the 95%ile CI is
91 | approximately equivalent to a two-tailed p <= 0.05. Default: 95\
92 | """),
93 | proc_options=dedent("""\
94 | seed : {int, :obj:`numpy.random.RandomState`, None}, optional
95 | Seed to use for random number generation. Helps ensure reproducibility
96 | of results. Default: None
97 | verbose : bool, optional
98 | Whether to show progress bars as the analysis runs. Note that progress
99 | bars will not persist after the analysis is completed. Default: True
100 | n_proc : int, optional
101 | How many processes to use for parallelizing permutation testing and
102 | bootstrap resampling. If not specified will default to serialized
103 | processing (i.e., one processor). Can optionally specify 'max' to use
104 | all available processors. Default: None\
105 | """),
106 | pls_results=dedent("""\
107 | results : :obj:`pyls.structures.PLSResults`
108 | Dictionary-like object containing results from the PLS analysis\
109 | """),
110 | resamples=dedent("""\
111 | permsamples : array_like, optional
112 | Resampled array to be used during permutation testing.
113 | If not specified a set of unique permutations will be generated.
114 | Default: None
115 | permindices : Boolean, optional
116 | Re-sampling array to be used during permutation test (if n_perm > 0).
117 | If not specified a set of unique permutations will be generated.
118 | Whether permsamples is an array to permute indices or a pre-permuted
119 | array. Useful when permuting with methods like BrainSMASH or Eigenstrapping.
120 | Default: True
121 | bootsamples : array_like, optional
122 | Resampling array to be used during bootstrap resampling (if n_boot >
123 | 0). If not specified a set of unique bootstraps will be generated.
124 | Default: None\
125 | """),
126 | references=dedent("""\
127 | McIntosh, A. R., Bookstein, F. L., Haxby, J. V., & Grady, C. L. (1996).
128 | Spatial pattern analysis of functional brain images using partial least
129 | squares. NeuroImage, 3(3), 143-157.
130 |
131 | McIntosh, A. R., & Lobaugh, N. J. (2004). Partial least squares analysis of
132 | neuroimaging data: applications and advances. NeuroImage, 23, S250-S263.
133 |
134 | Krishnan, A., Williams, L. J., McIntosh, A. R., & Abdi, H. (2011). Partial
135 | Least Squares (PLS) methods for neuroimaging: a tutorial and review.
136 | NeuroImage, 56(2), 455-475.
137 |
138 | Kovacevic, N., Abdi, H., Beaton, D., & McIntosh, A. R. (2013). Revisiting
139 | PLS resampling: comparing significance versus reliability across range of
140 | simulations. In New Perspectives in Partial Least Squares and Related
141 | Methods (pp. 159-170). Springer, New York, NY. Chicago\
142 | """)
143 | )
144 |
145 |
146 | class PLSInputs(ResDict):
147 | allowed = [
148 | 'X', 'Y', 'groups', 'n_cond', 'n_perm', 'n_boot', 'n_split',
149 | 'test_split', 'test_size', 'mean_centering', 'covariance', 'rotate',
150 | 'ci', 'seed', 'verbose', 'n_proc', 'bootsamples', 'permsamples',
151 | 'method', 'n_components', 'aggfunc', 'permindices'
152 | ]
153 |
154 | def __init__(self, *args, **kwargs):
155 | super().__init__(*args, **kwargs)
156 | if self.get('n_split') == 0:
157 | self['n_split'] = None
158 |
159 | if self.get('test_split') == 0:
160 | self['test_split'] = None
161 |
162 | if self.get('n_proc') is not None:
163 | n_proc = self.get('n_proc')
164 | if n_proc == 'max' or n_proc == -1:
165 | self['n_proc'] = cpu_count()
166 | elif n_proc < 0:
167 | self['n_proc'] = cpu_count() + 1 + n_proc
168 |
169 | ts = self.get('test_size')
170 | if ts is not None and (ts < 0 or ts >= 1):
171 | raise ValueError('test_size must be in [0, 1). Provided value: {}'
172 | .format(ts))
173 |
174 |
175 | PLSInputs.__doc__ = dedent("""\
176 | PLS input information
177 |
178 | Attributes
179 | ----------
180 | X : (S, B) array_like
181 | Input data matrix, where `S` is observations and `B` is features.
182 | Y : (S, T) array_like
183 | Behavioral matrix, where `S` is observations and `T` is features.
184 | If from :obj:`.behavioral_pls`, this is the provided behavior matrix;
185 | if from :obj:`.meancentered_pls`, this is a dummy-coded group/condition
186 | matrix.
187 | {groups}
188 | {conditions}
189 | {mean_centering}
190 | {covariance}
191 | {stat_test}
192 | {rotate}
193 | {ci}
194 | {proc_options}
195 | """).format(**_pls_input_docs)
196 |
197 |
198 | class PLSResults(ResDict):
199 | r"""
200 | Dictionary-like object containing results of PLS analysis
201 |
202 | Attributes
203 | ----------
204 | x_weights : (B, L) `numpy.ndarray`
205 | Weights of `B` features used to project `X` matrix into PLS-derived
206 | component space
207 | y_weights : (J, L) `numpy.ndarray`
208 | Weights of `J` features used to project `Y` matrix into PLS-derived
209 | component space; not available with :func:`.pls_regression`
210 | x_scores : (S, L) `numpy.ndarray`
211 | Projection of `X` matrix into PLS-derived component space
212 | y_scores : (S, L) `numpy.ndarray`
213 | Projection of `Y` matrix into PLS-derived component space
214 | y_loadings : (J, L) `numpy.ndarray`
215 | Covariance of features in `Y` with projected `x_scores`
216 | singvals : (L, L) `numpy.ndarray`
217 | Singular values for PLS-derived component space; not available with
218 | :func:`.pls_regression`
219 | varexp : (L,) `numpy.ndarray`
220 | Variance explained in each of the PLS-derived components
221 | permres : :obj:`~.structures.PLSPermResults`
222 | Results of permutation testing, as applicable
223 | bootres : :obj:`~.structures.PLSBootResults`
224 | Results of bootstrap resampling, as applicable
225 | splitres : :obj:`~.structures.PLSSplitHalfResults`
226 | Results of split-half resampling, as applicable
227 | cvres : :obj:`~.structures.PLSCrossValidationResults`
228 | Results of cross-validation testing, as applicable
229 | inputs : :obj:`~.structures.PLSInputs`
230 | Inputs provided to original PLS
231 | """
232 | allowed = [
233 | 'x_weights', 'y_weights', 'x_scores', 'y_scores',
234 | 'y_loadings', 'singvals', 'varexp',
235 | 'permres', 'bootres', 'splitres', 'cvres', 'inputs'
236 | ]
237 |
238 | def __init__(self, **kwargs):
239 | super().__init__(**kwargs)
240 | # create all sub-dictionaries
241 | self.inputs = PLSInputs(**kwargs.get('inputs', kwargs))
242 | self.bootres = PLSBootResults(**kwargs.get('bootres', kwargs))
243 | self.permres = PLSPermResults(**kwargs.get('permres', kwargs))
244 | self.splitres = PLSSplitHalfResults(**kwargs.get('splitres', kwargs))
245 | self.cvres = PLSCrossValidationResults(**kwargs.get('cvres', kwargs))
246 |
247 |
248 | class PLSBootResults(ResDict):
249 | """
250 | Dictionary-like object containing results of PLS bootstrap resampling
251 |
252 | Attributes
253 | ----------
254 | x_weights_normed : (B, L) `numpy.ndarray`
255 | `x_weights` normalized by their standard error, obtained from bootstrap
256 | resampling (see `x_weights_stderr`)
257 | x_weights_stderr : (B, L) `numpy.ndarray`
258 | Standard error of `x_weights`, used to generate `x_weights_normed`
259 | y_loadings : (J, L) `numpy.ndarray`
260 | Covariance of features in `Y` with projected `x_scores`; not available
261 | with :func:`.meancentered_pls`
262 | y_loadings_boot : (J, L, R) `numpy.ndarray`
263 | Distribution of `y_loadings` across all bootstrap resamples; not
264 | available with :func:`.meancentered_pls`
265 | y_loadings_ci: (J, L, 2) `numpy.ndarray`
266 | Lower (..., 0) and upper (..., 1) bounds of confidence interval for
267 | `y_loadings`; not available with :func:`.meancentered_pls`
268 | contrast : (J, L) `numpy.ndarray`
269 | Group x condition averages of :attr:`brainscores_demeaned`. Can be
270 | treated as a contrast indicating group x condition differences. Only
271 | obtained from :obj:`.meancentered_pls`.
272 | contrast_boot : (J, L, R) `numpy.ndarray`
273 | Bootstrapped distribution of `contrast`; only available with
274 | :func:`.meancentered_pls`
275 | contrast_ci : (J, L, 2) `numpy.ndarray`
276 | Lower (..., 0) and upper (..., 1) bounds of confidence interval for
277 | `contrast`; only available with :func:`.meancentered_pls`
278 | bootsamples : (S, R) `numpy.ndarray`
279 | Indices of bootstrapped samples `S` across `R` resamples.
280 | """
281 | allowed = [
282 | 'x_weights_normed', 'x_weights_stderr', 'bootsamples',
283 | 'y_loadings', 'y_loadings_boot', 'y_loadings_ci',
284 | 'contrast', 'contrast_boot', 'contrast_ci'
285 | ]
286 |
287 |
288 | class PLSPermResults(ResDict):
289 | """
290 | Dictionary-like object containing results of PLS permutation testing
291 |
292 | Attributes
293 | ----------
294 | pvals : (L,) `numpy.ndarray`
295 | Non-parametric p-values used to examine whether components from
296 | original decomposition explain more variance than permuted components
297 | permsamples : (S, P) `numpy.ndarray`
298 | Resampling array used to permute `S` samples over `P` permutations
299 | """
300 | allowed = [
301 | 'pvals', 'permsamples', 'perm_singval'
302 | ]
303 |
304 |
305 | class PLSSplitHalfResults(ResDict):
306 | """
307 | Dictionary-like object containing results of PLS split-half resampling
308 |
309 | Attributes
310 | ----------
311 | ucorr, vcorr : (L,) `numpy.ndarray`
312 | Average correlations between split-half resamples in original (non-
313 | permuted) data for left/right singular vectors. Can be interpreted
314 | as reliability of `L` latent variables
315 | ucorr_pvals, vcorr_pvals : (L,) `numpy.ndarray`
316 | Number of permutations where correlation between split-half
317 | resamples exceeded original correlations, normalized by the total
318 | number of permutations. Can be interpreted as the statistical
319 | significance of the reliability of `L` latent variables
320 | ucorr_uplim, vcorr_uplim : (L,) `numpy.ndarray`
321 | Upper bound of confidence interval for correlations between split
322 | halves for left/right singular vectors
323 | ucorr_lolim, vcorr_lolim : (L,) `numpy.ndarray`
324 | Lower bound of confidence interval for correlations between split
325 | halves for left/right singular vectors
326 | """
327 | allowed = [
328 | 'ucorr', 'vcorr',
329 | 'ucorr_pvals', 'vcorr_pvals',
330 | 'ucorr_uplim', 'vcorr_uplim',
331 | 'ucorr_lolim', 'vcorr_lolim'
332 | ]
333 |
334 |
335 | class PLSCrossValidationResults(ResDict):
336 | """
337 | Dictionary-like object containing results of PLS cross-validation testing
338 |
339 | Attributes
340 | ----------
341 | r_squared : (T, I) `numpy.ndarray`
342 | R-squared ("determination coefficient") for each of `T` predicted
343 | behavioral scores against true behavioral scores across `I` train /
344 | test split
345 | pearson_r : (T, I) `numpy.ndarray`
346 | Pearson's correlation for each of `T` predicted behavioral scores
347 | against true behavioral scores across `I` train / test split
348 | """
349 | allowed = [
350 | 'pearson_r', 'r_squared'
351 | ]
352 |
--------------------------------------------------------------------------------
/pyls/tests/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['compare_python_matlab', 'assert_matlab_equivalence']
2 |
3 | from .matlab import compare_python_matlab, assert_matlab_equivalence
4 |
--------------------------------------------------------------------------------
/pyls/tests/conftest.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pytest
5 | import pyls
6 |
7 |
8 | @pytest.fixture(scope='session')
9 | def testdir(tmpdir_factory):
10 | data_dir = tmpdir_factory.mktemp('data')
11 | return str(data_dir)
12 |
13 |
14 | @pytest.fixture(scope='session')
15 | def mpls_results():
16 | Xf = 1000
17 | subj = 100
18 | rs = np.random.RandomState(1234)
19 | return pyls.meancentered_pls(rs.rand(subj, Xf), n_cond=2,
20 | n_perm=10, n_boot=10, n_split=10)
21 |
22 |
23 | @pytest.fixture(scope='session')
24 | def bpls_results():
25 | Xf = 1000
26 | Yf = 100
27 | subj = 100
28 | rs = np.random.RandomState(1234)
29 | return pyls.behavioral_pls(rs.rand(subj, Xf), rs.rand(subj, Yf),
30 | n_perm=10, n_boot=10, n_split=10)
31 |
32 |
33 | @pytest.fixture(scope='session')
34 | def pls_inputs():
35 | return dict(X=np.random.rand(100, 1000), Y=np.random.rand(100, 100),
36 | groups=[50, 50], n_cond=1, mean_centering=0,
37 | n_perm=10, n_boot=10, n_split=5,
38 | test_size=0.25, test_split=100,
39 | rotate=True, ci=95, seed=1234, verbose=True,
40 | permsamples=10, bootsamples=10)
41 |
--------------------------------------------------------------------------------
/pyls/tests/data/bpls_onegroup_onecond_nosplit.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/data/bpls_onegroup_onecond_nosplit.mat
--------------------------------------------------------------------------------
/pyls/tests/data/bpls_onegroup_onecond_split.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/data/bpls_onegroup_onecond_split.mat
--------------------------------------------------------------------------------
/pyls/tests/data/empty.mat:
--------------------------------------------------------------------------------
1 | MATLAB 5.0 MAT-file Platform: posix, Created on: Mon Mar 19 11:24:37 2018 IM
--------------------------------------------------------------------------------
/pyls/tests/data/mpls_multigroup_onecond_nosplit.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/data/mpls_multigroup_onecond_nosplit.mat
--------------------------------------------------------------------------------
/pyls/tests/data/mpls_multigroup_onecond_split.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/data/mpls_multigroup_onecond_split.mat
--------------------------------------------------------------------------------
/pyls/tests/data/resultonly.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/data/resultonly.mat
--------------------------------------------------------------------------------
/pyls/tests/matlab.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pyls
5 |
6 |
7 | def assert_num_equiv(a, b, atol=1e-4):
8 | """
9 | Asserts numerical equivalence of `a` and `b`
10 |
11 | Compares numerical equivalence of `a` and `b`, accounting for potential
12 | sign flips. Uses :func:`numpy.allclose` for assessing equivalence once
13 | sign flips have been considered.
14 |
15 | Parameters
16 | ----------
17 | a, b : array_like
18 | Arrays to compare for numerical equivalence
19 | atol : float, optional
20 | Absolute tolerance for differences in `a` and `b`. Default: 1e-4
21 |
22 | Raises
23 | ------
24 | AssertionError
25 | If `a` and `b` are not numerically equivalent to `atol`
26 | """
27 |
28 | # signs may be flipped so adjust accordingly
29 | flip = 1 * np.all(np.sign(b / a) == 1, axis=0, keepdims=True)
30 | flip[flip == 0] = -1
31 | diff = a - (b * flip)
32 |
33 | assert np.allclose(diff, 0, atol=atol)
34 |
35 |
36 | def assert_func_equiv(a, b, corr=0.975, ftol=0.01):
37 | """
38 | Asserts "functional" equivalence of `a` and `b`
39 |
40 | Given the numerical instabilities of SVD between Matlab and Python we
41 | cannot always assume numerical equivalence, especially when permutation
42 | testing and bootstrap resampling are considered. This function thus
43 | considers whether results are "functionally" equivalent, where functional
44 | equivalence is defined by the correlation of `a` and `b` (if both are one-
45 | dimensional) or the correlation of columns of `a` and `b` (if both are two-
46 | dimensional). Correlations must surpass provided `corr` to be considered
47 | functionally equivalent.
48 |
49 | Parameters
50 | ----------
51 | a, b : array_like
52 | Arrays to compare for functional equivalence
53 | corr : [0, 1] float, optional
54 | Correlation that must be surpassed in order to achieve functional
55 | equivalence between `a` and `b`. Default: 0.99
56 | ftol : float, optional
57 | If len(a) and len(b) <= 2, the correlation cannot be used to assess
58 | functional equivalence. Instead, this specifies the numerical tolerance
59 | permitted between corresponding values in the two vectors.
60 |
61 | Raises
62 | ------
63 | AssertionError
64 | If `a` and `b` are not functionally equivalent
65 | """
66 |
67 | if len(a) == 1 and len(b) == 1: # can't do anything here, really...
68 | return
69 | elif len(a) <= 2 and len(b) <= 2: # can't correlate length 2 array...
70 | assert np.allclose(np.sign(a), np.sign(b))
71 | if ftol is not None:
72 | assert np.all(np.abs(a - b) < ftol)
73 | return
74 |
75 | if a.ndim > 1:
76 | corrs = pyls.compute.efficient_corr(a, b)
77 | else:
78 | corrs = np.corrcoef(a, b)[0, 1]
79 |
80 | assert np.all(np.abs(corrs) >= corr)
81 |
82 |
83 | def assert_pvals_equiv(a, b, alpha=0.05):
84 | """
85 | Asserts that p-values in `a` and `b` achieve same statistical significance
86 |
87 | Uses `alpha` to determine significance threshold and ensures that
88 | corresponding p-values in `a` and `b` both reject or fail to reject the
89 | null hypothesis.
90 |
91 | Parameters
92 | ----------
93 | a, b : array_like
94 | Arrays of p-values to be considered
95 | alpha : [0, 1] float, optional
96 | Alpha to set statistical significance threshold. Default: 0.05
97 |
98 | Raises
99 | ------
100 | AssertionError
101 | If p-values in `a` and `b` do not achieve identical statistical
102 | significance thresholds
103 | """
104 |
105 | assert np.all((a < alpha) == (b < alpha))
106 |
107 |
108 | def compare_python_matlab(python, matlab, *, atol=1e-4, corr=0.975, alpha=0.05,
109 | ftol=0.01):
110 | """
111 | Compares PLS results generated from `python` and `matlab`
112 |
113 | Due to floating point differences in linear algebra routines like SVD that
114 | propagate through permutation testing and bootstrap resampling, we cannot
115 | expected that PLS results from Python and Matlab will generate _exactly_
116 | the same results. This function compares the numerical eqivalence of
117 | results we do expect to be exact, and assesses the functional equivalence
118 | of the remaining results using correlations and alpha testing, as
119 | appropriate.
120 |
121 | Parameters
122 | ----------
123 | python : :obj:`pyls.structures.PLSResults`
124 | PLSResults object generated from Python
125 | matlab : :obj:`pyls.structures.PLSResults`
126 | PLSResults object generated from Matlab
127 | atol : float, optional
128 | Absolute tolerance permitted between `python` and `matlab` results
129 | that should have numerical equivalency. Default: 1e-4
130 | corr : [0, 1] float, optional
131 | Minimum correlation expected between `python` and `matlab` results
132 | that can't be expected to retain numerical equivalency. Default: 0.975
133 | alpha : [0, 1] float, optional
134 | Alpha level for assessing significance of latent variables, used to
135 | compare whether `python` and `matlab` results retain same functional
136 | significance. Default: 0.05
137 | ftol : float, optional
138 | If len(a) and len(b) <= 2, the correlation ( `corr`) cannot be used to
139 | assess functional equivalence. Instead, this value specifies the
140 | numerical tolerance allowed between corresponding values in the two
141 | vectors. Default: 0.01
142 |
143 | Returns
144 | -------
145 | equivalent : bool
146 | Whether PLSResults objects stored in `python` and `matlab` are
147 | functionally (not necessarily exactly numerically) equivalent
148 | reason : str
149 | If `equivalent=False`, reason for failure; otherwise, empty string
150 | """
151 |
152 | if not isinstance(python, pyls.PLSResults):
153 | raise ValueError('Provided `python` object must be a pyls.PLSResults '
154 | 'instance, not {}.'.format(type(python)))
155 | if not isinstance(matlab, pyls.PLSResults):
156 | raise ValueError('Provided `matlab` object must be a pyls.PLSResults '
157 | 'instance, not {}.'.format(type(matlab)))
158 |
159 | # singular values close to 0 cannot be considered because they're random
160 | keep = ~np.isclose(python['singvals'], 0)
161 |
162 | # check top-level results (only for shared keys)
163 | for k in python.keys():
164 | if isinstance(python[k], np.ndarray) and (k in matlab):
165 | a, b = python[k][..., keep], matlab[k][..., keep]
166 | try:
167 | assert_num_equiv(a, b, atol=atol)
168 | except AssertionError:
169 | return False, k
170 |
171 | # check pvals for functional equivalence
172 | if matlab.get('permres', {}).get('pvals') is not None:
173 | a = python['permres']['pvals'][keep]
174 | b = matlab['permres']['pvals'][keep]
175 | try:
176 | assert_func_equiv(a, b, corr, ftol=ftol)
177 | assert_pvals_equiv(a, b, alpha)
178 | except AssertionError:
179 | return False, 'permres.pvals'
180 |
181 | # check bootstraps for functional equivalence
182 | if matlab.get('bootres', {}).get('x_weights_normed') is not None:
183 | a = python['bootres']['x_weights_normed'][..., keep]
184 | b = matlab['bootres']['x_weights_normed'][..., keep]
185 | try:
186 | assert_func_equiv(a, b, corr, ftol=ftol)
187 | except AssertionError:
188 | return False, 'bootres.x_weights_normed'
189 |
190 | # check splitcorr for functional equivalence
191 | if matlab.get('splitres', {}).get('ucorr') is not None:
192 | a, b = python['splitres'], matlab['splitres']
193 | try:
194 | for k in ['ucorr', 'vcorr']:
195 | assert_func_equiv(a[k][keep], b[k][keep], corr, ftol=ftol)
196 | except AssertionError:
197 | return False, 'splitres.{}'.format(k)
198 |
199 | return True, ''
200 |
201 |
202 | def assert_matlab_equivalence(fname, method=None, *, atol=1e-4, corr=0.975,
203 | alpha=0.05, ftol=0.01, **kwargs):
204 | """
205 | Compares Matlab PLS results stored in `fname` with Python-generated results
206 |
207 | Loads `fname` using :func:`pyls.import_matlab_result`, re-runs analysis,
208 | and then compares results using :func:`pyls.tests.compare_matlab_result`.
209 |
210 | Parameters
211 | ----------
212 | fname : str
213 | Path to Matlab PLS results
214 | method : function, optional
215 | PLS function to use to re-run analysis from `fname`. If not specified
216 | will try and determine method from `fname`. Default: None
217 | atol : float, optional
218 | Absolute tolerance permitted between `python` and `matlab` results
219 | that should have numerical equivalency. Default: 1e-4
220 | corr : [0, 1] float, optional
221 | Minimum correlation expected between `python` and `matlab` results
222 | that can't be expected to retain numerical equivalency. Default: 0.975
223 | alpha : [0, 1] float, optional
224 | Alpha level for assessing significance of latent variables, used to
225 | compare whether `python` and `matlab` results retain same functional
226 | significance. Default: 0.05
227 | ftol : float, optional
228 | If len(a) and len(b) <= 2, the correlation ( `corr`) cannot be used to
229 | assess functional equivalence. Instead, this value specifies the
230 | numerical tolerance allowed between corresponding values in the two
231 | vectors. Default: 0.01
232 | kwargs : optional
233 | Key-value arguments to provide to PLS analysis. May override arguments
234 | specified in `fname`
235 |
236 | Raises
237 | ------
238 | AssertionError
239 | If PLS results generated by Python are not the same as those stored in
240 | `fname`
241 | """
242 | # load matlab result
243 | matlab = pyls.matlab.import_matlab_result(fname)
244 |
245 | # fix n_split default (if not specified in matlab assume 0)
246 | if 'n_split' not in matlab['inputs']:
247 | matlab['inputs']['n_split'] = None
248 |
249 | # get PLS method
250 | fcn = None
251 | if method is None:
252 | if matlab['inputs']['method'] == 1:
253 | fcn = pyls.meancentered_pls
254 | elif matlab['inputs']['method'] == 3:
255 | fcn = pyls.behavioral_pls
256 | elif isinstance(method, str):
257 | if method == 'meancentered':
258 | fcn = pyls.meancentered_pls
259 | elif method == 'behavioral':
260 | fcn = pyls.behavioral_pls
261 | elif callable(method):
262 | if method in [pyls.meancentered_pls, pyls.behavioral_pls]:
263 | fcn = method
264 |
265 | if fcn is None:
266 | raise ValueError('Cannot determine PLS method used to generate {}'
267 | 'from file. Please provide `method` argument.'
268 | .format(fname))
269 |
270 | # use seed for reproducibility of re-analysis
271 | matlab['inputs']['seed'] = 1234
272 | matlab['inputs']['verbose'] = False
273 | # don't update n_split if it was previously set to None
274 | if matlab['inputs']['n_split'] is None:
275 | if 'n_split' in kwargs:
276 | kwargs.pop('n_split')
277 | matlab['inputs'].update(kwargs)
278 |
279 | # run PLS
280 | python = fcn(**matlab['inputs'])
281 | equiv, reason = compare_python_matlab(python, matlab, atol=atol, corr=corr,
282 | alpha=alpha, ftol=ftol)
283 |
284 | if not equiv:
285 | raise AssertionError('compare_matlab_result failed: {}'.format(reason))
286 |
--------------------------------------------------------------------------------
/pyls/tests/test_base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pytest
5 | import pyls
6 |
7 |
8 | # tests for gen_permsamp(), gen_bootsamp(), and gen_splitsamp() are all very
9 | # similar because the code behind them is, in many senses, redundant.
10 | # that being said, the differences between the functions are intricate enough
11 | # that extracting the shared functionality would be more difficult than anyone
12 | # has time for right now.
13 | # thus, we get repetitive tests to make sure that nothing is screwed up!
14 | def test_gen_permsamp():
15 | # test to make sure that there are no duplicates generated given a
16 | # sufficiently large number of samples / conditions to work with
17 | unique_perms = pyls.base.gen_permsamp([10, 10], 2, seed=1234, n_perm=10)
18 | assert unique_perms.shape == (40, 10)
19 | for n, perm in enumerate(unique_perms.T[::-1], 1):
20 | assert not (perm[:, None] == unique_perms[:, :-n]).all(axis=0).any()
21 |
22 | # test that random state works and gives equivalent permutations when
23 | # the same number of groups / conditions / permutations are provided
24 | same_perms = pyls.base.gen_permsamp([10, 10], 2, seed=1234, n_perm=10)
25 | assert same_perms.shape == (40, 10)
26 | assert np.all(unique_perms == same_perms)
27 |
28 | # test that, given a small number of samples and requesting a large number
29 | # of permutations, duplicate samples are given (and a warning is raised!)
30 | with pytest.warns(UserWarning):
31 | dupe_perms = pyls.base.gen_permsamp([2, 2], 1, n_perm=25)
32 | assert dupe_perms.shape == (4, 25)
33 | dupe = False
34 | for n, perm in enumerate(dupe_perms.T[::-1], 1):
35 | dupe = dupe or (perm[:, None] == dupe_perms[:, :-n]).all(axis=0).any()
36 | assert dupe
37 |
38 | # test that subject conditions are kept together during permutations
39 | # that is, each subject has two conditions so we want to make sure that
40 | # when we permute subject order both conditions for a given subject are
41 | # moved together
42 | cond_perms = pyls.base.gen_permsamp([10], 2, n_perm=10)
43 | assert cond_perms.shape == (20, 10)
44 | for n in range(10):
45 | comp = np.array([f + 10 if f < 10 else f - 10 for f in cond_perms[n]])
46 | assert np.all(comp == cond_perms[n + 10])
47 |
48 | # test that subjects are permuted between groups
49 | # that is, no permutation should result in a group having the same subjects
50 | group_perms = pyls.base.gen_permsamp([10, 10], 1, n_perm=10)
51 | g1, g2 = np.sort(group_perms[:10], 0), np.sort(group_perms[10:], 0)
52 | comp = np.arange(0, 10)[:, None]
53 | assert not np.any(np.all(comp == g1, axis=0))
54 | assert not np.any(np.all((comp + 10) == g2, axis=0))
55 |
56 | # test that permutations with groups and conditions are appropriate
57 | # we'll use unique_perms since that has 2 groups and 2 conditions already
58 | # we want to confirm that (1) subject conditions are permuted together, and
59 | # (2) subjects are permuted between groups
60 | g1, g2 = unique_perms[:20], unique_perms[20:]
61 | # confirm subject conditions are permuted together
62 | for g in [g1, g2]:
63 | for n in range(10):
64 | comp = [f + 10 if f < 10 or (f >= 20 and f < 30) else f - 10
65 | for f in g[n]]
66 | assert np.all(comp == g[n + 10])
67 | # confirm subjects perare muted between groups
68 | comp = np.arange(0, 20)[:, None]
69 | assert not np.any(np.all(comp == np.sort(g1, axis=0), axis=0))
70 | assert not np.any(np.all((comp + 20) == np.sort(g2, axis=0), axis=0))
71 |
72 |
73 | def test_gen_bootsamp():
74 | # test to make sure that there are no duplicates generated given a
75 | # sufficiently large number of samples / conditions to work with
76 | unique_boots = pyls.base.gen_bootsamp([10, 10], 2, seed=1234, n_boot=10)
77 | assert unique_boots.shape == (40, 10)
78 | for n, perm in enumerate(unique_boots.T[::-1], 1):
79 | assert not (perm[:, None] == unique_boots[:, :-n]).all(axis=0).any()
80 |
81 | # test that random state works and gives equivalent bootstraps when
82 | # the same number of groups / conditions / bootstraps are provided
83 | same_boots = pyls.base.gen_bootsamp([10, 10], 2, seed=1234, n_boot=10)
84 | assert same_boots.shape == (40, 10)
85 | assert np.all(unique_boots == same_boots)
86 |
87 | # test that, given a small number of samples and requesting a large number
88 | # of bootstraps, duplicate samples are given (and a warning is raised!)
89 | with pytest.warns(UserWarning):
90 | dupe_boots = pyls.base.gen_bootsamp([5], 1, n_boot=125)
91 | assert dupe_boots.shape == (5, 125)
92 | dupe = False
93 | for n, perm in enumerate(dupe_boots.T[::-1], 1):
94 | dupe = dupe or (perm[:, None] == dupe_boots[:, :-n]).all(axis=0).any()
95 | assert dupe
96 |
97 | # test that bootstraps all have the minimum number of unique subjects
98 | # that is, since we are always bootstrapping within groups/conditions, we
99 | # want to ensure that there is never a case where e.g., an entire group is
100 | # replaced with ONE subject (unless there are only two subjects, but then
101 | # what are you really doing?)
102 | # we set a minumum subject threshold equal to 1/2 the number of samples in
103 | # the smallest group; thus, with e.g., groups of [10, 20, 30], the minimum
104 | # number of unique subjects in any given group for any given bootstrap
105 | # should be 5 (=10/2)
106 | for grp in np.split(unique_boots, 4, axis=0):
107 | for boot in grp.T:
108 | assert np.unique(boot).size >= 5
109 |
110 | # make sure that when we're resampling subjects we're doing it for all
111 | # conditions; this is a much easier check than for permutations!
112 | for n in range(10):
113 | assert np.all(unique_boots[n] + 10 == unique_boots[n + 10])
114 | for n in range(20, 30):
115 | assert np.all(unique_boots[n] + 10 == unique_boots[n + 10])
116 |
117 |
118 | def test_gen_splitsamp():
119 | # test to make sure that there are no duplicates generated given a
120 | # sufficiently large number of samples / conditions to work with
121 | unique_splits = pyls.base.gen_splits([10, 10], 2, seed=1234, n_split=10)
122 | assert unique_splits.shape == (40, 10)
123 | for n, perm in enumerate(unique_splits.T[::-1], 1):
124 | assert not (perm[:, None] == unique_splits[:, :-n]).all(axis=0).any()
125 |
126 | # test that random state works and gives equivalent splits when
127 | # the same number of groups / conditions / splits are provided
128 | same_splits = pyls.base.gen_splits([10, 10], 2, seed=1234, n_split=10)
129 | assert same_splits.shape == (40, 10)
130 | assert np.all(unique_splits == same_splits)
131 |
132 | # test that, given a small number of samples and requesting a large number
133 | # of splits, duplicate samples are given (and a warning is raised!)
134 | with pytest.warns(UserWarning):
135 | dupe_splits = pyls.base.gen_splits([5], 1, n_split=125)
136 | assert dupe_splits.shape == (5, 125)
137 | dupe = False
138 | for n, perm in enumerate(dupe_splits.T[::-1], 1):
139 | dupe = dupe or (perm[:, None] == dupe_splits[:, :-n]).all(axis=0).any()
140 | assert dupe
141 |
142 | # make sure that each group is split independently!
143 | for grp in np.split(unique_splits, 4, axis=0):
144 | assert np.all(np.sum(grp, axis=0) == 5)
145 |
146 | # make sure that `test_size` works as expected, too
147 | # `test_size` should determine the proportion of values set to False in
148 | # each group x condition
149 | # by default, `test_size` is 0.5, so the split is half-and-half, but if we
150 | # change it to e.g., 0.2, then there should be `0.2 * n_samples` False
151 | # values in each group x condition
152 | test_splits = pyls.base.gen_splits([10, 10], 2, n_split=10, test_size=0.2)
153 | for grp in np.split(test_splits, 4, axis=0):
154 | assert np.all(np.sum(grp, axis=0) == 8)
155 |
156 |
157 | def test_BasePLS(pls_inputs):
158 | # test that BasePLS accepts all inputs and stores them correctly
159 | basepls = pyls.base.BasePLS(**pls_inputs)
160 | for key in pls_inputs.keys():
161 | assert hasattr(basepls.inputs, key)
162 | assert np.all(basepls.inputs[key] == pls_inputs[key])
163 |
164 | # test that groups are handled correctly
165 | X, n_samples = pls_inputs['X'], len(pls_inputs['X'])
166 | # when not provided, should be calculated
167 | basepls = pyls.base.BasePLS(X, n_cond=2)
168 | assert basepls.inputs.groups == [n_samples // 2]
169 | # when provided as an int, should be coerced into a list
170 | basepls = pyls.base.BasePLS(X, groups=n_samples // 2, n_cond=2)
171 | assert basepls.inputs.groups == [n_samples // 2]
172 | # when they don't match the number of samples in the input data, error
173 | with pytest.raises(ValueError):
174 | basepls = pyls.base.BasePLS(X, groups=[100, 100])
175 |
176 | # ensure errors are raised for not implemented
177 | with pytest.raises(NotImplementedError):
178 | basepls.gen_covcorr(pls_inputs['X'], pls_inputs['Y'])
179 | with pytest.raises(NotImplementedError):
180 | basepls.gen_distrib(pls_inputs['X'], pls_inputs['Y'])
181 |
--------------------------------------------------------------------------------
/pyls/tests/test_compute.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pytest
5 | import pyls
6 |
7 | rs = np.random.RandomState(1234)
8 |
9 |
10 | def test_normalize():
11 | X = rs.rand(10, 10)
12 | out = pyls.compute.normalize(X, axis=0)
13 | assert np.allclose(np.sum(out**2, axis=0), 1)
14 |
15 | out = pyls.compute.normalize(X, axis=1)
16 | assert np.allclose(np.sum(out**2, axis=1), 1)
17 |
18 |
19 | def test_xcorr():
20 | X = rs.rand(20, 200)
21 | Y = rs.rand(20, 25)
22 |
23 | xcorr = pyls.compute.xcorr(X, Y)
24 | assert xcorr.shape == (25, 200)
25 | xcorr = pyls.compute.xcorr(X, Y, norm=True)
26 | assert xcorr.shape == (25, 200)
27 |
28 | with pytest.raises(ValueError):
29 | pyls.compute.xcorr(X[:, 0], Y)
30 | with pytest.raises(ValueError):
31 | pyls.compute.xcorr(X[:, 0], Y[:, 0])
32 | with pytest.raises(ValueError):
33 | pyls.compute.xcorr(X[0:10], Y)
34 |
35 |
36 | def test_efficient_corr():
37 | x, y = rs.rand(100), rs.rand(100, 10)
38 | assert pyls.compute.efficient_corr(x, y).shape == (10,)
39 | x = rs.rand(100, 10)
40 | assert pyls.compute.efficient_corr(x, y).shape == (10,)
41 |
42 | x = rs.rand(100, 2)
43 | with pytest.raises(ValueError):
44 | pyls.compute.efficient_corr(x, y)
45 |
46 | x, y = np.ones((100, 2)), np.ones((100, 2)) * 5
47 | x[50:, 0], y[50:, 0] = 2, 6
48 | x[50:, 1], y[50:, 1] = 2, 4
49 | assert np.allclose(pyls.compute.efficient_corr(x, y), np.array([1., -1.]))
50 |
--------------------------------------------------------------------------------
/pyls/tests/test_examples.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import pytest
5 | import pyls.examples
6 |
7 | DATASETS = [
8 | 'mirchi_2018', 'whitaker_vertes_2016', 'wine', 'linnerud'
9 | ]
10 |
11 |
12 | def test_available_datasets():
13 | # make sure we get a list of strings when called with no arguments
14 | avail = pyls.examples.available_datasets()
15 | assert isinstance(avail, list)
16 | assert all([isinstance(f, str) for f in avail])
17 |
18 | # check that we get all expected datasets back
19 | assert len(set(DATASETS) - set(avail)) == 0
20 |
21 | # check that we can supply dataset names to function to confirm validity
22 | for f in DATASETS:
23 | assert f == pyls.examples.available_datasets(f)
24 |
25 | # check that providing non-valid dataset name errors
26 | for f in ['thisisnotadataset', 10]:
27 | with pytest.raises(ValueError):
28 | pyls.examples.available_datasets(f)
29 |
30 |
31 | @pytest.mark.parametrize(('dataset', 'keys'), [
32 | ('linnerud', [
33 | 'description', 'reference', 'urls', 'X', 'Y', 'n_perm', 'n_boot'
34 | ]),
35 | ('mirchi_2018', [
36 | 'description', 'reference', 'urls', 'X', 'Y',
37 | 'n_perm', 'n_boot', 'test_size', 'test_split', 'parcellation'
38 | ]),
39 | ('wine', [
40 | 'description', 'reference', 'urls', 'X', 'n_perm', 'n_boot', 'groups'
41 | ]),
42 | ('whitaker_vertes_2016', [
43 | 'description', 'reference', 'urls', 'X', 'Y', 'n_perm', 'n_boot',
44 | 'n_components'
45 | ])
46 | ])
47 | def test_query_dataset(dataset, keys):
48 | # check that default return string (description)
49 | assert isinstance(pyls.examples.query_dataset(dataset), str)
50 | # check that supplying None returns all available keys
51 | assert set(pyls.examples.query_dataset(dataset, None)) == set(keys)
52 | # check that all valid keys return something
53 | for k in keys:
54 | assert pyls.examples.query_dataset(dataset, k) is not None
55 | # check nonsense keys
56 | for k in ['notakey', 10, 20.5132]:
57 | with pytest.raises(KeyError):
58 | pyls.examples.query_dataset(dataset, k)
59 |
60 |
61 | def test_get_data_dir(tmpdir):
62 | # check that default (no arguments) returns valid default directory
63 | data_dir = pyls.examples.datasets._get_data_dir()
64 | assert isinstance(data_dir, str)
65 | assert os.path.exists(data_dir)
66 | assert os.path.basename(data_dir) == 'pyls-data'
67 |
68 | # check supplying directory returns same directory
69 | assert pyls.examples.datasets._get_data_dir(str(tmpdir)) == str(tmpdir)
70 | assert os.path.exists(str(tmpdir))
71 |
72 | # check that _get_data_dir() pulls from environmental variable correctly
73 | os.environ['PYLS_DATA'] = str(tmpdir)
74 | assert pyls.examples.datasets._get_data_dir() == str(tmpdir)
75 |
76 |
77 | @pytest.mark.parametrize(('dataset', 'keys'), [
78 | ('linnerud', ['X', 'Y', 'n_perm', 'n_boot']),
79 | ('mirchi_2018', ['X', 'Y', 'n_perm', 'n_boot', 'test_size', 'test_split']),
80 | ('wine', ['X', 'groups', 'n_perm', 'n_boot']),
81 | ('whitaker_vertes_2016', ['X', 'Y', 'n_perm', 'n_boot', 'n_components'])
82 | ])
83 | def test_load_dataset(tmpdir, dataset, keys):
84 | ds = pyls.examples.load_dataset(dataset, str(tmpdir))
85 | assert isinstance(ds, pyls.structures.PLSInputs)
86 | for k in keys:
87 | assert hasattr(ds, k) and getattr(ds, k) is not None
88 | ds, ref = pyls.examples.load_dataset(dataset, str(tmpdir),
89 | return_reference=True)
90 | assert isinstance(ref, str)
91 |
--------------------------------------------------------------------------------
/pyls/tests/test_io.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os.path as op
4 | import h5py
5 | import pytest
6 | import pyls
7 |
8 |
9 | def test_load_save(testdir, mpls_results, bpls_results):
10 | for res, fn in zip([mpls_results, bpls_results], ['mpls', 'bpls']):
11 | fname = pyls.save_results(op.join(testdir, fn), res)
12 | assert op.isfile(fname)
13 | assert h5py.is_hdf5(fname)
14 | assert pyls.load_results(fname) == res
15 |
16 | with pytest.raises(TypeError):
17 | pyls.load_results(testdir)
18 |
--------------------------------------------------------------------------------
/pyls/tests/test_matlab.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os.path as op
4 | import pkg_resources
5 | import pytest
6 | import pyls
7 |
8 | data_dir = pkg_resources.resource_filename('pyls', 'tests/data')
9 | EXAMPLES = ['mpls_multigroup_onecond_nosplit.mat',
10 | 'mpls_multigroup_onecond_split.mat',
11 | 'bpls_onegroup_onecond_nosplit.mat',
12 | 'bpls_onegroup_onecond_split.mat',
13 | 'resultonly.mat']
14 |
15 | attrs = [
16 | 'x_weights', 'singvals', 'y_weights', 'x_scores', 'permres', 'bootres',
17 | 'inputs'
18 | ]
19 |
20 |
21 | @pytest.mark.parametrize('fname', EXAMPLES)
22 | def test_import_matlab(fname):
23 | res = pyls.matlab.import_matlab_result(op.join(data_dir, fname))
24 | # make sure the mat file cast appropriately
25 | assert isinstance(res, pyls.structures.PLSResults)
26 | # make sure all the attributes are there (don't check outputs)
27 | for attr in attrs:
28 | assert hasattr(res, attr)
29 | if '_split' in fname:
30 | assert hasattr(res, 'splitres')
31 |
32 |
33 | def test_errors():
34 | with pytest.raises(ValueError):
35 | pyls.matlab.import_matlab_result(op.join(data_dir, 'empty.mat'))
36 |
--------------------------------------------------------------------------------
/pyls/tests/test_structures.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import multiprocessing as mp
4 | import numpy as np
5 | import pytest
6 | from pyls import structures
7 |
8 |
9 | def test_PLSInputs(pls_inputs):
10 | # check correct handling of all available PLSInputs keys
11 | pls_inputs = structures.PLSInputs(**pls_inputs)
12 | for key in pls_inputs.keys():
13 | assert hasattr(pls_inputs, key)
14 | assert np.all(getattr(pls_inputs, key) == pls_inputs[key])
15 |
16 | # test_split and n_split should be None when set to 0
17 | assert structures.PLSInputs(n_split=0).n_split is None
18 | assert structures.PLSInputs(test_split=0).test_split is None
19 |
20 | # confirm n_proc inputs are handled appropriately
21 | assert structures.PLSInputs(n_proc=1).n_proc == 1
22 | for n_proc in ['max', -1]:
23 | assert structures.PLSInputs(n_proc=n_proc).n_proc == mp.cpu_count()
24 | assert structures.PLSInputs(n_proc=-2).n_proc == mp.cpu_count() - 1
25 |
26 | # check input checking for test_size
27 | with pytest.raises(ValueError):
28 | structures.PLSInputs(test_size=1)
29 | with pytest.raises(ValueError):
30 | structures.PLSInputs(test_size=-0.5)
31 |
32 | # check that PLSInputs rejects disallowed keys
33 | assert structures.PLSInputs(notakey=10).get('notakey') is None
34 |
35 |
36 | @pytest.mark.xfail
37 | def test_PLSResults():
38 | assert False
39 |
40 |
41 | @pytest.mark.xfail
42 | def test_PLSBootResults():
43 | assert False
44 |
45 |
46 | @pytest.mark.xfail
47 | def test_PLSPermResults():
48 | assert False
49 |
50 |
51 | @pytest.mark.xfail
52 | def test_PLSSplitHalfResults():
53 | assert False
54 |
55 |
56 | @pytest.mark.xfail
57 | def test_PLSCrossValidationResults():
58 | assert False
59 |
--------------------------------------------------------------------------------
/pyls/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | from pyls import utils
5 | import pytest
6 | import tqdm
7 |
8 |
9 | def test_empty_dict():
10 | assert utils._empty_dict({})
11 | assert utils._empty_dict(dict())
12 | assert not utils._empty_dict(dict(d=10))
13 | assert not utils._empty_dict(dict(d=dict(d=dict(d=10))))
14 | assert not utils._empty_dict([])
15 | assert not utils._empty_dict(None)
16 | assert not utils._empty_dict('test')
17 | assert not utils._empty_dict(10)
18 | assert not utils._empty_dict(10.0)
19 | assert not utils._empty_dict(set())
20 |
21 |
22 | def test_not_empty_keys():
23 | assert utils._not_empty_keys(dict()) == set()
24 | assert utils._not_empty_keys(dict(test=10)) == {'test'}
25 | assert utils._not_empty_keys(dict(test=10, temp=None)) == {'test'}
26 | assert utils._not_empty_keys(dict(test=10, temp={})) == {'test'}
27 |
28 | with pytest.raises(TypeError):
29 | utils._not_empty_keys([10, 20, 30])
30 |
31 |
32 | def test_ResDict():
33 | # toy example with some allowed keys
34 | class TestDict(utils.ResDict):
35 | allowed = ['test', 'temp']
36 |
37 | # confirm string representations work
38 | d = utils.ResDict()
39 | assert str(d) == 'ResDict()'
40 | assert str(TestDict(test={})) == 'TestDict()'
41 | assert str(TestDict(test=None)) == 'TestDict()'
42 | assert d != TestDict()
43 |
44 | # confirm general key checking works
45 | test1 = TestDict(test=10)
46 | test2 = TestDict(test=11)
47 | test3 = TestDict(test=10, temp=11)
48 | assert str(test1) == 'TestDict(test)'
49 | assert str(test2) == 'TestDict(test)'
50 | assert str(test3) == 'TestDict(test, temp)'
51 | assert test1 == test1
52 | assert test1 != test2
53 | assert test1 != test3
54 |
55 | # confirm numpy array comparisons work
56 | test1 = TestDict(test=np.arange(9))
57 | test2 = TestDict(test=np.arange(9) + 1e-6) # should work
58 | test3 = TestDict(test=np.arange(9) + 1e-5) # too high
59 | test4 = TestDict(test=np.arange(10)) # totally different
60 | assert test1 == test1
61 | assert test1 == test2
62 | assert test1 != test3
63 | assert test1 != test4
64 |
65 | # confirm nested dictionary comparisons work
66 | test1 = TestDict(test=test1)
67 | test2 = TestDict(test=test3)
68 | assert test1 == test1
69 | assert test1 != test2
70 |
71 | # confirm item assignment holds
72 | test1.temp = 10
73 | assert test1.temp == 10
74 | assert test1 == test1
75 | assert test1 != test2
76 |
77 | # confirm rejection of item assignment not in cls.allowed
78 | test1.blargh = 10
79 | assert not hasattr(test1, 'blargh')
80 |
81 | test1.temp = None
82 | test2.temp = None
83 | assert test1 != test2
84 |
85 |
86 | def test_trange():
87 | # test that verbose=False generates a range object
88 | out = utils.trange(1000, verbose=False, desc='Test tqdm')
89 | assert [f for f in out] == list(range(1000))
90 | # test that function will accept arbitrary kwargs and overwrite defaults
91 | out = utils.trange(1000, desc='Test tqdm', mininterval=0.5, ascii=False)
92 | assert isinstance(out, tqdm.tqdm)
93 |
94 |
95 | def test_dummy_label():
96 | groups = [10, 12, 11]
97 | expected = [[10, 12, 11], [10, 10, 12, 12, 11, 11]]
98 | for n_cond in range(1, 3):
99 | dummy = utils.dummy_label(groups, n_cond=n_cond)
100 | assert dummy.shape == (np.sum(groups) * n_cond,)
101 | assert np.unique(dummy).size == len(groups) * n_cond
102 | for n, grp in enumerate(np.unique(dummy)):
103 | assert np.sum(dummy == grp) == expected[n_cond - 1][n]
104 |
105 |
106 | def test_dummy_code():
107 | groups = [10, 12, 11]
108 | expected = [[10, 12, 11], [10, 10, 12, 12, 11, 11]]
109 | for n_cond in range(1, 3):
110 | dummy = utils.dummy_code(groups, n_cond=n_cond)
111 | assert dummy.shape == (np.sum(groups) * n_cond, len(groups) * n_cond)
112 | assert np.all(np.unique(dummy) == [0, 1])
113 | for n, grp in enumerate(dummy.T):
114 | assert grp.sum() == expected[n_cond - 1][n]
115 |
116 |
117 | def test_permute_cols():
118 | x = np.arange(9).reshape(3, 3)
119 | expected = np.array([[0, 1, 5], [6, 4, 2], [3, 7, 8]])
120 |
121 | out = utils.permute_cols(x, seed=np.random.RandomState(1234))
122 | assert not np.all(out == x) and np.all(out == expected)
123 |
124 | # don't accept 1D arrays
125 | with pytest.raises(ValueError):
126 | utils.permute_cols(np.arange(9))
127 |
128 |
129 | def test_unravel():
130 | expected = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
131 | assert utils._unravel()(range(10)) == expected
132 | expected = [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
133 | assert utils._unravel()(x ** 2 for x in range(10)) == expected
134 |
135 | # test context manager status and arbitrary argument acceptance
136 | with utils._unravel(10, test=20) as cm:
137 | assert cm(x**2 for x in range(10)) == expected
138 |
139 |
140 | def test_get_par_func():
141 | def fcn(x):
142 | return x
143 | assert fcn(10) == 10
144 | assert fcn([10, 10]) == [10, 10]
145 |
146 | if utils.joblib_avail:
147 | import joblib
148 | with utils.get_par_func(1000, fcn) as (par, func):
149 | assert isinstance(par, joblib.Parallel)
150 | assert par.n_jobs == 1000
151 | assert not fcn == func
152 |
153 | utils.joblib_avail = False
154 | with utils.get_par_func(1000, fcn) as (par, func):
155 | assert isinstance(par, utils._unravel)
156 | assert fcn == func
157 |
--------------------------------------------------------------------------------
/pyls/tests/types/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/types/__init__.py
--------------------------------------------------------------------------------
/pyls/tests/types/test_regression.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pytest
5 | import pyls
6 |
7 | Xf = 1000
8 | Yf = 100
9 | subj = 50
10 | rs = np.random.RandomState(1234)
11 |
12 |
13 | class PLSRegressionTests():
14 | defaults = pyls.structures.PLSInputs(X=rs.rand(subj, Xf),
15 | Y=rs.rand(subj, Yf),
16 | n_perm=20, n_boot=10,
17 | ci=95, seed=rs, verbose=False)
18 |
19 | def __init__(self, n_components=None, **kwargs):
20 | params = self.defaults.copy()
21 | params.update(kwargs)
22 | self.inputs = pyls.structures.PLSInputs(**params)
23 | self.inputs['n_components'] = n_components
24 | self.output = pyls.pls_regression(**self.inputs)
25 | self.confirm_outputs()
26 |
27 | def make_outputs(self):
28 | """
29 | Used to make list of expected attributes and shapes for PLS outputs
30 |
31 | Returns
32 | -------
33 | attrs : list-of-tuples
34 | Each entry in the list is a tuple with the attribute name and
35 | expected shape
36 | """
37 |
38 | if self.inputs['n_components'] is None:
39 | num_lv = subj - 1
40 | else:
41 | num_lv = self.inputs['n_components']
42 |
43 | attrs = [
44 | ('x_weights', (Xf, num_lv)),
45 | ('x_scores', (subj, num_lv)),
46 | ('y_scores', (subj, num_lv)),
47 | ('y_loadings', (Yf, num_lv)),
48 | ('varexp', (num_lv,)),
49 | ]
50 |
51 | return attrs
52 |
53 | def confirm_outputs(self):
54 | """ Confirms generated outputs are of expected shape / size """
55 | for (attr, shape) in self.make_outputs():
56 | assert attr in self.output
57 | assert self.output[attr].shape == shape
58 |
59 |
60 | @pytest.mark.parametrize('n_components', [
61 | None, 2, 5, 10, 15
62 | ])
63 | def test_regression_onegroup_onecondition(n_components):
64 | PLSRegressionTests(n_components=n_components)
65 |
66 |
67 | @pytest.mark.parametrize('aggfunc', [
68 | 'mean', 'median', 'sum'
69 | ])
70 | def test_regression_3dbootstrap(aggfunc):
71 | # confirm providing 3D arrays works
72 | Y = rs.rand(subj, Yf, 100)
73 | PLSRegressionTests(Y=Y, n_components=2, aggfunc=aggfunc)
74 |
75 | # confirm providing valid bootsamples for 3D array works
76 | sboot = pyls.base.gen_bootsamp([subj], 1, n_boot=10)
77 | nboot = pyls.base.gen_bootsamp([100], 1, n_boot=10)
78 | bootsamples = np.array(list(zip(sboot.T, nboot.T))).T
79 | PLSRegressionTests(Y=Y, n_components=2, aggfunc=aggfunc,
80 | bootsamples=bootsamples, n_boot=10)
81 |
82 |
83 | def test_regression_missingdata():
84 | X = rs.rand(subj, Xf)
85 | X[10] = np.nan
86 | PLSRegressionTests(X=X, n_components=2)
87 | X[20] = np.nan
88 | PLSRegressionTests(X=X, n_components=2)
89 | Y = rs.rand(subj, Yf)
90 | Y[11] = np.nan
91 | PLSRegressionTests(X=X, Y=Y, n_components=2)
92 |
93 |
94 | def test_errors():
95 | with pytest.raises(ValueError):
96 | PLSRegressionTests(n_components=1000)
97 | with pytest.raises(ValueError):
98 | PLSRegressionTests(Y=rs.rand(subj - 1, Yf))
99 | with pytest.raises(ValueError):
100 | PLSRegressionTests(Y=rs.rand(subj, Yf, 10), aggfunc='notafunc')
101 | with pytest.raises(TypeError):
102 | PLSRegressionTests(Y=rs.rand(subj, Yf, 10), aggfunc=lambda x: x)
103 | with pytest.raises(ValueError):
104 | PLSRegressionTests(Y=rs.rand(subj, Yf, 10), bootsamples=[[10], [10]])
105 |
--------------------------------------------------------------------------------
/pyls/tests/types/test_svd.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | import pytest
5 | import pyls
6 |
7 | Xf = 1000
8 | Yf = 100
9 | subj = 100
10 | rs = np.random.RandomState(1234)
11 |
12 |
13 | class PLSSVDTest():
14 | defaults = pyls.structures.PLSInputs(X=rs.rand(subj, Xf),
15 | Y=rs.rand(subj, Yf),
16 | groups=None, n_cond=1,
17 | mean_centering=0, rotate=True,
18 | n_perm=20, n_boot=10, n_split=None,
19 | ci=95, seed=rs, verbose=False)
20 | funcs = dict(meancentered=pyls.meancentered_pls,
21 | behavioral=pyls.behavioral_pls)
22 |
23 | def __init__(self, plstype, **kwargs):
24 | self.inputs = pyls.structures.PLSInputs(**{key: kwargs.get(key, val)
25 | for (key, val) in
26 | self.defaults.items()})
27 | self.output = self.funcs.get(plstype)(**self.inputs)
28 | self.type = plstype
29 | self.confirm_outputs()
30 |
31 | def make_outputs(self):
32 | """
33 | Used to make list of expected attributes and shapes for PLS outputs
34 |
35 | Returns
36 | -------
37 | attrs : list-of-tuples
38 | Each entry in the list is a tuple with the attribute name and
39 | expected shape
40 | """
41 |
42 | dummy = len(self.output.inputs.groups) * self.output.inputs.n_cond
43 | if self.type == 'behavioral':
44 | behavior = Yf * dummy
45 | num_lv = min([f for f in [Xf, behavior] if f != 1])
46 | else:
47 | behavior = num_lv = dummy
48 |
49 | attrs = [
50 | ('x_weights', (Xf, num_lv)),
51 | ('y_weights', (behavior, num_lv)),
52 | ('singvals', (num_lv,)),
53 | ('varexp', (num_lv,)),
54 | ('x_scores', (subj, num_lv)),
55 | ('y_scores', (subj, num_lv)),
56 | ]
57 |
58 | return attrs
59 |
60 | def confirm_outputs(self):
61 | """ Confirms generated outputs are of expected shape / size """
62 | for (attr, shape) in self.make_outputs():
63 | assert attr in self.output
64 | assert self.output[attr].shape == shape
65 |
66 |
67 | @pytest.mark.parametrize(('n_split', 'rotate'), [
68 | (None, True), (None, False), (5, True), (5, False)
69 | ])
70 | def test_behavioral_onegroup_onecondition(n_split, rotate):
71 | PLSSVDTest('behavioral', groups=None, n_cond=1, n_split=n_split,
72 | rotate=rotate)
73 |
74 |
75 | @pytest.mark.parametrize(('n_split', 'rotate'), [
76 | (None, True), (None, False), (5, True), (5, False)
77 | ])
78 | def test_behavioral_multigroup_onecondition(n_split, rotate):
79 | PLSSVDTest('behavioral', groups=[33, 34, 33], n_cond=1, n_split=n_split,
80 | rotate=rotate)
81 |
82 |
83 | @pytest.mark.parametrize(('n_split', 'rotate'), [
84 | (None, True), (None, False), (5, True), (5, False)
85 | ])
86 | def test_behavioral_onegroup_multicondition(n_split, rotate):
87 | PLSSVDTest('behavioral', groups=subj // 4, n_cond=4, n_split=n_split,
88 | rotate=rotate)
89 |
90 |
91 | @pytest.mark.parametrize(('n_split', 'rotate'), [
92 | (None, True), (None, False), (5, True), (5, False)
93 | ])
94 | def test_behavioral_multigroup_multicondition(n_split, rotate):
95 | PLSSVDTest('behavioral', groups=[25, 25], n_cond=2, n_split=n_split,
96 | rotate=rotate)
97 |
98 |
99 | @pytest.mark.parametrize(('mean_centering', 'n_split', 'rotate'), [
100 | (1, None, True), (1, None, False), (1, 5, True), (1, 5, False),
101 | (2, None, True), (2, None, False), (2, 5, True), (2, 5, False)
102 | ])
103 | def test_meancentered_multigroup_onecondition(mean_centering, n_split, rotate):
104 | PLSSVDTest('meancentered', groups=[33, 34, 33], n_cond=1, n_split=n_split,
105 | mean_centering=mean_centering, rotate=rotate)
106 |
107 |
108 | @pytest.mark.parametrize(('mean_centering', 'n_split', 'rotate'), [
109 | (0, None, True), (0, None, False), (0, 5, True), (0, 5, False),
110 | (2, None, True), (2, None, False), (2, 5, True), (2, 5, False)
111 | ])
112 | def test_meancentered_onegroup_multicondition(mean_centering, n_split, rotate):
113 | PLSSVDTest('meancentered', groups=subj // 2, n_cond=2, n_split=n_split,
114 | mean_centering=mean_centering, rotate=rotate)
115 |
116 |
117 | @pytest.mark.parametrize(('mean_centering', 'n_split', 'rotate'), [
118 | (0, None, True), (0, None, False), (0, 5, True), (0, 5, False),
119 | (1, None, True), (1, None, False), (1, 5, True), (1, 5, False),
120 | (2, None, True), (2, None, False), (2, 5, True), (2, 5, False)
121 | ])
122 | def test_meancentered_multigroup_multicondition(mean_centering, n_split,
123 | rotate):
124 | PLSSVDTest('meancentered', groups=[25, 25], n_cond=2, n_split=n_split,
125 | mean_centering=mean_centering, rotate=rotate)
126 |
127 |
128 | def test_warnings():
129 | with pytest.warns(UserWarning):
130 | PLSSVDTest('meancentered', groups=[50, 50], mean_centering=0)
131 | with pytest.warns(UserWarning):
132 | PLSSVDTest('meancentered', n_cond=2, mean_centering=1)
133 |
134 |
135 | def test_errors():
136 | with pytest.raises(ValueError):
137 | PLSSVDTest('meancentered', groups=[50, 50], mean_centering=3)
138 | with pytest.raises(ValueError):
139 | PLSSVDTest('meancentered', groups=[subj])
140 | with pytest.raises(ValueError):
141 | PLSSVDTest('meancentered', n_cond=3)
142 | with pytest.raises(ValueError):
143 | PLSSVDTest('behavioral', Y=rs.rand(subj - 1, Yf))
144 |
--------------------------------------------------------------------------------
/pyls/types/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | The primary PLS decomposition methods for use in conducting PLS analyses
4 | """
5 |
6 | __all__ = ['behavioral_pls', 'meancentered_pls', 'pls_regression']
7 |
8 | from .behavioral import behavioral_pls
9 | from .meancentered import meancentered_pls
10 | from .regression import pls_regression
11 |
--------------------------------------------------------------------------------
/pyls/types/behavioral.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | from sklearn.metrics import r2_score
5 | from ..base import BasePLS, gen_splits
6 | from ..structures import _pls_input_docs
7 | from .. import compute, utils
8 |
9 |
10 | class BehavioralPLS(BasePLS):
11 | def __init__(self, X, Y, *, groups=None, n_cond=1, n_perm=5000,
12 | n_boot=5000, n_split=100, test_size=0.25, test_split=100,
13 | covariance=False, rotate=True, ci=95, permsamples=None,
14 | bootsamples=None, seed=None, verbose=True, n_proc=None,
15 | **kwargs):
16 |
17 | super().__init__(X=np.asarray(X), Y=np.asarray(Y), groups=groups,
18 | n_cond=n_cond, n_perm=n_perm, n_boot=n_boot,
19 | n_split=n_split, test_size=test_size,
20 | test_split=test_split, covariance=covariance,
21 | rotate=rotate, ci=ci, permsamples=permsamples,
22 | bootsamples=bootsamples, seed=seed, verbose=verbose,
23 | n_proc=n_proc, **kwargs)
24 |
25 | self.results = self.run_pls(self.inputs.X, self.inputs.Y)
26 |
27 | def gen_covcorr(self, X, Y, groups, **kwargs):
28 | """
29 | Computes cross-covariance matrix from `X` and `Y`
30 |
31 | Parameters
32 | ----------
33 | X : (S, B) array_like
34 | Input data matrix, where `S` is observations and `B` is features
35 | Y : (S, T) array_like
36 | Input data matrix, where `S` is observations and `T` is features
37 | groups : (S, J) array_like
38 | Dummy coded input array, where `S` is observations and `J`
39 | corresponds to the number of different groups x conditions. A value
40 | of 1 indicates that an observation belongs to a specific group or
41 | condition.
42 |
43 | Returns
44 | -------
45 | crosscov : (J*T, B) np.ndarray
46 | Cross-covariance matrix
47 | """
48 |
49 | return np.row_stack([
50 | compute.xcorr(X[grp], Y[grp], covariance=self.inputs.covariance)
51 | for grp in groups.T.astype(bool)
52 | ])
53 |
54 | def gen_distrib(self, X, Y, original, groups, *args, **kwargs):
55 | """
56 | Finds behavioral correlations for single bootstrap resample
57 |
58 | Parameters
59 | ----------
60 | X : (S, B) array_like
61 | Input data matrix, where `S` is observations and `B` is features
62 | Y : (S, T) array_like
63 | Input data matrix, where `S` is observations and `T` is features
64 | original : (B, L) array_like
65 | Left singular vectors from bootstrap
66 | groups : (S, J) array_like
67 | Dummy coded input array, where `S` is observations and `J`
68 | corresponds to the number of different groups x conditions. A value
69 | of 1 indicates that an observation belongs to a specific group or
70 | condition.
71 |
72 | Returns
73 | -------
74 | distrib : (T, L)
75 | Behavioral correlations for single bootstrap resample
76 | """
77 |
78 | tusc = X @ compute.normalize(original)
79 |
80 | return self.gen_covcorr(tusc, Y, groups=groups)
81 |
82 | def crossval(self, X, Y, groups=None, seed=None):
83 | """
84 | Performs cross-validation of SVD of `X` and `Y`
85 |
86 | Parameters
87 | ----------
88 | X : (S, B) array_like
89 | Input data matrix, where `S` is observations and `B` is features
90 | Y : (S, T) array_like
91 | Input data matrix, where `S` is observations and `T` is features
92 | seed : {int, :obj:`numpy.random.RandomState`, None}, optional
93 | Seed for random number generation. Default: None
94 |
95 | Returns
96 | -------
97 | r_scores : (C,) np.ndarray
98 | R (Pearon correlation) scores across train-test splits
99 | r2_scores : (C,) np.ndarray
100 | R^2 (coefficient of determination) scores across train-test splits
101 | """
102 |
103 | if groups is None:
104 | groups = utils.dummy_code(self.inputs.groups, self.inputs.n_cond)
105 |
106 | # use gen_splits to handle grouping/condition vars in train/test split
107 | splits = gen_splits(self.inputs.groups,
108 | self.inputs.n_cond,
109 | self.inputs.test_split,
110 | seed=seed,
111 | test_size=self.inputs.test_size)
112 |
113 | gen = utils.trange(self.inputs.test_split, verbose=self.inputs.verbose,
114 | desc='Running cross-validation')
115 | with utils.get_par_func(self.inputs.n_proc,
116 | self.__class__._single_crossval) as (par,
117 | func):
118 | out = par(
119 | func(self, X=X, Y=Y, inds=splits[:, i], groups=groups, seed=i)
120 | for i in gen
121 | )
122 | r_scores, r2_scores = [np.stack(o, axis=-1) for o in zip(*out)]
123 |
124 | return r_scores, r2_scores
125 |
126 | def _single_crossval(self, X, Y, inds, groups=None, seed=None):
127 | """
128 | Generates single cross-validated r and r^2 score
129 |
130 | Parameters
131 | ----------
132 | X : (S, B) array_like
133 | Input data matrix, where `S` is observations and `B` is features
134 | Y : (S, T) array_like
135 | Input data matrix, where `S` is observations and `T` is features
136 | inds : (S,) array_like
137 | Train-test split, where train = True and test = False
138 | groups : (S, J) array_like, optional
139 | Dummy coded input array, where `S` is observations and `J`
140 | corresponds to the number of different groups x conditions. A value
141 | of 1 indicates that an observation belongs to a specific group or
142 | condition. If not specified will be generated on-the-fly. Default:
143 | None
144 | seed : {int, :obj:`numpy.random.RandomState`, None}, optional
145 | Seed for random number generation. Default: None
146 | """
147 |
148 | if groups is None:
149 | groups = utils.dummy_code(self.inputs.groups, self.inputs.n_cond)
150 |
151 | X_train, Y_train, dummy_train = X[inds], Y[inds], groups[inds]
152 | X_test, Y_test, dummy_test = X[~inds], Y[~inds], groups[~inds]
153 | # perform initial decomposition on train set
154 | U, d, V = self.svd(X_train, Y_train, groups=dummy_train, seed=seed)
155 |
156 | # rescale the test set based on the training set
157 | Y_pred = []
158 | for n, V_spl in enumerate(np.split(V, groups.shape[-1])):
159 | tr_grp = dummy_train[:, n].astype(bool)
160 | te_grp = dummy_test[:, n].astype(bool)
161 | rescaled = compute.rescale_test(X_train[tr_grp], X_test[te_grp],
162 | Y_train[tr_grp], U, V_spl)
163 | Y_pred.append(rescaled)
164 | Y_pred = np.row_stack(Y_pred)
165 |
166 | # calculate r & r-squared from comp of rescaled test & true values
167 | r_scores = compute.efficient_corr(Y_test, Y_pred)
168 | r2_scores = r2_score(Y_test, Y_pred, multioutput='raw_values')
169 |
170 | return r_scores, r2_scores
171 |
172 | def run_pls(self, X, Y):
173 | """
174 | Runs PLS analysis
175 |
176 | Parameters
177 | ----------
178 | X : (S, B) array_like
179 | Input data matrix, where `S` is observations and `B` is features
180 | Y : (S, T) array_like
181 | Input data matrix, where `S` is observations and `T` is features
182 | """
183 |
184 | res = super().run_pls(X, Y)
185 |
186 | # mechanism for splitting outputs along group / condition indices
187 | grps = np.repeat(res['inputs']['groups'], res['inputs']['n_cond'])
188 | res['y_scores'] = np.vstack([
189 | y @ v for (y, v) in zip(np.split(Y, np.cumsum(grps)[:-1]),
190 | np.split(res['y_weights'], len(grps)))
191 | ])
192 |
193 | # get lvcorrs
194 | groups = utils.dummy_code(self.inputs.groups, self.inputs.n_cond)
195 | res['y_loadings'] = self.gen_covcorr(res['x_scores'], Y, groups)
196 |
197 | if self.inputs.n_boot > 0:
198 | # compute bootstraps
199 | distrib, u_sum, u_square = self.bootstrap(X, Y, self.rs)
200 |
201 | # add original scaled singular vectors back in
202 | bs = res['x_weights'] @ res['singvals']
203 | u_sum, u_square = u_sum + bs, u_square + (bs ** 2)
204 |
205 | # calculate bootstrap ratios and confidence intervals
206 | bsrs, uboot_se = compute.boot_rel(bs, u_sum, u_square,
207 | self.inputs.n_boot + 1)
208 | corrci = np.stack(compute.boot_ci(distrib, ci=self.inputs.ci), -1)
209 |
210 | # update results.boot_result dictionary
211 | res['bootres'].update(dict(x_weights_normed=bsrs,
212 | x_weights_stderr=uboot_se,
213 | y_loadings=res['y_loadings'].copy(),
214 | y_loadings_boot=distrib,
215 | y_loadings_ci=corrci,
216 | bootsamples=self.bootsamp))
217 |
218 | # compute cross-validated prediction-based metrics
219 | if self.inputs.test_split is not None and self.inputs.test_size > 0:
220 | r, r2 = self.crossval(X, Y, groups=self.dummy, seed=self.rs)
221 | res['cvres'].update(dict(pearson_r=r, r_squared=r2))
222 |
223 | # get rid of the stupid diagonal matrix
224 | res['varexp'] = np.diag(compute.varexp(res['singvals']))
225 | res['singvals'] = np.diag(res['singvals'])
226 |
227 | return res
228 |
229 |
230 | # let's make it a function
231 | def behavioral_pls(X, Y, *, groups=None, n_cond=1, n_perm=5000, n_boot=5000,
232 | n_split=0, test_size=0.25, test_split=100,
233 | covariance=False, rotate=True, ci=95, permsamples=None,
234 | bootsamples=None, seed=None, verbose=True, n_proc=None,
235 | **kwargs):
236 | pls = BehavioralPLS(X=X, Y=Y, groups=groups, n_cond=n_cond,
237 | n_perm=n_perm, n_boot=n_boot, n_split=n_split,
238 | test_size=test_size, test_split=test_split,
239 | covariance=covariance, rotate=rotate, ci=ci,
240 | permsamples=permsamples, bootsamples=bootsamples,
241 | seed=seed, verbose=verbose, n_proc=n_proc, **kwargs)
242 | return pls.results
243 |
244 |
245 | behavioral_pls.__doc__ = r"""
246 | Performs behavioral PLS on `X` and `Y`.
247 |
248 | Behavioral PLS is a multivariate statistical approach that relates two sets
249 | of variables together. Traditionally, one of these arrays
250 | represents a set of brain features (e.g., functional connectivity
251 | estimates) and the other represents a set of behavioral variables; however,
252 | these arrays can be any two sets of features belonging to a common group of
253 | samples.
254 |
255 | Using a singular value decomposition, behavioral PLS attempts to find
256 | linear combinations of features from the provided arrays that maximally
257 | covary with each other. The decomposition is performed on the cross-
258 | covariance matrix :math:`R`, where :math:`R = Y^{{T}} \times X`, which
259 | represents the covariation of all the input features across samples.
260 |
261 | Parameters
262 | ----------
263 | {input_matrix}
264 | Y : (S, T) array_like
265 | Input data matrix, where `S` is samples and `T` is features
266 | {groups}
267 | {conditions}
268 | {stat_test}
269 | {split_half}
270 | {cross_val}
271 | {covariance}
272 | {rotate}
273 | {ci}
274 | {resamples}
275 | {proc_options}
276 |
277 | Returns
278 | ----------
279 | {pls_results}
280 |
281 | Notes
282 | -----
283 | {decomposition_narrative}
284 |
285 | References
286 | ----------
287 |
288 | {references}
289 |
290 | Misic, B., Betzel, R. F., de Reus, M. A., van den Heuvel, M.P.,
291 | Berman, M. G., McIntosh, A. R., & Sporns, O. (2016). Network level
292 | structure-function relationships in human neocortex. Cerebral Cortex,
293 | 26, 3285-96.
294 | """.format(**_pls_input_docs)
295 |
--------------------------------------------------------------------------------
/pyls/types/meancentered.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import warnings
4 | import numpy as np
5 | from ..base import BasePLS
6 | from ..structures import _pls_input_docs
7 | from .. import compute, utils
8 |
9 |
10 | class MeanCenteredPLS(BasePLS):
11 | def __init__(self, X, groups=None, n_cond=1, mean_centering=0, n_perm=5000,
12 | n_boot=5000, n_split=100, rotate=True, ci=95,
13 | permsamples=None, bootsamples=None, seed=None,
14 | verbose=True, n_proc=None, **kwargs):
15 |
16 | # check that groups and conditions are set appropriately
17 | if groups is None:
18 | if len(X) // n_cond != len(X) / n_cond:
19 | raise ValueError('Provided `X` matrix with {} samples is not '
20 | 'evenly divisible into {} conditions. Please '
21 | 'confirm inputs are correct and try again. '
22 | .format(len(X), n_cond))
23 | groups = [len(X) // n_cond]
24 | elif not isinstance(groups, (list, np.ndarray)):
25 | groups = [groups]
26 |
27 | # check inputs for validity
28 | if n_cond == 1 and len(groups) == 1:
29 | raise ValueError('Cannot perform PLS with only one group and one '
30 | 'condition. Please confirm inputs are correct.')
31 | if n_cond == 1 and mean_centering == 0:
32 | warnings.warn('Cannot set mean_centering to 0 when there is only '
33 | 'one condition. Resetting mean_centering to 1.')
34 | mean_centering = 1
35 | elif len(groups) == 1 and mean_centering == 1:
36 | warnings.warn('Cannot set mean_centering to 1 when there is only '
37 | 'one group. Resetting mean_centering to 0.')
38 | mean_centering = 0
39 |
40 | # instantiate base class, generate dummy array, and run PLS analysis
41 | super().__init__(X=np.asarray(X), groups=groups, n_cond=n_cond,
42 | mean_centering=mean_centering, n_perm=n_perm,
43 | n_boot=n_boot, n_split=n_split, rotate=rotate, ci=ci,
44 | permsamples=permsamples, bootsamples=bootsamples,
45 | seed=seed, verbose=verbose, n_proc=n_proc, **kwargs)
46 | self.inputs.Y = utils.dummy_code(self.inputs.groups,
47 | self.inputs.n_cond)
48 | self.results = self.run_pls(self.inputs.X, self.inputs.Y)
49 |
50 | def gen_covcorr(self, X, Y, **kwargs):
51 | """
52 | Computes mean-centered matrix from `X` and `Y`
53 |
54 | Parameters
55 | ----------
56 | X : (S, B) array_like
57 | Input data matrix, where `S` is observations and `B` is features
58 | Y : (S, T) array_like
59 | Dummy coded input array, where `S` is observations and `T`
60 | corresponds to the number of different groups x conditions. A value
61 | of 1 indicates that an observation belongs to a specific group or
62 | condition.
63 |
64 | Returns
65 | -------
66 | mean_centered : (T, B) np.ndarray
67 | Mean-centered matrix
68 | """
69 |
70 | mean_centered = compute.get_mean_center(X, Y, self.inputs.n_cond,
71 | self.inputs.mean_centering,
72 | means=True)
73 | return mean_centered
74 |
75 | def gen_distrib(self, X, Y, original, *args, **kwargs):
76 | """
77 | Finds contrast for single bootstrap resample
78 |
79 | Parameters
80 | ----------
81 | X : (S, B) array_like
82 | Input data matrix, where `S` is observations and `B` is features
83 | Y : (S, T) array_like
84 | Dummy coded input array, where `S` is observations and `T`
85 | corresponds to the number of different groups x conditions. A value
86 | of 1 indicates that an observation belongs to a specific group or
87 | condition.
88 | original : (B, L) array_like
89 | Left singular vectors from bootstrap
90 |
91 | Returns
92 | -------
93 | distrib : (T, L)
94 | Contrast for single bootstrap resample
95 | """
96 |
97 | usc = compute.get_mean_center(X, Y, self.inputs.n_cond,
98 | self.inputs.mean_centering,
99 | means=False)
100 | usc = usc @ compute.normalize(original)
101 |
102 | return np.row_stack([usc[g].mean(axis=0) for g in Y.T.astype(bool)])
103 |
104 | def make_permutation(self, X, Y, perminds):
105 | """
106 | Permutes `X` according to `perminds`, leaving `Y` un-permuted
107 |
108 | Parameters
109 | ----------
110 | X : (S, B) array_like
111 | Input data matrix, where `S` is observations and `B` is features
112 | Y : (S, T) array_like
113 | Input data matrix, where `S` is observations and `T` is features
114 | perminds : (S,) array_like
115 | Array by which to permute `X`
116 |
117 | Returns
118 | -------
119 | Xp : (S, B) array_like
120 | `X`, permuted according to `perminds`
121 | Yp : (S, T) array_like
122 | Identical to `Y`
123 | """
124 |
125 | return X[perminds], Y
126 |
127 | def run_pls(self, X, Y):
128 | """
129 | Runs PLS analysis
130 |
131 | Parameters
132 | ----------
133 | X : (S, B) array_like
134 | Input data matrix, where `S` is observations and `B` is features
135 | Y : (S, T) array_like, optional
136 | Dummy coded input array, where `S` is observations and `T`
137 | corresponds to the number of different groups x conditions. A value
138 | of 1 indicates that an observation belongs to a specific group or
139 | condition.
140 |
141 | Returns
142 | -------
143 | res : :obj:`pyls.structures.PLSResults`
144 | PLS results object
145 | """
146 |
147 | res = super().run_pls(X, Y)
148 | res['y_scores'] = Y @ res['y_weights']
149 |
150 | # get normalized brain scores and contrast
151 | brainscores_dm = compute.get_mean_center(X, Y, self.inputs.n_cond,
152 | self.inputs.mean_centering,
153 | False) @ res['x_weights']
154 | contrast = np.row_stack([brainscores_dm[grp].mean(axis=0) for grp
155 | in Y.T.astype(bool)])
156 |
157 | if self.inputs.n_boot > 0:
158 | # compute bootstraps
159 | distrib, u_sum, u_square = self.bootstrap(X, Y, self.rs)
160 |
161 | # calculate bootstrap ratios and confidence intervals
162 | bs = res['x_weights'] @ res['singvals']
163 | bsrs, uboot_se = compute.boot_rel(bs, u_sum, u_square,
164 | self.inputs.n_boot)
165 | corrci = np.stack(compute.boot_ci(distrib, ci=self.inputs.ci), -1)
166 |
167 | # update results.boot_result dictionary
168 | res['bootres'].update(dict(x_weights_normed=bsrs,
169 | x_weights_stderr=uboot_se,
170 | bootsamples=self.bootsamp,
171 | contrast=contrast,
172 | contrast_boot=distrib,
173 | contrast_ci=corrci))
174 |
175 | # get rid of the stupid diagonal matrix
176 | res['varexp'] = np.diag(compute.varexp(res['singvals']))
177 | res['singvals'] = np.diag(res['singvals'])
178 |
179 | return res
180 |
181 |
182 | def meancentered_pls(X, *, groups=None, n_cond=1, mean_centering=0,
183 | n_perm=5000, n_boot=5000, n_split=0, rotate=True, ci=95,
184 | permsamples=None, bootsamples=None, seed=None,
185 | verbose=True, n_proc=None, **kwargs):
186 | pls = MeanCenteredPLS(X=X, groups=groups, n_cond=n_cond,
187 | mean_centering=mean_centering,
188 | n_perm=n_perm, n_boot=n_boot, n_split=n_split,
189 | rotate=rotate, ci=ci, permsamples=permsamples,
190 | bootsamples=bootsamples, seed=seed, verbose=verbose,
191 | n_proc=n_proc, **kwargs)
192 | return pls.results
193 |
194 |
195 | meancentered_pls.__doc__ = r"""
196 | Performs mean-centered PLS on `X`, sorted into `groups` and `conditions`.
197 |
198 | Mean-centered PLS is a multivariate statistical approach that attempts to
199 | find sets of variables in a matrix which maximally discriminate between
200 | subgroups within the matrix.
201 |
202 | While it carries the name PLS, mean-centered PLS is perhaps more related to
203 | principal components analysis than it is to :obj:`pyls.behavioral_pls`. In
204 | contrast to behavioral PLS, mean-centered PLS does not construct a cross-
205 | covariance matrix. Instead, it operates by averaging the provided data
206 | (`X`) within groups and/or conditions. The resultant matrix :math:`M` is
207 | mean-centered, generating a new matrix :math:`R_{{mean\_centered}}` which
208 | is submitted to singular value decomposition.
209 |
210 | Parameters
211 | ----------
212 | {input_matrix}
213 | {groups}
214 | {conditions}
215 | {mean_centering}
216 | {stat_test}
217 | {split_half}
218 | {rotate}
219 | {ci}
220 | {resamples}
221 | {proc_options}
222 |
223 | Returns
224 | ----------
225 | {pls_results}
226 |
227 | Notes
228 | -----
229 | The provided `mean_centering` argument can be changed to highlight or
230 | "boost" potential group / condition differences by modifying how
231 | :math:`R_{{mean\_centered}}` is generated:
232 |
233 | - `mean_centering=0` will remove group means collapsed across conditions,
234 | emphasizing potential differences between conditions while removing
235 | overall group differences
236 | - `mean_centering=1` will remove condition means collapsed across groups,
237 | emphasizing potential differences between groups while removing overall
238 | condition differences
239 | - `mean_centering=2` will remove the grand mean collapsed across both
240 | groups _and_ conditions, permitting investigation of the full spectrum of
241 | potential group and condition effects.
242 |
243 | {decomposition_narrative}
244 |
245 | References
246 | ----------
247 | {references}
248 | """.format(**_pls_input_docs)
249 |
--------------------------------------------------------------------------------
/pyls/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from contextlib import contextmanager
4 |
5 | import numpy as np
6 | import tqdm
7 | from sklearn.utils import Bunch
8 | from sklearn.utils.validation import check_array, check_random_state
9 | try:
10 | from joblib import Parallel, delayed
11 | joblib_avail = True
12 | except ImportError:
13 | joblib_avail = False
14 |
15 |
16 | class ResDict(Bunch):
17 | """
18 | Subclass of `sklearn.utils.Bunch` that only accepts keys in `cls.allowed`
19 |
20 | Also edits string representation to show non-empty keys
21 | """
22 |
23 | allowed = []
24 |
25 | def __init__(self, **kwargs):
26 | # only keep allowed keys
27 | i = {key: val for key, val in kwargs.items() if key in
28 | self.__class__.allowed}
29 | super().__init__(**i)
30 |
31 | def __str__(self):
32 | # override dict built-in string repr to display only non-empty keys
33 | items = [k for k in self.__class__.allowed
34 | if k in _not_empty_keys(self)]
35 | return '{name}({keys})'.format(name=self.__class__.__name__,
36 | keys=', '.join(items))
37 |
38 | def __setitem__(self, key, val):
39 | # legit we only want keys that are allowed
40 | if key in self.__class__.allowed:
41 | super().__setitem__(key, val)
42 |
43 | def __eq__(self, value):
44 | # easy check -- are objects the same class?
45 | if not isinstance(value, self.__class__):
46 | return False
47 | # another easy check -- are the non-empty keys different?
48 | if _not_empty_keys(self) != _not_empty_keys(value):
49 | return False
50 | # harder check -- iterate through everything and check item equality
51 | # potentially recursive checks if sub-items are dictionaries
52 | for k, v in self.items():
53 | v2 = value.get(k, None)
54 | if v is None and v2 is None:
55 | continue
56 | # recursive dictionary comparison
57 | if isinstance(v, dict) and isinstance(v2, dict):
58 | if v != v2:
59 | return False
60 | # compare using numpy testing suite
61 | # this is because arrays may be different size and numpy testing
62 | # is way more solid than anything we could come up with
63 | else:
64 | try:
65 | np.testing.assert_array_almost_equal(v, v2)
66 | except (TypeError, AssertionError):
67 | return False
68 |
69 | return True
70 |
71 | def __ne__(self, value):
72 | return not self == value
73 |
74 | __repr__ = __str__
75 |
76 |
77 | def _not_empty_keys(dictionary):
78 | """
79 | Returns list of non-empty keys in `dictionary`
80 |
81 | Non-empty keys are defined as (1) not being None-type and (2) not being an
82 | empty dictionary, itself
83 |
84 | Parameters
85 | ----------
86 | dictionary : dict
87 | Object to query for non-empty keys
88 |
89 | Returns
90 | -------
91 | keys : list
92 | Non-empty keys in `dictionary`
93 | """
94 |
95 | if not isinstance(dictionary, dict):
96 | raise TypeError('Provided input must be type dict, not {}'
97 | .format(type(dictionary)))
98 |
99 | keys = []
100 | for key, value in dictionary.items():
101 | if value is not None and not _empty_dict(value):
102 | keys.append(key)
103 |
104 | return set(keys)
105 |
106 |
107 | def _empty_dict(dobj):
108 | """
109 | Returns True if `dobj` is an empty dictionary; otherwise, returns False
110 |
111 | Parameters
112 | ----------
113 | dobj
114 | Any Python object
115 |
116 | Returns
117 | -------
118 | empty : bool
119 | Whether `dobj` is an empty dictionary-like object
120 | """
121 |
122 | try:
123 | return len(dobj.keys()) == 0
124 | except (AttributeError, TypeError):
125 | return False
126 |
127 |
128 | def trange(n_iter, verbose=True, **kwargs):
129 | """
130 | Wrapper for :obj:`tqdm.trange` with some default options set
131 |
132 | Parameters
133 | ----------
134 | n_iter : int
135 | Number of iterations for progress bar
136 | verbose : bool, optional
137 | Whether to return an :obj:`tqdm.tqdm` progress bar instead of a range
138 | generator. Default: True
139 | kwargs
140 | Key-value arguments provided to :func:`tqdm.trange`
141 |
142 | Returns
143 | -------
144 | progbar : :obj:`tqdm.tqdm`
145 | """
146 |
147 | form = ('{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt}'
148 | ' | {elapsed}<{remaining}')
149 | defaults = dict(ascii=True, leave=False, bar_format=form)
150 | defaults.update(kwargs)
151 |
152 | return tqdm.trange(n_iter, disable=not verbose, **defaults)
153 |
154 |
155 | def dummy_code(groups, n_cond=1):
156 | """
157 | Dummy codes `groups` and `n_cond`
158 |
159 | Parameters
160 | ----------
161 | groups : (G,) list
162 | List with number of subjects in each of `G` groups
163 | n_cond : int, optional
164 | Number of conditions, for each subject. Default: 1
165 |
166 | Returns
167 | -------
168 | Y : (S, F) `numpy.ndarray`
169 | Dummy-coded group array
170 | """
171 |
172 | labels = dummy_label(groups, n_cond)
173 | dummy = np.column_stack([labels == g for g in np.unique(labels)])
174 |
175 | return dummy.astype(int)
176 |
177 |
178 | def dummy_label(groups, n_cond=1):
179 | """
180 | Generates group labels for `groups` and `n_cond`
181 |
182 | Parameters
183 | ----------
184 | groups : (G,) list
185 | List with number of subjects in each of `G` groups
186 | n_cond : int, optional
187 | Number of conditions, for each subject. Default: 1
188 |
189 | Returns
190 | -------
191 | Y : (S,) `numpy.ndarray`
192 | Dummy-label group array
193 | """
194 |
195 | num_labels = len(groups) * n_cond
196 |
197 | return np.repeat(np.arange(num_labels) + 1, np.repeat(groups, n_cond))
198 |
199 |
200 | def permute_cols(x, seed=None):
201 | """
202 | Permutes the rows for each column in `x` separately
203 |
204 | Taken from https://stackoverflow.com/a/27489131
205 |
206 | Parameters
207 | ----------
208 | x : (S, B) array_like
209 | Input array to be permuted
210 | seed : {int, :obj:`numpy.random.RandomState`, None}, optional
211 | Seed for random number generation. Default: None
212 |
213 | Returns
214 | -------
215 | permuted : `numpy.ndarray`
216 | Permuted array
217 | """
218 |
219 | # can't permute row with only 1 sample...
220 | x = check_array(x)
221 | rs = check_random_state(seed)
222 | ix_i = rs.random_sample(x.shape).argsort(axis=0)
223 | ix_j = np.tile(np.arange(x.shape[1]), (x.shape[0], 1))
224 | return x[ix_i, ix_j]
225 |
226 |
227 | class _unravel():
228 | """
229 | Small utility to unravel generator object into a list
230 |
231 | Parameters
232 | ----------
233 | x : generator
234 |
235 | Returns
236 | -------
237 | y : list
238 | """
239 | def __init__(self, *args, **kwargs):
240 | pass
241 |
242 | def __call__(self, x):
243 | return [f for f in x]
244 |
245 | def __enter__(self, *args, **kwargs):
246 | return self
247 |
248 | def __exit__(self, *args, **kwargs):
249 | pass
250 |
251 |
252 | @contextmanager
253 | def get_par_func(n_proc, func, **kwargs):
254 | """
255 | Creates joblib-style parallelization function if joblib is available
256 |
257 | Parameters
258 | ----------
259 | n_proc : int
260 | Number of processors (i.e., jobs) to use for parallelization
261 | func : function
262 | Function to parallelize
263 |
264 | Returns
265 | -------
266 | parallel : :obj:`joblib.Parallel` object
267 | Object to parallelize over `func`
268 | func : :obj:`joblib.delayed` object
269 | Provided `func` wrapped in `joblib.delayed`
270 | """
271 |
272 | if joblib_avail:
273 | func = delayed(func)
274 | with Parallel(n_jobs=n_proc, max_nbytes=1e6,
275 | mmap_mode='r+', **kwargs) as parallel:
276 | yield parallel, func
277 | else:
278 | parallel = _unravel()
279 | yield parallel, func
280 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | h5py
2 | numpy
3 | scikit-learn
4 | scipy
5 | tqdm
6 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = pyls
3 | url = https://github.com/rmarkello/pyls
4 | download_url = https://github.com/rmarkello/pyls
5 | author = pyls developers
6 | maintainer = Ross Markello
7 | maintainer_email = rossmarkello@gmail.com
8 | classifiers =
9 | Development Status :: 3 - Alpha
10 | Intended Audience :: Science/Research
11 | License :: OSI Approved :: GNU General Public License v2 (GPLv2)
12 | Programming Language :: Python :: 3.5
13 | Programming Language :: Python :: 3.6
14 | Programming Language :: Python :: 3.7
15 | license = BSD-3
16 | description = A toolbox for performing multivariate PLS decomposition analyses
17 | long_description = file:README.md
18 | long_description_content_type = text/markdown; charset=UTF-8
19 | platforms = OS Independent
20 | provides =
21 | pyls
22 |
23 | [options]
24 | python_requires = >=3.5.1
25 | install_requires =
26 | h5py
27 | numpy
28 | scikit-learn
29 | scipy
30 | tqdm
31 | tests_require =
32 | pytest >=3.6
33 | test_suite = pytest
34 | zip_safe = False
35 | packages = find:
36 | include_package_data = True
37 |
38 | [options.extras_require]
39 | doc =
40 | sphinx >=2.0
41 | sphinx-argparse
42 | sphinx_rtd_theme
43 | plotting =
44 | pandas
45 | seaborn
46 | style =
47 | flake8
48 | test =
49 | pytest-cov
50 | all =
51 | %(doc)s
52 | %(plotting)s
53 | %(style)s
54 | %(test)s
55 |
56 | [options.package_data]
57 | pyls =
58 | data/*
59 | pyls/examples/*json
60 | pyls/tests/data/*
61 |
62 | [coverage:run]
63 | omit =
64 | */pyls/tests/matlab.py
65 | */pyls/_version.py
66 |
67 | [flake8]
68 | doctests = True
69 | exclude =
70 | *build/*
71 | *sphinx*
72 | */__init__.py
73 | ignore = E402, W503
74 | max-line-length = 79
75 |
76 | [tool:pytest]
77 | doctest_optionflags = NORMALIZE_WHITESPACE
78 | xfail_strict = true
79 | addopts = -rx
80 |
81 | [versioneer]
82 | VCS = git
83 | style = pep440
84 | versionfile_source = pyls/_version.py
85 | versionfile_build = pyls/_version.py
86 | tag_prefix =
87 | parentdir_prefix =
88 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import sys
3 |
4 | from setuptools import setup
5 | import versioneer
6 |
7 | SETUP_REQUIRES = ['setuptools >= 30.3.0']
8 | SETUP_REQUIRES += ['wheel'] if 'bdist_wheel' in sys.argv else []
9 |
10 | if __name__ == "__main__":
11 | setup(name='pyls',
12 | setup_requires=SETUP_REQUIRES,
13 | version=versioneer.get_version(),
14 | cmdclass=versioneer.get_cmdclass())
15 |
--------------------------------------------------------------------------------