├── .circleci └── config.yml ├── .gitattributes ├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── data ├── linnerud_exercise.csv ├── linnerud_physio.csv └── wine.csv ├── docs ├── .gitignore ├── Makefile ├── _static │ └── theme_overrides.css ├── _templates │ ├── class.rst │ └── function.rst ├── api.rst ├── conf.py ├── index.rst ├── requirements.txt ├── usage.rst └── user_guide │ ├── behavioral.rst │ ├── meancentered.rst │ └── results.rst ├── pyls ├── __init__.py ├── _version.py ├── base.py ├── compute.py ├── examples │ ├── __init__.py │ ├── datasets.json │ └── datasets.py ├── io.py ├── matlab │ ├── __init__.py │ └── io.py ├── plotting │ └── meancentered.py ├── structures.py ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── data │ │ ├── bpls_onegroup_onecond_nosplit.mat │ │ ├── bpls_onegroup_onecond_split.mat │ │ ├── empty.mat │ │ ├── mpls_multigroup_onecond_nosplit.mat │ │ ├── mpls_multigroup_onecond_split.mat │ │ └── resultonly.mat │ ├── matlab.py │ ├── test_base.py │ ├── test_compute.py │ ├── test_examples.py │ ├── test_io.py │ ├── test_matlab.py │ ├── test_structures.py │ ├── test_utils.py │ └── types │ │ ├── __init__.py │ │ ├── test_regression.py │ │ └── test_svd.py ├── types │ ├── __init__.py │ ├── behavioral.py │ ├── meancentered.py │ └── regression.py └── utils.py ├── requirements.txt ├── setup.cfg ├── setup.py └── versioneer.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | executors: 4 | exeggutor: 5 | docker: 6 | - image: circleci/python:3.6 7 | working_directory: ~/pyls 8 | environment: 9 | PYTHON_VERSION: "3.6" 10 | OPENBLAS_NUM_THREADS: "1" 11 | MKL_NUM_THREADS: "1" 12 | DROPBOX_URL: "https://www.dropbox.com/s/e6jfvekw6habeud/matlab_pls.tar.gz?dl=1" 13 | 14 | jobs: 15 | setup: 16 | executor: exeggutor 17 | steps: 18 | - checkout 19 | - run: 20 | name: Generating checksum to cache Matlab PLS results 21 | command: echo "${DROPBOX_URL}" > checksum.txt 22 | - restore_cache: 23 | name: Checking for cached Matlab PLS results 24 | keys: 25 | - data-v2-{{ checksum "checksum.txt" }} 26 | - data-v2- 27 | - run: 28 | name: Preparing Matlab PLS results 29 | command: | 30 | if [[ -e /tmp/data/matlab ]]; then 31 | echo "Restoring Matlab PLS results from cache" 32 | else 33 | mkdir -p /tmp/data/matlab 34 | curl -L "${DROPBOX_URL}" | tar xz -C /tmp/data/matlab 35 | fi 36 | - save_cache: 37 | name: Caching Matlab PLS results 38 | key: data-v2-{{ checksum "checksum.txt" }} 39 | paths: 40 | - /tmp/data 41 | - restore_cache: 42 | name: Restoring cached dependencies 43 | keys: 44 | - dependencies-v3-{{ checksum "requirements.txt" }} 45 | - dependencies-v3- 46 | - run: 47 | name: Creating test environment 48 | command: | 49 | python3 -m venv venv 50 | . 
venv/bin/activate 51 | pip install .[tests] 52 | pip install joblib 53 | - save_cache: 54 | name: Caching dependencies 55 | key: dependencies-v3-{{ checksum "requirements.txt" }} 56 | paths: 57 | - ./venv 58 | - persist_to_workspace: 59 | name: Persisting workspace 60 | root: ./ 61 | paths: 62 | - requirements.txt 63 | - checksum.txt 64 | 65 | 66 | behavioral_pls: 67 | executor: exeggutor 68 | steps: 69 | - checkout 70 | - attach_workspace: 71 | at: ./ 72 | - restore_cache: 73 | name: Loading Matlab PLS results 74 | keys: 75 | - data-v2-{{ checksum "checksum.txt" }} 76 | - data-v2- 77 | - restore_cache: 78 | name: Loading dependencies 79 | keys: 80 | - dependencies-v3-{{ checksum "requirements.txt" }} 81 | - dependencies-v3- 82 | - run: 83 | name: Running Matlab-Python comparison 84 | command: | 85 | . venv/bin/activate 86 | for mat in /tmp/data/matlab/bpls*mat; do 87 | echo $( date +%H:%M:%S ) "${mat}" 88 | python -c "import pyls.tests; pyls.tests.assert_matlab_equivalence('${mat}', n_proc='max', n_perm=2500, n_split=100);" 89 | done 90 | 91 | meancentered_pls: 92 | executor: exeggutor 93 | steps: 94 | - checkout 95 | - attach_workspace: 96 | at: ./ 97 | - restore_cache: 98 | name: Loading Matlab PLS results 99 | keys: 100 | - data-v2-{{ checksum "checksum.txt" }} 101 | - data-v2- 102 | - restore_cache: 103 | name: Loading dependencies 104 | keys: 105 | - dependencies-v3-{{ checksum "requirements.txt" }} 106 | - dependencies-v3- 107 | - run: 108 | name: Running Matlab-Python comparison 109 | command: | 110 | . venv/bin/activate 111 | for mat in /tmp/data/matlab/mpls*mat; do 112 | echo $( date +%H:%M:%S ) "${mat}" 113 | python -c "import pyls.tests; pyls.tests.assert_matlab_equivalence('${mat}', n_proc='max', n_perm=2500, n_split=250);" 114 | done 115 | 116 | workflows: 117 | version: 2.1 118 | regression_tests: 119 | jobs: 120 | - setup 121 | - behavioral_pls: 122 | requires: 123 | - setup 124 | - meancentered_pls: 125 | requires: 126 | - setup 127 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | pyls/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | plsc/ 2 | .vscode/ 3 | docs/generated/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | .pytest_cache/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # IPython Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | 93 | # Rope project settings 94 | .ropeproject 95 | 96 | .imdone 97 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | dist: xenial 4 | notifications: 5 | email: change 6 | 7 | branches: 8 | only: 9 | - master 10 | 11 | python: 12 | - 3.5 13 | - 3.6 14 | - 3.7 15 | 16 | env: 17 | matrix: 18 | - CHECK_TYPE=linting 19 | - CHECK_TYPE=docdoctest INSTALL_PANDAS=true 20 | - CHECK_TYPE=test 21 | global: 22 | - OPENBLAS_NUM_THREADS=1 23 | - MKL_NUM_THREADS=1 24 | - INSTALL_TYPE=setup 25 | 26 | matrix: 27 | include: 28 | - python: 3.6 29 | env: 30 | - INSTALL_TYPE=sdist 31 | - CHECK_TYPE=test 32 | - python: 3.6 33 | env: 34 | - INSTALL_TYPE=wheel 35 | - CHECK_TYPE=test 36 | - python: 3.6 37 | env: 38 | - INSTALL_JOBLIB=true 39 | - INSTALL_PANDAS=true 40 | - CHECK_TYPE=test 41 | 42 | before_install: 43 | - python -m pip install --upgrade pip 44 | - if [ "${CHECK_TYPE}" == "linting" ]; then 45 | pip install flake8; 46 | fi 47 | - if [ "${CHECK_TYPE}" == "test" ]; then 48 | pip install "pytest>=3.6" pytest-cov coverage coveralls codecov; 49 | fi 50 | - if [ ! -z "${INSTALL_JOBLIB}" ]; then 51 | pip install joblib; 52 | fi 53 | - if [ ! 
-z "${INSTALL_PANDAS}" ]; then 54 | pip install pandas; 55 | fi 56 | 57 | install: 58 | - | 59 | if [ "${INSTALL_TYPE}" == "setup" ]; then 60 | python setup.py install; 61 | elif [ "${INSTALL_TYPE}" == "sdist" ]; then 62 | python setup.py sdist; 63 | pip install dist/*.tar.gz; 64 | elif [ "${INSTALL_TYPE}" == "wheel" ]; then 65 | python setup.py bdist_wheel; 66 | pip install dist/*.whl; 67 | else 68 | false; 69 | fi 70 | 71 | script: 72 | - | 73 | if [ "${CHECK_TYPE}" == "linting" ]; then 74 | flake8 pyls; 75 | elif [ "${CHECK_TYPE}" == "docdoctest" ]; then 76 | cd docs; 77 | pip install -r ./requirements.txt; 78 | make html; 79 | make doctest; 80 | elif [ "${CHECK_TYPE}" == "test" ]; then 81 | mkdir for_testing && cd for_testing; 82 | cp ../setup.cfg .; 83 | args="--cov-report term-missing --cov=pyls --doctest-modules --pyargs"; 84 | python -m pytest ${args} pyls; 85 | else 86 | false; 87 | fi 88 | 89 | after_success: 90 | - if [ "${CHECK_TYPE}" == "test" ]; then 91 | codecov; 92 | fi 93 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | In the interest of fostering an open and welcoming environment we want participation in our project and our community to be a harassment-free experience for everyone. 4 | 5 | Although no list can hope to be all-encompassing, we explicitly honor diversity in age, body size, disability, ethnicity, gender identity and expression, level of experience, native language, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | We aim to promote behavior that contributes to a positive and welcoming environment. 10 | Examples of such behavior include: 11 | 12 | * Using inclusive language 13 | * Being respectful of differing viewpoints and experiences 14 | * Showing empathy towards other community members 15 | 16 | We do not tolerate harassment or other, inappropriate behavior in our community. 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Personal or political attacks on contributors, and insulting or derogatory comments on contributed code with the intent to undermine contributions 21 | * Public or private harassment 22 | 23 | ## Our Responsibilities 24 | 25 | The maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 26 | 27 | The maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 28 | 29 | ## Scope 30 | 31 | This Code of Conduct applies both within our online GitHub repository and in public spaces when an individual is representing the project or its community. 32 | Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
33 | 34 | ## Enforcement 35 | 36 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting Ross Markello at ross.markello@mail.mcgill.ca. 37 | Confidentiality will be respected in reporting. 38 | 39 | Enforcement of this Code can include (but is not limited to): 40 | 41 | * Facilitating a conversation between the two parties involved in the violation of the Code of Conduct 42 | * Requesting a community member apologize for their behavior 43 | * Asking a community member (or multiple members) to enter a cooling-off period that puts a time-limited pause on a particular discussion topic 44 | * Asking a community member to no longer participate in the `pyls` community, including making contributions or commenting on issues/pull requests 45 | 46 | ## Attribution 47 | 48 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 49 | available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct.html](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html). 50 | 51 | [homepage]: https://www.contributor-covenant.org 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 
42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 
102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. 
If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.* setup* MANIFEST.in LICENSE requirements.txt 2 | include pyls/examples/datasets.json 3 | recursive-include data * 4 | recursive-include pyls/tests/data * 5 | include versioneer.py 6 | include pyls/_version.py 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyls 2 | 3 | This package provides a Python interface for partial least squares (PLS) analysis, a multivariate statistical technique used to relate two sets of variables. 4 | 5 | [![Build Status](https://travis-ci.org/rmarkello/pyls.svg?branch=master)](https://travis-ci.org/rmarkello/pyls) 6 | [![CircleCI](https://circleci.com/gh/rmarkello/pyls.svg?style=shield)](https://circleci.com/gh/rmarkello/pyls) 7 | [![Codecov](https://codecov.io/gh/rmarkello/pyls/branch/master/graph/badge.svg)](https://codecov.io/gh/rmarkello/pyls) 8 | [![Documentation Status](https://readthedocs.org/projects/pyls/badge/?version=latest)](http://pyls.readthedocs.io/en/latest/?badge=latest) 9 | [![License](https://img.shields.io/badge/License-GPL%202.0-blue.svg)](https://opensource.org/licenses/GPL-2.0) 10 | 11 | ## Table of Contents 12 | 13 | If you know where you're going, feel free to jump ahead: 14 | 15 | * [Installation and setup](#installation-and-setup) 16 | * [Purpose](#purpose) 17 | * [Overview](#overview) 18 | * [Background](#background) 19 | * [Usage](#usage) 20 | * [PLS correlation methods](#pls-correlation-methods) 21 | * [Behavioral PLS](#behavioral-pls) 22 | * [Mean-centered PLS](#mean-centered-pls) 23 | * [PLS regression methods](#pls-regression-methods) 24 | * [Regression with SIMPLS](#regression-with-simpls) 25 | * [PLS results](#pls-results) 26 | 27 | ## Installation and setup 28 | 29 | This package requires Python >= 3.5. Assuming you have the correct version of Python installed, you can install this package by opening a terminal and running the following: 30 | 31 | ```bash 32 | git clone https://github.com/netneurolab/pypyls.git 33 | cd pypyls 34 | python setup.py install 35 | ``` 36 | 37 | There are plans (hopes?) to get this set up on PyPI for an easier installation process, but that is a long-term goal! 38 | 39 | ## Purpose 40 | 41 | ### Overview 42 | 43 | Partial least squares (PLS) is a statistical technique that aims to find shared information between two sets of variables. 44 | If you're unfamiliar with PLS and are interested in a thorough (albeit quite technical) treatment of it, [Abdi et al., 2013](https://doi.org/10.1007/978-1-62703-059-5_23) is a good resource. 45 | There are multiple "flavors" of PLS that are tailored to different use cases; this package implements two functions that fall within the category typically referred to as **PLS-C** (PLS correlation) or **PLS-SVD** (PLS singular value decomposition) and one function that falls within the category typically referred to as **PLS-R** (PLS regression). 
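To make the "PLS-C / PLS-SVD" idea above concrete, here is a minimal sketch of the core decomposition. This is illustrative only — `pyls` layers normalization, group/condition handling, and resampling-based inference on top of this, so don't treat it as the package's exact implementation:

```python
import numpy as np

# toy data: 80 observations measured on two sets of variables
X = np.random.rand(80, 10)
Y = np.random.rand(80, 5)

# PLS-C/PLS-SVD at its core: a singular value decomposition of the
# cross-covariance between the two (column-centered) sets of variables
Xc, Yc = X - X.mean(axis=0), Y - Y.mean(axis=0)
U, S, Vt = np.linalg.svd(Yc.T @ Xc, full_matrices=False)

# U and Vt hold paired weights for the Y and X variables, respectively,
# and S**2 / np.sum(S**2) estimates the variance explained by each pair
```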
46 | 47 | ### Background 48 | 49 | The functionality of the current package largely mirrors that originally introduced by [McIntosh et al. (1996)](https://www.ncbi.nlm.nih.gov/pubmed/9345485) in their [Matlab toolbox](https://www.rotman-baycrest.on.ca/index.php?section=84). 50 | However, while the Matlab toolbox has a significant number of tools dedicated to integrating neuroimaging-specific paradigms (i.e., loading M/EEG and fMRI data), the current Python package aims to implement and expand on only the core _statistical_ functions of that toolbox. 51 | 52 | While the core algorithms of PLS implemented in this package are present (to a degree) in [`scikit-learn`](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.cross_decomposition), this package provides a different API and includes some additional functionality. 53 | Namely, `pyls`: 54 | 55 | 1. Has integrated significance and reliability testing via built-in permutation testing and bootstrap resampling, 56 | 2. Implements [mean-centered PLS](https://www.ncbi.nlm.nih.gov/pubmed/20656037) for multivariate group/condition comparisons, and 57 | 3. Uses the [SIMPLS algorithm](https://doi.org/10.1016%2F0169-7439%2893%2985002-X) instead of the [NIPALS algorithm](https://doi.org/10.1016/B978-0-12-426653-7.50032-6) for PLS regression. 58 | 59 | ## Usage 60 | 61 | `pyls` implements two subtypes of PLS-C: a more traditional form that we call "behavioral PLS" (`pyls.behavioral_pls`) and a somewhat newer form that we call "mean-centered PLS" (`pyls.meancentered_pls`). 62 | It also implements one type of PLS-R, which uses the SIMPLS algorithm (`pyls.pls_regression`); this is, in principle, very similar to "behavioral PLS." 63 | 64 | ### PLS correlation methods 65 | 66 | #### Behavioral PLS 67 | 68 | As the more "traditional" form of PLS-C, `pyls.behavioral_pls` looks to find relationships between two sets of variables. 69 | To run a behavioral PLS we would do the following: 70 | 71 | ```python 72 | >>> import numpy as np 73 | 74 | # let's create two data arrays with 80 observations 75 | >>> X = np.random.rand(80, 10000) # a 10000-feature (e.g., neural) data array 76 | >>> Y = np.random.rand(80, 10) # a 10-feature (e.g., behavioral) data array 77 | 78 | # we're going to pretend that this data is from 2 groups of 20 subjects each, 79 | # and that each subject participated in 2 task conditions 80 | >>> groups = [20, 20] # a list with the number of subjects in each group 81 | >>> n_cond = 2 # the number of tasks or conditions 82 | 83 | # run the analysis and look at the results structure 84 | >>> from pyls import behavioral_pls 85 | >>> bpls = behavioral_pls(X, Y, groups=groups, n_cond=n_cond) 86 | >>> bpls 87 | PLSResults(x_weights, y_weights, x_scores, y_scores, y_loadings, singvals, varexp, permres, 88 | bootres, splitres, cvres, inputs) 89 | ``` 90 | 
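The resampling mentioned under [Background](#background) is controlled by keyword arguments to the same function. As a hedged sketch (`n_perm` and `n_split` also appear in this repository's CI configuration; the `n_boot` keyword and the exact sub-fields of the results object are assumptions here — `help(bpls)` is the authoritative reference):

```python
# re-run the analysis with explicit resampling settings: n_perm, n_boot, and
# n_split are assumed to control the permutation, bootstrap, and split-half
# procedures, respectively
>>> bpls = behavioral_pls(X, Y, groups=groups, n_cond=n_cond,
...                       n_perm=1000, n_boot=1000, n_split=100)

# fields of the PLSResults object shown above are accessible as attributes
>>> bpls.varexp   # proportion of variance explained by each latent variable
>>> bpls.permres  # permutation-test results for assessing significance
```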
91 | #### Mean-centered PLS 92 | 93 | In contrast to behavioral PLS, `pyls.meancentered_pls` doesn't look to find relationships between two sets of variables, but rather tries to find relationships between _groupings_ in a single set of variables. As such, we will only provide it with _one_ of our created data arrays (`X`) and it will attempt to examine how the features of that array differ between groups and/or conditions. To run a mean-centered PLS we would do the following: 94 | 95 | ```python 96 | >>> from pyls import meancentered_pls 97 | >>> mpls = meancentered_pls(X, groups=groups, n_cond=n_cond) 98 | >>> mpls 99 | PLSResults(x_weights, y_weights, x_scores, y_scores, singvals, varexp, permres, bootres, splitres, 100 | inputs) 101 | ``` 102 | 103 | ### PLS regression methods 104 | 105 | #### Regression with SIMPLS 106 | 107 | Whereas `pyls.behavioral_pls` aims to maximize the symmetric relationship between `X` and `Y`, `pyls.pls_regression` performs a directed decomposition. 108 | That is, it aims to find components in `X` that explain the most variance in `Y` (but not necessarily vice versa). 109 | To run a PLS regression analysis we would do the following: 110 | 111 | ```python 112 | >>> from pyls import pls_regression 113 | >>> plsr = pls_regression(X, Y, n_components=5) 114 | >>> plsr 115 | PLSResults(x_weights, x_scores, y_scores, y_loadings, varexp, permres, bootres, inputs) 116 | ``` 117 | 118 | Currently `pyls.pls_regression()` does not support groups or conditions. 119 | 120 | ### PLS Results 121 | 122 | The docstrings of the results objects (`bpls`, `plsr`, and `mpls` in the above examples) have some information describing what each output represents, so while we work on improving our documentation you can rely on those for some insight! Try typing `help(bpls)`, `help(plsr)`, or `help(mpls)` to get more information on what the different values represent. 123 | 124 | If you are at all familiar with the Matlab PLS toolbox you might notice that the results structures have a dramatically different naming convention; despite this, all the same information should be present! 125 | -------------------------------------------------------------------------------- /data/linnerud_exercise.csv: -------------------------------------------------------------------------------- 1 | ,Chins,Situps,Jumps 2 | 0,5.0,162.0,60.0 3 | 1,2.0,110.0,60.0 4 | 2,12.0,101.0,101.0 5 | 3,12.0,105.0,37.0 6 | 4,13.0,155.0,58.0 7 | 5,4.0,101.0,42.0 8 | 6,8.0,101.0,38.0 9 | 7,6.0,125.0,40.0 10 | 8,15.0,200.0,40.0 11 | 9,17.0,251.0,250.0 12 | 10,17.0,120.0,38.0 13 | 11,13.0,210.0,115.0 14 | 12,14.0,215.0,105.0 15 | 13,1.0,50.0,50.0 16 | 14,6.0,70.0,31.0 17 | 15,12.0,210.0,120.0 18 | 16,4.0,60.0,25.0 19 | 17,11.0,230.0,80.0 20 | 18,15.0,225.0,73.0 21 | 19,2.0,110.0,43.0 22 | -------------------------------------------------------------------------------- /data/linnerud_physio.csv: -------------------------------------------------------------------------------- 1 | ,Weight,Waist,Pulse 2 | 0,191.0,36.0,50.0 3 | 1,189.0,37.0,52.0 4 | 2,193.0,38.0,58.0 5 | 3,162.0,35.0,62.0 6 | 4,189.0,35.0,46.0 7 | 5,182.0,36.0,56.0 8 | 6,211.0,38.0,56.0 9 | 7,167.0,34.0,60.0 10 | 8,176.0,31.0,74.0 11 | 9,154.0,33.0,56.0 12 | 10,169.0,34.0,50.0 13 | 11,166.0,33.0,52.0 14 | 12,154.0,34.0,64.0 15 | 13,247.0,46.0,50.0 16 | 14,193.0,36.0,46.0 17 | 15,202.0,37.0,62.0 18 | 16,176.0,37.0,54.0 19 | 17,157.0,32.0,52.0 20 | 18,156.0,33.0,54.0 21 | 19,138.0,33.0,68.0 22 | -------------------------------------------------------------------------------- /data/wine.csv: -------------------------------------------------------------------------------- 1 | ,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline 2 | 0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0 3 | 1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0 4 
| 2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0 5 | 3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0 6 | 4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0 7 | 5,14.2,1.76,2.45,15.2,112.0,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450.0 8 | 6,14.39,1.87,2.45,14.6,96.0,2.5,2.52,0.3,1.98,5.25,1.02,3.58,1290.0 9 | 7,14.06,2.15,2.61,17.6,121.0,2.6,2.51,0.31,1.25,5.05,1.06,3.58,1295.0 10 | 8,14.83,1.64,2.17,14.0,97.0,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045.0 11 | 9,13.86,1.35,2.27,16.0,98.0,2.98,3.15,0.22,1.85,7.22,1.01,3.55,1045.0 12 | 10,14.1,2.16,2.3,18.0,105.0,2.95,3.32,0.22,2.38,5.75,1.25,3.17,1510.0 13 | 11,14.12,1.48,2.32,16.8,95.0,2.2,2.43,0.26,1.57,5.0,1.17,2.82,1280.0 14 | 12,13.75,1.73,2.41,16.0,89.0,2.6,2.76,0.29,1.81,5.6,1.15,2.9,1320.0 15 | 13,14.75,1.73,2.39,11.4,91.0,3.1,3.69,0.43,2.81,5.4,1.25,2.73,1150.0 16 | 14,14.38,1.87,2.38,12.0,102.0,3.3,3.64,0.29,2.96,7.5,1.2,3.0,1547.0 17 | 15,13.63,1.81,2.7,17.2,112.0,2.85,2.91,0.3,1.46,7.3,1.28,2.88,1310.0 18 | 16,14.3,1.92,2.72,20.0,120.0,2.8,3.14,0.33,1.97,6.2,1.07,2.65,1280.0 19 | 17,13.83,1.57,2.62,20.0,115.0,2.95,3.4,0.4,1.72,6.6,1.13,2.57,1130.0 20 | 18,14.19,1.59,2.48,16.5,108.0,3.3,3.93,0.32,1.86,8.7,1.23,2.82,1680.0 21 | 19,13.64,3.1,2.56,15.2,116.0,2.7,3.03,0.17,1.66,5.1,0.96,3.36,845.0 22 | 20,14.06,1.63,2.28,16.0,126.0,3.0,3.17,0.24,2.1,5.65,1.09,3.71,780.0 23 | 21,12.93,3.8,2.65,18.6,102.0,2.41,2.41,0.25,1.98,4.5,1.03,3.52,770.0 24 | 22,13.71,1.86,2.36,16.6,101.0,2.61,2.88,0.27,1.69,3.8,1.11,4.0,1035.0 25 | 23,12.85,1.6,2.52,17.8,95.0,2.48,2.37,0.26,1.46,3.93,1.09,3.63,1015.0 26 | 24,13.5,1.81,2.61,20.0,96.0,2.53,2.61,0.28,1.66,3.52,1.12,3.82,845.0 27 | 25,13.05,2.05,3.22,25.0,124.0,2.63,2.68,0.47,1.92,3.58,1.13,3.2,830.0 28 | 26,13.39,1.77,2.62,16.1,93.0,2.85,2.94,0.34,1.45,4.8,0.92,3.22,1195.0 29 | 27,13.3,1.72,2.14,17.0,94.0,2.4,2.19,0.27,1.35,3.95,1.02,2.77,1285.0 30 | 28,13.87,1.9,2.8,19.4,107.0,2.95,2.97,0.37,1.76,4.5,1.25,3.4,915.0 31 | 29,14.02,1.68,2.21,16.0,96.0,2.65,2.33,0.26,1.98,4.7,1.04,3.59,1035.0 32 | 30,13.73,1.5,2.7,22.5,101.0,3.0,3.25,0.29,2.38,5.7,1.19,2.71,1285.0 33 | 31,13.58,1.66,2.36,19.1,106.0,2.86,3.19,0.22,1.95,6.9,1.09,2.88,1515.0 34 | 32,13.68,1.83,2.36,17.2,104.0,2.42,2.69,0.42,1.97,3.84,1.23,2.87,990.0 35 | 33,13.76,1.53,2.7,19.5,132.0,2.95,2.74,0.5,1.35,5.4,1.25,3.0,1235.0 36 | 34,13.51,1.8,2.65,19.0,110.0,2.35,2.53,0.29,1.54,4.2,1.1,2.87,1095.0 37 | 35,13.48,1.81,2.41,20.5,100.0,2.7,2.98,0.26,1.86,5.1,1.04,3.47,920.0 38 | 36,13.28,1.64,2.84,15.5,110.0,2.6,2.68,0.34,1.36,4.6,1.09,2.78,880.0 39 | 37,13.05,1.65,2.55,18.0,98.0,2.45,2.43,0.29,1.44,4.25,1.12,2.51,1105.0 40 | 38,13.07,1.5,2.1,15.5,98.0,2.4,2.64,0.28,1.37,3.7,1.18,2.69,1020.0 41 | 39,14.22,3.99,2.51,13.2,128.0,3.0,3.04,0.2,2.08,5.1,0.89,3.53,760.0 42 | 40,13.56,1.71,2.31,16.2,117.0,3.15,3.29,0.34,2.34,6.13,0.95,3.38,795.0 43 | 41,13.41,3.84,2.12,18.8,90.0,2.45,2.68,0.27,1.48,4.28,0.91,3.0,1035.0 44 | 42,13.88,1.89,2.59,15.0,101.0,3.25,3.56,0.17,1.7,5.43,0.88,3.56,1095.0 45 | 43,13.24,3.98,2.29,17.5,103.0,2.64,2.63,0.32,1.66,4.36,0.82,3.0,680.0 46 | 44,13.05,1.77,2.1,17.0,107.0,3.0,3.0,0.28,2.03,5.04,0.88,3.35,885.0 47 | 45,14.21,4.04,2.44,18.9,111.0,2.85,2.65,0.3,1.25,5.24,0.87,3.33,1080.0 48 | 46,14.38,3.59,2.28,16.0,102.0,3.25,3.17,0.27,2.19,4.9,1.04,3.44,1065.0 49 | 47,13.9,1.68,2.12,16.0,101.0,3.1,3.39,0.21,2.14,6.1,0.91,3.33,985.0 50 | 48,14.1,2.02,2.4,18.8,103.0,2.75,2.92,0.32,2.38,6.2,1.07,2.75,1060.0 51 | 
49,13.94,1.73,2.27,17.4,108.0,2.88,3.54,0.32,2.08,8.9,1.12,3.1,1260.0 52 | 50,13.05,1.73,2.04,12.4,92.0,2.72,3.27,0.17,2.91,7.2,1.12,2.91,1150.0 53 | 51,13.83,1.65,2.6,17.2,94.0,2.45,2.99,0.22,2.29,5.6,1.24,3.37,1265.0 54 | 52,13.82,1.75,2.42,14.0,111.0,3.88,3.74,0.32,1.87,7.05,1.01,3.26,1190.0 55 | 53,13.77,1.9,2.68,17.1,115.0,3.0,2.79,0.39,1.68,6.3,1.13,2.93,1375.0 56 | 54,13.74,1.67,2.25,16.4,118.0,2.6,2.9,0.21,1.62,5.85,0.92,3.2,1060.0 57 | 55,13.56,1.73,2.46,20.5,116.0,2.96,2.78,0.2,2.45,6.25,0.98,3.03,1120.0 58 | 56,14.22,1.7,2.3,16.3,118.0,3.2,3.0,0.26,2.03,6.38,0.94,3.31,970.0 59 | 57,13.29,1.97,2.68,16.8,102.0,3.0,3.23,0.31,1.66,6.0,1.07,2.84,1270.0 60 | 58,13.72,1.43,2.5,16.7,108.0,3.4,3.67,0.19,2.04,6.8,0.89,2.87,1285.0 61 | 59,12.37,0.94,1.36,10.6,88.0,1.98,0.57,0.28,0.42,1.95,1.05,1.82,520.0 62 | 60,12.33,1.1,2.28,16.0,101.0,2.05,1.09,0.63,0.41,3.27,1.25,1.67,680.0 63 | 61,12.64,1.36,2.02,16.8,100.0,2.02,1.41,0.53,0.62,5.75,0.98,1.59,450.0 64 | 62,13.67,1.25,1.92,18.0,94.0,2.1,1.79,0.32,0.73,3.8,1.23,2.46,630.0 65 | 63,12.37,1.13,2.16,19.0,87.0,3.5,3.1,0.19,1.87,4.45,1.22,2.87,420.0 66 | 64,12.17,1.45,2.53,19.0,104.0,1.89,1.75,0.45,1.03,2.95,1.45,2.23,355.0 67 | 65,12.37,1.21,2.56,18.1,98.0,2.42,2.65,0.37,2.08,4.6,1.19,2.3,678.0 68 | 66,13.11,1.01,1.7,15.0,78.0,2.98,3.18,0.26,2.28,5.3,1.12,3.18,502.0 69 | 67,12.37,1.17,1.92,19.6,78.0,2.11,2.0,0.27,1.04,4.68,1.12,3.48,510.0 70 | 68,13.34,0.94,2.36,17.0,110.0,2.53,1.3,0.55,0.42,3.17,1.02,1.93,750.0 71 | 69,12.21,1.19,1.75,16.8,151.0,1.85,1.28,0.14,2.5,2.85,1.28,3.07,718.0 72 | 70,12.29,1.61,2.21,20.4,103.0,1.1,1.02,0.37,1.46,3.05,0.906,1.82,870.0 73 | 71,13.86,1.51,2.67,25.0,86.0,2.95,2.86,0.21,1.87,3.38,1.36,3.16,410.0 74 | 72,13.49,1.66,2.24,24.0,87.0,1.88,1.84,0.27,1.03,3.74,0.98,2.78,472.0 75 | 73,12.99,1.67,2.6,30.0,139.0,3.3,2.89,0.21,1.96,3.35,1.31,3.5,985.0 76 | 74,11.96,1.09,2.3,21.0,101.0,3.38,2.14,0.13,1.65,3.21,0.99,3.13,886.0 77 | 75,11.66,1.88,1.92,16.0,97.0,1.61,1.57,0.34,1.15,3.8,1.23,2.14,428.0 78 | 76,13.03,0.9,1.71,16.0,86.0,1.95,2.03,0.24,1.46,4.6,1.19,2.48,392.0 79 | 77,11.84,2.89,2.23,18.0,112.0,1.72,1.32,0.43,0.95,2.65,0.96,2.52,500.0 80 | 78,12.33,0.99,1.95,14.8,136.0,1.9,1.85,0.35,2.76,3.4,1.06,2.31,750.0 81 | 79,12.7,3.87,2.4,23.0,101.0,2.83,2.55,0.43,1.95,2.57,1.19,3.13,463.0 82 | 80,12.0,0.92,2.0,19.0,86.0,2.42,2.26,0.3,1.43,2.5,1.38,3.12,278.0 83 | 81,12.72,1.81,2.2,18.8,86.0,2.2,2.53,0.26,1.77,3.9,1.16,3.14,714.0 84 | 82,12.08,1.13,2.51,24.0,78.0,2.0,1.58,0.4,1.4,2.2,1.31,2.72,630.0 85 | 83,13.05,3.86,2.32,22.5,85.0,1.65,1.59,0.61,1.62,4.8,0.84,2.01,515.0 86 | 84,11.84,0.89,2.58,18.0,94.0,2.2,2.21,0.22,2.35,3.05,0.79,3.08,520.0 87 | 85,12.67,0.98,2.24,18.0,99.0,2.2,1.94,0.3,1.46,2.62,1.23,3.16,450.0 88 | 86,12.16,1.61,2.31,22.8,90.0,1.78,1.69,0.43,1.56,2.45,1.33,2.26,495.0 89 | 87,11.65,1.67,2.62,26.0,88.0,1.92,1.61,0.4,1.34,2.6,1.36,3.21,562.0 90 | 88,11.64,2.06,2.46,21.6,84.0,1.95,1.69,0.48,1.35,2.8,1.0,2.75,680.0 91 | 89,12.08,1.33,2.3,23.6,70.0,2.2,1.59,0.42,1.38,1.74,1.07,3.21,625.0 92 | 90,12.08,1.83,2.32,18.5,81.0,1.6,1.5,0.52,1.64,2.4,1.08,2.27,480.0 93 | 91,12.0,1.51,2.42,22.0,86.0,1.45,1.25,0.5,1.63,3.6,1.05,2.65,450.0 94 | 92,12.69,1.53,2.26,20.7,80.0,1.38,1.46,0.58,1.62,3.05,0.96,2.06,495.0 95 | 93,12.29,2.83,2.22,18.0,88.0,2.45,2.25,0.25,1.99,2.15,1.15,3.3,290.0 96 | 94,11.62,1.99,2.28,18.0,98.0,3.02,2.26,0.17,1.35,3.25,1.16,2.96,345.0 97 | 95,12.47,1.52,2.2,19.0,162.0,2.5,2.27,0.32,3.28,2.6,1.16,2.63,937.0 98 | 96,11.81,2.12,2.74,21.5,134.0,1.6,0.99,0.14,1.56,2.5,0.95,2.26,625.0 99 | 
97,12.29,1.41,1.98,16.0,85.0,2.55,2.5,0.29,1.77,2.9,1.23,2.74,428.0 100 | 98,12.37,1.07,2.1,18.5,88.0,3.52,3.75,0.24,1.95,4.5,1.04,2.77,660.0 101 | 99,12.29,3.17,2.21,18.0,88.0,2.85,2.99,0.45,2.81,2.3,1.42,2.83,406.0 102 | 100,12.08,2.08,1.7,17.5,97.0,2.23,2.17,0.26,1.4,3.3,1.27,2.96,710.0 103 | 101,12.6,1.34,1.9,18.5,88.0,1.45,1.36,0.29,1.35,2.45,1.04,2.77,562.0 104 | 102,12.34,2.45,2.46,21.0,98.0,2.56,2.11,0.34,1.31,2.8,0.8,3.38,438.0 105 | 103,11.82,1.72,1.88,19.5,86.0,2.5,1.64,0.37,1.42,2.06,0.94,2.44,415.0 106 | 104,12.51,1.73,1.98,20.5,85.0,2.2,1.92,0.32,1.48,2.94,1.04,3.57,672.0 107 | 105,12.42,2.55,2.27,22.0,90.0,1.68,1.84,0.66,1.42,2.7,0.86,3.3,315.0 108 | 106,12.25,1.73,2.12,19.0,80.0,1.65,2.03,0.37,1.63,3.4,1.0,3.17,510.0 109 | 107,12.72,1.75,2.28,22.5,84.0,1.38,1.76,0.48,1.63,3.3,0.88,2.42,488.0 110 | 108,12.22,1.29,1.94,19.0,92.0,2.36,2.04,0.39,2.08,2.7,0.86,3.02,312.0 111 | 109,11.61,1.35,2.7,20.0,94.0,2.74,2.92,0.29,2.49,2.65,0.96,3.26,680.0 112 | 110,11.46,3.74,1.82,19.5,107.0,3.18,2.58,0.24,3.58,2.9,0.75,2.81,562.0 113 | 111,12.52,2.43,2.17,21.0,88.0,2.55,2.27,0.26,1.22,2.0,0.9,2.78,325.0 114 | 112,11.76,2.68,2.92,20.0,103.0,1.75,2.03,0.6,1.05,3.8,1.23,2.5,607.0 115 | 113,11.41,0.74,2.5,21.0,88.0,2.48,2.01,0.42,1.44,3.08,1.1,2.31,434.0 116 | 114,12.08,1.39,2.5,22.5,84.0,2.56,2.29,0.43,1.04,2.9,0.93,3.19,385.0 117 | 115,11.03,1.51,2.2,21.5,85.0,2.46,2.17,0.52,2.01,1.9,1.71,2.87,407.0 118 | 116,11.82,1.47,1.99,20.8,86.0,1.98,1.6,0.3,1.53,1.95,0.95,3.33,495.0 119 | 117,12.42,1.61,2.19,22.5,108.0,2.0,2.09,0.34,1.61,2.06,1.06,2.96,345.0 120 | 118,12.77,3.43,1.98,16.0,80.0,1.63,1.25,0.43,0.83,3.4,0.7,2.12,372.0 121 | 119,12.0,3.43,2.0,19.0,87.0,2.0,1.64,0.37,1.87,1.28,0.93,3.05,564.0 122 | 120,11.45,2.4,2.42,20.0,96.0,2.9,2.79,0.32,1.83,3.25,0.8,3.39,625.0 123 | 121,11.56,2.05,3.23,28.5,119.0,3.18,5.08,0.47,1.87,6.0,0.93,3.69,465.0 124 | 122,12.42,4.43,2.73,26.5,102.0,2.2,2.13,0.43,1.71,2.08,0.92,3.12,365.0 125 | 123,13.05,5.8,2.13,21.5,86.0,2.62,2.65,0.3,2.01,2.6,0.73,3.1,380.0 126 | 124,11.87,4.31,2.39,21.0,82.0,2.86,3.03,0.21,2.91,2.8,0.75,3.64,380.0 127 | 125,12.07,2.16,2.17,21.0,85.0,2.6,2.65,0.37,1.35,2.76,0.86,3.28,378.0 128 | 126,12.43,1.53,2.29,21.5,86.0,2.74,3.15,0.39,1.77,3.94,0.69,2.84,352.0 129 | 127,11.79,2.13,2.78,28.5,92.0,2.13,2.24,0.58,1.76,3.0,0.97,2.44,466.0 130 | 128,12.37,1.63,2.3,24.5,88.0,2.22,2.45,0.4,1.9,2.12,0.89,2.78,342.0 131 | 129,12.04,4.3,2.38,22.0,80.0,2.1,1.75,0.42,1.35,2.6,0.79,2.57,580.0 132 | 130,12.86,1.35,2.32,18.0,122.0,1.51,1.25,0.21,0.94,4.1,0.76,1.29,630.0 133 | 131,12.88,2.99,2.4,20.0,104.0,1.3,1.22,0.24,0.83,5.4,0.74,1.42,530.0 134 | 132,12.81,2.31,2.4,24.0,98.0,1.15,1.09,0.27,0.83,5.7,0.66,1.36,560.0 135 | 133,12.7,3.55,2.36,21.5,106.0,1.7,1.2,0.17,0.84,5.0,0.78,1.29,600.0 136 | 134,12.51,1.24,2.25,17.5,85.0,2.0,0.58,0.6,1.25,5.45,0.75,1.51,650.0 137 | 135,12.6,2.46,2.2,18.5,94.0,1.62,0.66,0.63,0.94,7.1,0.73,1.58,695.0 138 | 136,12.25,4.72,2.54,21.0,89.0,1.38,0.47,0.53,0.8,3.85,0.75,1.27,720.0 139 | 137,12.53,5.51,2.64,25.0,96.0,1.79,0.6,0.63,1.1,5.0,0.82,1.69,515.0 140 | 138,13.49,3.59,2.19,19.5,88.0,1.62,0.48,0.58,0.88,5.7,0.81,1.82,580.0 141 | 139,12.84,2.96,2.61,24.0,101.0,2.32,0.6,0.53,0.81,4.92,0.89,2.15,590.0 142 | 140,12.93,2.81,2.7,21.0,96.0,1.54,0.5,0.53,0.75,4.6,0.77,2.31,600.0 143 | 141,13.36,2.56,2.35,20.0,89.0,1.4,0.5,0.37,0.64,5.6,0.7,2.47,780.0 144 | 142,13.52,3.17,2.72,23.5,97.0,1.55,0.52,0.5,0.55,4.35,0.89,2.06,520.0 145 | 143,13.62,4.95,2.35,20.0,92.0,2.0,0.8,0.47,1.02,4.4,0.91,2.05,550.0 146 | 
144,12.25,3.88,2.2,18.5,112.0,1.38,0.78,0.29,1.14,8.21,0.65,2.0,855.0 147 | 145,13.16,3.57,2.15,21.0,102.0,1.5,0.55,0.43,1.3,4.0,0.6,1.68,830.0 148 | 146,13.88,5.04,2.23,20.0,80.0,0.98,0.34,0.4,0.68,4.9,0.58,1.33,415.0 149 | 147,12.87,4.61,2.48,21.5,86.0,1.7,0.65,0.47,0.86,7.65,0.54,1.86,625.0 150 | 148,13.32,3.24,2.38,21.5,92.0,1.93,0.76,0.45,1.25,8.42,0.55,1.62,650.0 151 | 149,13.08,3.9,2.36,21.5,113.0,1.41,1.39,0.34,1.14,9.4,0.57,1.33,550.0 152 | 150,13.5,3.12,2.62,24.0,123.0,1.4,1.57,0.22,1.25,8.6,0.59,1.3,500.0 153 | 151,12.79,2.67,2.48,22.0,112.0,1.48,1.36,0.24,1.26,10.8,0.48,1.47,480.0 154 | 152,13.11,1.9,2.75,25.5,116.0,2.2,1.28,0.26,1.56,7.1,0.61,1.33,425.0 155 | 153,13.23,3.3,2.28,18.5,98.0,1.8,0.83,0.61,1.87,10.52,0.56,1.51,675.0 156 | 154,12.58,1.29,2.1,20.0,103.0,1.48,0.58,0.53,1.4,7.6,0.58,1.55,640.0 157 | 155,13.17,5.19,2.32,22.0,93.0,1.74,0.63,0.61,1.55,7.9,0.6,1.48,725.0 158 | 156,13.84,4.12,2.38,19.5,89.0,1.8,0.83,0.48,1.56,9.01,0.57,1.64,480.0 159 | 157,12.45,3.03,2.64,27.0,97.0,1.9,0.58,0.63,1.14,7.5,0.67,1.73,880.0 160 | 158,14.34,1.68,2.7,25.0,98.0,2.8,1.31,0.53,2.7,13.0,0.57,1.96,660.0 161 | 159,13.48,1.67,2.64,22.5,89.0,2.6,1.1,0.52,2.29,11.75,0.57,1.78,620.0 162 | 160,12.36,3.83,2.38,21.0,88.0,2.3,0.92,0.5,1.04,7.65,0.56,1.58,520.0 163 | 161,13.69,3.26,2.54,20.0,107.0,1.83,0.56,0.5,0.8,5.88,0.96,1.82,680.0 164 | 162,12.85,3.27,2.58,22.0,106.0,1.65,0.6,0.6,0.96,5.58,0.87,2.11,570.0 165 | 163,12.96,3.45,2.35,18.5,106.0,1.39,0.7,0.4,0.94,5.28,0.68,1.75,675.0 166 | 164,13.78,2.76,2.3,22.0,90.0,1.35,0.68,0.41,1.03,9.58,0.7,1.68,615.0 167 | 165,13.73,4.36,2.26,22.5,88.0,1.28,0.47,0.52,1.15,6.62,0.78,1.75,520.0 168 | 166,13.45,3.7,2.6,23.0,111.0,1.7,0.92,0.43,1.46,10.68,0.85,1.56,695.0 169 | 167,12.82,3.37,2.3,19.5,88.0,1.48,0.66,0.4,0.97,10.26,0.72,1.75,685.0 170 | 168,13.58,2.58,2.69,24.5,105.0,1.55,0.84,0.39,1.54,8.66,0.74,1.8,750.0 171 | 169,13.4,4.6,2.86,25.0,112.0,1.98,0.96,0.27,1.11,8.5,0.67,1.92,630.0 172 | 170,12.2,3.03,2.32,19.0,96.0,1.25,0.49,0.4,0.73,5.5,0.66,1.83,510.0 173 | 171,12.77,2.39,2.28,19.5,86.0,1.39,0.51,0.48,0.64,9.899999,0.57,1.63,470.0 174 | 172,14.16,2.51,2.48,20.0,91.0,1.68,0.7,0.44,1.24,9.7,0.62,1.71,660.0 175 | 173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.7,0.64,1.74,740.0 176 | 174,13.4,3.91,2.48,23.0,102.0,1.8,0.75,0.43,1.41,7.3,0.7,1.56,750.0 177 | 175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.2,0.59,1.56,835.0 178 | 176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.3,0.6,1.62,840.0 179 | 177,14.13,4.1,2.74,24.5,96.0,2.05,0.76,0.56,1.35,9.2,0.61,1.6,560.0 180 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = pyls 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # For getting rid of generated docs before re-building 18 | clean: 19 | rm -rf $(BUILDDIR)/* auto_examples/ generated/ 20 | 21 | .PHONY: clean 22 | 23 | # Catch-all target: route all unknown targets to Sphinx using the new 24 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 25 | %: Makefile 26 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 27 | -------------------------------------------------------------------------------- /docs/_static/theme_overrides.css: -------------------------------------------------------------------------------- 1 | /* override table width restrictions */ 2 | @media screen and (min-width: 767px) { 3 | 4 | .wy-table-responsive table td { 5 | /* !important prevents the common CSS stylesheets from overriding 6 | this as on RTD they are loaded after this stylesheet */ 7 | white-space: normal !important; 8 | } 9 | 10 | .wy-table-responsive { 11 | overflow: visible !important; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | :no-members: 8 | :no-inherited-members: 9 | 10 | .. raw:: html 11 | 12 |
13 | -------------------------------------------------------------------------------- /docs/_templates/function.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. raw:: html 9 | 10 |
11 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _ref_api: 2 | 3 | .. currentmodule:: pyls 4 | 5 | ------------- 6 | Reference API 7 | ------------- 8 | 9 | This is the primary reference of ``pyls``. Please refer to the :ref:`user guide 10 | <usage>` for more information on how to best implement these functions in your 11 | own workflows. 12 | 13 | .. contents:: **List of modules** 14 | :local: 15 | 16 | .. _ref_decomp: 17 | 18 | :mod:`pyls` - PLS decompositions 19 | -------------------------------------- 20 | 21 | .. automodule:: pyls.types 22 | :no-members: 23 | :no-inherited-members: 24 | 25 | .. currentmodule:: pyls 26 | 27 | .. autosummary:: 28 | :template: function.rst 29 | :toctree: generated/ 30 | 31 | pyls.behavioral_pls 32 | pyls.meancentered_pls 33 | 34 | .. _ref_results: 35 | 36 | :mod:`pyls.structures` - PLS data structures 37 | -------------------------------------------- 38 | 39 | .. automodule:: pyls.structures 40 | :no-members: 41 | :no-inherited-members: 42 | 43 | .. currentmodule:: pyls.structures 44 | 45 | .. autosummary:: 46 | :template: class.rst 47 | :toctree: generated/ 48 | 49 | pyls.structures.PLSResults 50 | pyls.structures.PLSPermResults 51 | pyls.structures.PLSBootResults 52 | pyls.structures.PLSSplitHalfResults 53 | pyls.structures.PLSCrossValidationResults 54 | pyls.structures.PLSInputs 55 | 56 | .. _ref_io: 57 | 58 | :mod:`pyls.io` - Data I/O functionality 59 | --------------------------------------- 60 | 61 | .. automodule:: pyls.io 62 | :no-members: 63 | :no-inherited-members: 64 | 65 | .. currentmodule:: pyls 66 | 67 | .. autosummary:: 68 | :template: function.rst 69 | :toctree: generated/ 70 | 71 | pyls.save_results 72 | pyls.load_results 73 | 74 | .. _ref_matlab: 75 | 76 | :mod:`pyls.matlab` - Matlab compatibility 77 | ----------------------------------------- 78 | 79 | .. automodule:: pyls.matlab 80 | :no-members: 81 | :no-inherited-members: 82 | 83 | .. currentmodule:: pyls 84 | 85 | .. autosummary:: 86 | :template: function.rst 87 | :toctree: generated/ 88 | 89 | pyls.import_matlab_result 90 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Configuration file for the Sphinx documentation builder. 5 | 6 | # -- Path setup -------------------------------------------------------------- 7 | 8 | # If extensions (or modules to document with autodoc) are in another directory, 9 | # add these directories to sys.path here. If the directory is relative to the 10 | # documentation root, use os.path.abspath to make it absolute, like shown here. 11 | # 12 | import os 13 | import sys 14 | 15 | # -- Project information ----------------------------------------------------- 16 | 17 | project = 'pyls' 18 | copyright = '2018, pyls developers' 19 | author = 'pyls developers' 20 | 21 | # Import project to get version info 22 | sys.path.insert(0, os.path.abspath(os.path.pardir)) 23 | import pyls # noqa 24 | # The short X.Y version 25 | version = pyls.__version__ 26 | # The full version, including alpha/beta/rc tags 27 | release = pyls.__version__ 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings.
They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.autosummary', 37 | 'sphinx.ext.doctest', 38 | 'sphinx.ext.intersphinx', 39 | 'sphinx.ext.mathjax', 40 | 'sphinx.ext.napoleon', 41 | 'sphinx.ext.viewcode', 42 | ] 43 | 44 | # Generate the API documentation when building 45 | autosummary_generate = True 46 | autodoc_default_flags = ['members', 'inherited-members'] 47 | numpydoc_show_class_members = False 48 | autoclass_content = "class" 49 | 50 | # Add any paths that contain templates here, relative to this directory. 51 | templates_path = ['_templates'] 52 | 53 | # The suffix(es) of source filenames. 54 | source_suffix = '.rst' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # The language for content autogenerated by Sphinx. Refer to documentation 60 | # for a list of supported languages. 61 | # 62 | # This is also used if you do content translation via gettext catalogs. 63 | # Usually you set "language" from the command line for these cases. 64 | language = None 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | # This pattern also affects html_static_path and html_extra_path . 69 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 70 | 71 | # The name of the Pygments (syntax highlighting) style to use. 72 | pygments_style = 'sphinx' 73 | highlight_language = 'python3' 74 | 75 | # -- Options for HTML output ------------------------------------------------- 76 | 77 | # The theme to use for HTML and HTML Help pages. See the documentation for 78 | # a list of builtin themes. 79 | import sphinx_rtd_theme # noqa 80 | html_theme = 'sphinx_rtd_theme' 81 | html_show_sourcelink = False 82 | 83 | # Theme options are theme-specific and customize the look and feel of a theme 84 | # further. For a list of options available for each theme, see the 85 | # documentation. 86 | # 87 | # html_theme_options = {} 88 | 89 | html_context = { 90 | 'css_files': [ 91 | '_static/theme_overrides.css' 92 | ] 93 | } 94 | 95 | # Add any paths that contain custom static files (such as style sheets) here, 96 | # relative to this directory. They are copied after the builtin static files, 97 | # so a file named "default.css" will overwrite the builtin "default.css". 98 | html_static_path = ['_static'] 99 | 100 | # -- Options for HTMLHelp output --------------------------------------------- 101 | 102 | # Output file base name for HTML help builder. 103 | htmlhelp_basename = 'pylsdoc' 104 | 105 | # -- Extension configuration ------------------------------------------------- 106 | intersphinx_mapping = { 107 | 'numpy': ('https://docs.scipy.org/doc/numpy', None), 108 | 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None), 109 | 'sklearn': ('http://scikit-learn.org/stable', None), 110 | } 111 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | pyls: Partial Least Squares in Python 2 | ===================================== 3 | 4 | This package provides a Python interface for performing partial least squares 5 | (PLS) analyses. 6 | 7 | .. image:: https://travis-ci.org/rmarkello/pyls.svg?branch=master 8 | :target: https://travis-ci.org/rmarkello/pyls 9 | .. 
image:: https://circleci.com/gh/rmarkello/pyls.svg?style=shield 10 | :target: https://circleci.com/gh/rmarkello/pyls 11 | .. image:: https://codecov.io/gh/rmarkello/pyls/branch/master/graph/badge.svg 12 | :target: https://codecov.io/gh/rmarkello/pyls 13 | .. image:: https://readthedocs.org/projects/pyls/badge/?version=latest 14 | :target: http://pyls.readthedocs.io/en/latest 15 | .. image:: http://img.shields.io/badge/License-GPL%202.0-blue.svg 16 | :target: https://opensource.org/licenses/GPL-2.0 17 | 18 | .. _readme_installation: 19 | 20 | Installation requirements 21 | ------------------------- 22 | 23 | Currently, ``pyls`` works with Python 3.5+ and requires a few dependencies: 24 | 25 | - h5py 26 | - numpy 27 | - scikit-learn 28 | - scipy, and 29 | - tqdm 30 | 31 | Assuming you have the correct version of Python installed, you can install 32 | ``pyls`` by opening a terminal and running the following: 33 | 34 | .. code-block:: bash 35 | 36 | git clone https://github.com/rmarkello/pyls.git 37 | cd pyls 38 | python setup.py install 39 | 40 | All relevant dependencies will be installed alongside the ``pyls`` module. 41 | 42 | .. _readme_quickstart: 43 | 44 | Quickstart 45 | ---------- 46 | 47 | There are a number of ways to use ``pyls``, depending on the type of analysis 48 | you would like to perform. Assuming you have two matrices ``X`` and ``Y`` 49 | representing different observations from a set of samples (e.g., subjects, 50 | neurons, brain regions), you can run a simple analysis with: 51 | 52 | .. code-block:: python 53 | 54 | >>> import pyls 55 | >>> results = pyls.behavioral_pls(X, Y) 56 | 57 | For detailed information on the different methods available and how to 58 | interpret the results object, please refer to our :ref:`user guide <usage>`. 59 | 60 | .. _readme_development: 61 | 62 | Development and getting involved 63 | -------------------------------- 64 | 65 | If you've found a bug, are experiencing a problem, or have a question about 66 | using the package, please head on over to our `GitHub issues`_ and make a new 67 | issue with some information about it! Someone will try and get back to you 68 | as quickly as possible, though please note that the primary developer for 69 | ``pyls`` (@rmarkello) is a graduate student, so responses may take some time! 70 | 71 | If you're interested in getting involved in the project: welcome |sparkles|! 72 | We're thrilled to welcome new contributors. You should start by reading our 73 | `code of conduct`_; all activity on ``pyls`` should adhere to the CoC. After 74 | that, take a look at our `contributing guidelines`_ so you're familiar with the 75 | processes we (generally) try to follow when making changes to the repository! 76 | Once you're ready to jump in, head on over to our issues to see if there's 77 | anything you might like to work on. 78 | 79 | .. _readme_licensing: 80 | 81 | License Information 82 | ------------------- 83 | 84 | This codebase is licensed under the GNU General Public License, version 2. The 85 | full license can be found in the `LICENSE`_ file in the ``pyls`` distribution. 86 | 87 | All trademarks referenced herein are property of their respective holders. 88 | 89 | .. toctree:: 90 | :maxdepth: 2 91 | 92 | usage 93 | api 94 | 95 | .. |sparkles| replace:: ✨ 96 | .. _code of conduct: https://github.com/rmarkello/pyls/blob/master/CODE_OF_CONDUCT.md 97 | .. _contributing guidelines: https://github.com/rmarkello/pyls/blob/master/CONTRIBUTING.md 98 | .. _GitHub issues: https://github.com/rmarkello/pyls/issues 99 | ..
_LICENSE: https://github.com/rmarkello/pyls/blob/master/LICENSE 100 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | -r ../requirements.txt 2 | sphinx>=2.0 3 | sphinx_rtd_theme 4 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | .. _usage: 2 | 3 | ---------- 4 | User guide 5 | ---------- 6 | 7 | Partial least squares (PLS) is a multivariate statistical technique that aims 8 | to find shared information between two sets of variables. If you're unfamiliar 9 | with PLS and are interested in a thorough (albeit quite technical) treatment, 10 | `Abdi et al., 2013 `_ is a good 11 | resource. 12 | 13 | This user guide will go through the basic statistical concepts of the two types 14 | of PLS implemented in the current package (:ref:`usage_behavioral` and 15 | :ref:`usage_meancentered`) and demonstrate how to interpret and use the results 16 | of a PLS analysis (:ref:`usage_results`). If you still have questions after 17 | going through this guide then you can refer to the :ref:`ref_api`! 18 | 19 | .. toctree:: 20 | :caption: Table of Contents 21 | :numbered: 22 | :maxdepth: 2 23 | 24 | user_guide/behavioral.rst 25 | user_guide/meancentered.rst 26 | user_guide/results.rst 27 | -------------------------------------------------------------------------------- /docs/user_guide/behavioral.rst: -------------------------------------------------------------------------------- 1 | .. testsetup:: 2 | 3 | import numpy as np 4 | np.set_printoptions(suppress=True) 5 | 6 | .. _usage_behavioral: 7 | 8 | Behavioral PLS 9 | ============== 10 | 11 | Running a behavioral PLS using ``pyls`` is as simple as: 12 | 13 | .. code-block:: 14 | 15 | >>> import pyls 16 | >>> out = pyls.behavioral_pls(X, Y) 17 | 18 | What we call behavioral PLS in the ``pyls`` package is actually the more 19 | traditional form of PLS (and is generally not prefixed with "behavioral"). This 20 | form of PLS, at its core, attempts to find shared information between two sets 21 | of features derived from a common set of samples. However, as with all things, 22 | there are a number of ever-so-slightly different kinds of PLS that exist in the 23 | wild, so to be thorough we're going to briefly explain the exact flavor 24 | implemented here before diving into a more illustrative example. 25 | 26 | What *exactly* do we mean by "behavioral PLS"? 27 | ---------------------------------------------- 28 | 29 | **Technical answer**: :py:func:`pyls.behavioral_pls` employs a symmetrical, 30 | singular value decomposition (SVD) based form of PLS, and is sometimes referred 31 | to as PLS-correlation (PLS-C), PLS-SVD, or, infrequently, EZ-PLS. Notably, it 32 | is **not** the same as PLS regression (PLS-R). 33 | 34 | **Less technical answer**: :py:func:`pyls.behavioral_pls` is like performing a 35 | principal components analysis (PCA) but when you have two related datasets, 36 | each with multiple features. 37 | 38 | Differences from PLS regression (PLS-R) 39 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 40 | 41 | You can think of the differences between PLS-C and PLS-R similar to how you 42 | might consider the differences between a Pearson correlation and a simple 43 | linear regression. 
Though this analogy is an over-simplification, the primary 44 | difference to take away is that behavioral PLS (PLS-C) does *not assess* 45 | *directional relationships between sets of data* (e.g., X → Y), but rather 46 | looks at how the two sets generally covary (e.g., X ↔ Y). 47 | 48 | To understand this a bit more we can walk through a detailed example. 49 | 50 | An exercise in calisthenics 51 | --------------------------- 52 | 53 | .. note:: 54 | Descriptions of PLS are almost always accompanied by a litany of equations, 55 | and for good reason: understanding how to interpret the results of a PLS 56 | requires at least a cursory understanding of the math behind it. As such, 57 | this example is going to rely on these equations, but will always do so in 58 | the context of real data. The hope is that this approach will help make the 59 | more abstract mathematical concepts a bit more concrete (and easier to 60 | apply to new data sets!). 61 | 62 | We'll start by loading the example dataset [1]_: 63 | 64 | .. doctest:: 65 | 66 | >>> from pyls.examples import load_dataset 67 | >>> data = load_dataset('linnerud') 68 | 69 | This is the same dataset as in :py:func:`sklearn.datasets.load_linnerud`; the 70 | formatting has just been lightly modified to better suit our purposes. 71 | 72 | Our ``data`` object can be treated as a dictionary, containing all the 73 | information necessary to run a PLS analysis. The keys can be accessed as 74 | attributes, so we can take a quick look at our input matrices 75 | :math:`\textbf{X}` and :math:`\textbf{Y}`: 76 | 77 | .. doctest:: 78 | 79 | >>> sorted(data.keys()) 80 | ['X', 'Y', 'n_boot', 'n_perm'] 81 | >>> data.X.shape 82 | (20, 3) 83 | >>> data.X.head() 84 | Chins Situps Jumps 85 | 0 5.0 162.0 60.0 86 | 1 2.0 110.0 60.0 87 | 2 12.0 101.0 101.0 88 | 3 12.0 105.0 37.0 89 | 4 13.0 155.0 58.0 90 | 91 | The rows of our :math:`\textbf{X}_{n \times p}` matrix here represent *n* 92 | subjects, and the columns indicate *p* different types of exercises these 93 | subjects were able to perform. So the first subject was able to do 5 chin-ups, 94 | 162 situps, and 60 jumping jacks. 95 | 96 | .. doctest:: 97 | 98 | >>> data.Y.shape 99 | (20, 3) 100 | >>> data.Y.head() 101 | Weight Waist Pulse 102 | 0 191.0 36.0 50.0 103 | 1 189.0 37.0 52.0 104 | 2 193.0 38.0 58.0 105 | 3 162.0 35.0 62.0 106 | 4 189.0 35.0 46.0 107 | 108 | The rows of our :math:`\textbf{Y}_{n \times q}` matrix *also* represent *n* 109 | subjects (critically, the same subjects as in :math:`\textbf{X}`), and the 110 | columns indicate *q* physiological measurements taken for each subject. That 111 | same subject referenced above thus has a weight of 191 pounds, a 36 inch waist, 112 | and a pulse of 50 beats per minute. 113 | 114 | Behavioral PLS will attempt to establish whether a relationship exists between 115 | the exercises performed and these physiological variables. If we wanted to run 116 | the full analysis right away, we could do so with: 117 | 118 | .. doctest:: 119 | 120 | >>> from pyls import behavioral_pls 121 | >>> results = behavioral_pls(**data) 122 | 123 | If you're comfortable with the down-and-dirty of PLS and want to go ahead and 124 | start understanding the ``results`` object, feel free to jump ahead to 125 | :ref:`usage_results`. 
Otherwise, read on for more about what's happening behind 126 | the scenes of :py:func:`~.behavioral_pls` 127 | 128 | The cross-covariance matrix 129 | --------------------------- 130 | 131 | Behavioral PLS works by decomposing the cross-covariance matrix 132 | :math:`\textbf{R}_{q \times p}` generated from the input matrices, where 133 | :math:`\textbf{R} = \textbf{Y}^{T} \textbf{X}`. The results of PLS are a 134 | bit easier to interpret when :math:`\textbf{R}` is the cross-correlation matrix 135 | instead of the cross-covariance matrix, which means that we should z-score each 136 | feature in :math:`\textbf{X}` and :math:`\textbf{Y}` before multiplying them; 137 | this is done automatically by the :py:func:`~.behavioral_pls` function. 138 | 139 | In our example, :math:`\textbf{R}` ends up being a 3 x 3 matrix: 140 | 141 | .. doctest:: 142 | 143 | >>> from pyls.compute import xcorr 144 | >>> R = xcorr(data.X, data.Y) 145 | >>> R 146 | Chins Situps Jumps 147 | Weight -0.389694 -0.493084 -0.226296 148 | Waist -0.552232 -0.645598 -0.191499 149 | Pulse 0.150648 0.225038 0.034933 150 | 151 | The :math:`q` rows of this matrix correspond to the physiological measurements 152 | and the :math:`p` columns to the exercises. Examining the first row, we can see 153 | that ``-0.389694`` is the correlation between ``Weight`` and ``Chins`` across 154 | all the subjects, ``-0.493084`` the correlation between ``Weight`` and 155 | ``Situps``, and so on. 156 | 157 | Singular value decomposition 158 | ---------------------------- 159 | 160 | Once we have generated our correlation matrix :math:`\textbf{R}` we subject it 161 | to a singular value decomposition, where :math:`\textbf{R} = \textbf{USV}^{T}`: 162 | 163 | .. doctest:: 164 | 165 | >>> from pyls.compute import svd 166 | >>> U, S, V = svd(R) 167 | >>> U.shape, S.shape, V.shape 168 | ((3, 3), (3, 3), (3, 3)) 169 | 170 | The outputs of this decomposition are two arrays of left and right singular 171 | vectors (:math:`\textbf{U}_{p \times l}` and :math:`\textbf{V}_{q \times l}`) 172 | and a diagonal matrix of singular values (:math:`\textbf{S}_{l \times l}`). The 173 | rows of :math:`\textbf{U}` correspond to the exercises from our input matrix 174 | :math:`\textbf{X}`, and the rows of :math:`\textbf{V}` correspond to the 175 | physiological measurements from our input matrix :math:`\textbf{Y}`. The 176 | columns of :math:`\textbf{U}` and :math:`\textbf{V}`, on the other hand, 177 | represent new dimensions or components that have been "discovered" in the data. 178 | 179 | .. 180 | 181 | The :math:`i^{th}` columns of :math:`\textbf{U}` and :math:`\textbf{V}` weigh 182 | the contributions of these exercises and physiological measurements, 183 | respectively. Taken together, the :math:`i^{th}` left and right singular 184 | vectors and singular value represent a *latent variable*, a multivariate 185 | pattern that weighs the original exercise and physiological measurements such 186 | that they maximally covary with each other. 187 | 188 | The :math:`i^{th}` singular value is proportional to the total 189 | exercise-physiology covariance accounted for by the latent variable. The 190 | effect size (:math:`\eta`) associated with a particular latent variable can be 191 | estimated as the ratio of the squared singular value (:math:`\sigma`) to the 192 | sum of all the squared singular values: 193 | 194 | .. 
math:: 195 | 196 | \eta_{i} = \sigma_{i}^{2} \big/ \sum \limits_{j=1}^{l} \sigma_{j}^{2} 197 | 198 | We can use the helper function :py:func:`pyls.compute.varexp` to calculate this 199 | for us: 200 | 201 | .. doctest:: 202 | 203 | >>> from pyls.compute import varexp 204 | >>> pctvar = varexp(S)[0, 0] 205 | >>> print('{:.4f}'.format(pctvar)) 206 | 0.9947 207 | 208 | Taking a look at the variance explained, we see that a whopping ~99.5% of the 209 | covariance between the exercises and physiological measurements in 210 | :math:`\textbf{X}` and :math:`\textbf{Y}` is explained by this latent 211 | variable, suggesting that the relationship between these variables can be 212 | effectively explained by a single dimension. 213 | 214 | Examining the weights from the singular vectors: 215 | 216 | .. doctest:: 217 | 218 | >>> U[:, 0] 219 | array([0.61330742, 0.7469717 , 0.25668519]) 220 | >>> V[:, 0] 221 | array([-0.58989118, -0.77134059, 0.23887675]) 222 | 223 | we see that all the exercises (``U[:, 0]``) are positively weighted, but that 224 | the physiological measurements (``V[:, 0]``) are split, with ``Weight`` and 225 | ``Waist`` measurements negatively weighted and ``Pulse`` positively weighted. 226 | (Note that the order of the weights is the same as the order of the original 227 | columns in our :math:`\textbf{X}` and :math:`\textbf{Y}` matrices.) Taken 228 | together this suggests that, for the subjects in this dataset, individuals who 229 | completed more of a given exercise tended to: 230 | 231 | 1. Complete more of the other exercises, and 232 | 2. Have a lower weight, smaller waist, and higher heart rate. 233 | 234 | It is also worth examining how correlated the projections of the original 235 | variables on this latent variable are. To do that, we can multiply the original 236 | data matrices by the relevant singular vectors and then correlate the results: 237 | 238 | .. doctest:: 239 | 240 | >>> from scipy.stats import pearsonr 241 | >>> XU = np.dot(data.X, U) 242 | >>> YV = np.dot(data.Y, V) 243 | >>> r, p = pearsonr(XU[:, 0], YV[:, 0]) 244 | >>> print('r = {:.4f}, p = {:.4f}'.format(r, p)) 245 | r = 0.4900, p = 0.0283 246 | 247 | The correlation value of this latent variable (~ ``0.49``) suggests that our 248 | interpretation of the singular vector weights, above, is only *somewhat* 249 | accurate. We can think of this correlation (ranging from -1 to 1) as a proxy 250 | for the question: "how often is this interpretation of the singular vectors 251 | true?" Correlations closer to -1 indicate that the interpretation is largely 252 | inaccurate across subjects, whereas correlations closer to 1 indicate the 253 | interpretation is largely accurate across subjects. 254 | 255 | Latent variable significance testing 256 | ------------------------------------ 257 | 258 | Scientists love null-hypothesis significance testing, so there's a strong urge 259 | for researchers doing these sorts of analyses to want to find a way to 260 | determine whether observed latent variables are significant(ly different from a 261 | specified null model). The issue comes in determining what aspect of the latent 262 | variables to test! 263 | 264 | With behavioral PLS we assess whether the **variance explained** by a given 265 | latent variable is significantly different from what would be expected under a 266 | null. Importantly, that null is generated by re-computing the latent variables 267 | from random permutations of the original data, generating a non-parametric 268 | distribution of explained variances by which to measure "significance." 269 |
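In practice you don't have to construct this null distribution yourself: the
permutation test is run automatically as part of :py:func:`~.behavioral_pls`,
and its outputs are stored with the rest of the results. As a quick sketch
(here ``permres`` is the permutation sub-structure of the results object
discussed in :ref:`usage_results`; the exact values will vary from run to run
because the permutations are random):

.. code-block:: python

    >>> results = behavioral_pls(**data)
    >>> results.permres.pvals  # one non-parametric p-value per latent variable
    array([...])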
270 | .. 271 | 272 | Reliability of the singular vectors 273 | ----------------------------------- 274 | 275 | 276 | 277 | .. [1] Tenenhaus, M. (1998). La régression PLS: théorie et pratique. Editions 278 | Technip. 279 | -------------------------------------------------------------------------------- /docs/user_guide/meancentered.rst: -------------------------------------------------------------------------------- 1 | .. _usage_meancentered: 2 | 3 | Mean-centered PLS 4 | ================= 5 | 6 | In contrast to behavioral PLS, mean-centered PLS doesn't aim to find 7 | relationships between two sets of variables. Instead, it tries to find 8 | relationships between *groupings* in a single set of variables. Indeed, you can 9 | think of it almost like a multivariate t-test or ANOVA (depending on how many 10 | groups you have). 11 | 12 | An oenological example 13 | ---------------------- 14 | 15 | .. doctest:: 16 | 17 | >>> from pyls.examples import load_dataset 18 | >>> data = load_dataset('wine') 19 | 20 | This is the same dataset as in :py:func:`sklearn.datasets.load_wine`; the 21 | formatting has just been lightly modified to better suit our purposes. 22 | 23 | Our ``data`` object can be treated as a dictionary, containing all the 24 | information necessary to run a PLS analysis. The keys can be accessed as 25 | attributes, so we can take a quick look at our input matrix: 26 | 27 | .. doctest:: 28 | 29 | >>> sorted(data.keys()) 30 | ['X', 'groups', 'n_boot', 'n_perm'] 31 | >>> data.X.shape 32 | (178, 13) 33 | >>> data.X.columns 34 | Index(['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 35 | 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 36 | 'proanthocyanins', 'color_intensity', 'hue', 37 | 'od280/od315_of_diluted_wines', 'proline'], 38 | dtype='object') 39 | >>> data.groups 40 | [59, 71, 48] 41 |
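From here the full analysis can be run by unpacking the ``data`` object, just
as with behavioral PLS; the remaining entries (``groups``, ``n_perm``, and
``n_boot``) are passed along automatically:

.. code-block:: python

    >>> from pyls import meancentered_pls
    >>> results = meancentered_pls(**data)

The returned object is the same :py:class:`~.structures.PLSResults` structure
described in :ref:`usage_results`.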
-------------------------------------------------------------------------------- /docs/user_guide/results.rst: -------------------------------------------------------------------------------- 1 | .. _usage_results: 2 | 3 | PLS Results 4 | =========== 5 | 6 | So you ran a PLS analysis and got some results. Congratulations! The easy part 7 | is done. 🙃 Interpreting (trying to interpret) the results of a PLS 8 | analysis---similar to interpreting the results of a PCA or factor analysis or 9 | CCA or any other complex decomposition---can be difficult. The ``pyls`` package 10 | contains some functions, tools, and data structures to try and help. 11 | 12 | The :py:class:`~.structures.PLSResults` data structure is, at its core, a 13 | Python dictionary that is designed to contain all possible results from any of 14 | the analyses available in :py:mod:`pyls.types`. Let's generate a small example 15 | results object to play around with. We'll use the dataset from the 16 | :ref:`usage_behavioral` example: 17 | 18 | .. doctest:: 19 | 20 | >>> from pyls.examples import load_dataset 21 | >>> data = load_dataset('linnerud') 22 | 23 | We can generate the results file by running the behavioral PLS analysis again. 24 | We pass the ``verbose=False`` flag to suppress the progress bar that would 25 | normally be displayed: 26 | 27 | .. doctest:: 28 | 29 | >>> from pyls import behavioral_pls 30 | >>> results = behavioral_pls(**data, verbose=False) 31 | >>> results 32 | PLSResults(x_weights, y_weights, x_scores, y_scores, y_loadings, singvals, varexp, permres, bootres, cvres, inputs) 33 | 34 | Printing the ``results`` object gives us a helpful view of some of the 35 | different outputs available to us. While we won't go into detail about all of 36 | these (see the :ref:`ref_api` for info on those), we'll touch on a few of the 37 | potentially more confusing ones. 38 | -------------------------------------------------------------------------------- /pyls/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __all__ = [ 4 | '__version__', 5 | 'behavioral_pls', 'meancentered_pls', 'pls_regression', 6 | 'import_matlab_result', 'save_results', 'load_results', 7 | 'examples', 'PLSInputs', 'PLSResults', 8 | 9 | ] 10 | 11 | from ._version import get_versions 12 | __version__ = get_versions()['version'] 13 | del get_versions 14 | 15 | from . import examples 16 | from .io import load_results, save_results 17 | from .matlab import import_matlab_result 18 | from .structures import PLSInputs, PLSResults 19 | from .types import (behavioral_pls, meancentered_pls, pls_regression) 20 | -------------------------------------------------------------------------------- /pyls/compute.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | from scipy.stats import zscore, zmap 5 | from sklearn.utils.extmath import randomized_svd 6 | from sklearn.utils.validation import check_X_y, check_random_state 7 | from pyls import utils 8 | 9 | 10 | def svd(crosscov, n_components=None, seed=None): 11 | """ 12 | Calculates the SVD of `crosscov` and returns singular vectors/values 13 | 14 | Parameters 15 | ---------- 16 | crosscov : (B, T) array_like 17 | Cross-covariance (or cross-correlation) matrix to be decomposed 18 | n_components : int, optional 19 | Number of components to retain from decomposition 20 | seed : {int, :obj:`numpy.random.RandomState`, None}, optional 21 | Seed for random number generation. Default: None 22 | 23 | Returns 24 | ------- 25 | U : (B, L) `numpy.ndarray` 26 | Left singular vectors from singular value decomposition 27 | d : (L, L) `numpy.ndarray` 28 | Diagonal array of singular values from singular value decomposition 29 | V : (T, L) `numpy.ndarray` 30 | Right singular vectors from singular value decomposition 31 | """ 32 | 33 | seed = check_random_state(seed) 34 | crosscov = np.asanyarray(crosscov) 35 | 36 | if n_components is None: 37 | n_components = min(crosscov.shape) 38 | elif not isinstance(n_components, int): 39 | raise TypeError('Provided `n_components` {} must be of type int' 40 | .format(n_components)) 41 | 42 | # run most computationally efficient SVD 43 | if crosscov.shape[0] <= crosscov.shape[1]: 44 | U, d, V = randomized_svd(crosscov.T, n_components=n_components, 45 | random_state=seed, transpose=False) 46 | V = V.T 47 | else: 48 | V, d, U = randomized_svd(crosscov, n_components=n_components, 49 | random_state=seed, transpose=False) 50 | U = U.T 51 | 52 | return U, np.diag(d), V 53 | 54 | 55 | def xcorr(X, Y, norm=False, covariance=False): 56 | """ 57 | Calculates the cross-covariance matrix of `X` and `Y` 58 | 59 | Parameters 60 | ---------- 61 | X : (S, B) array_like 62 | Input matrix, where `S` is samples and `B` is features.
63 | Y : (S, T) array_like 64 | Input matrix, where `S` is samples and `T` is features. 65 | norm : bool, optional 66 | Whether to normalize `X` and `Y` (i.e., sum of squares = 1). Default: 67 | False 68 | covariance : bool, optional 69 | Whether to calculate the cross-covariance matrix instead of the cross- 70 | correlation matrix. Default: False 71 | 72 | Returns 73 | ------- 74 | xprod : (T, B) `numpy.ndarray` 75 | Cross-covariance of `X` and `Y` 76 | """ 77 | 78 | check_X_y(X, Y, multi_output=True) 79 | 80 | # we could just use scipy.stats zscore but doing it by hand retains the 81 | # original data structure; if pandas dataframes were given, a dataframe 82 | # will be returned 83 | if not covariance: 84 | Xn = (X - X.mean(axis=0)) / X.std(axis=0, ddof=1) 85 | Yn = (Y - Y.mean(axis=0)) / Y.std(axis=0, ddof=1) 86 | else: 87 | Xn, Yn = X - X.mean(0, keepdims=True), Y - Y.mean(0, keepdims=True) 88 | 89 | if norm: 90 | Xn, Yn = normalize(Xn), normalize(Yn) 91 | 92 | xprod = (Yn.T @ Xn) / (len(Xn) - 1) 93 | 94 | return xprod 95 | 96 | 97 | def normalize(X, axis=0): 98 | """ 99 | Normalizes `X` along `axis` 100 | 101 | Utilizes Frobenius norm (or Hilbert-Schmidt norm / `L_{p,q}` norm where 102 | `p=q=2`) 103 | 104 | Parameters 105 | ---------- 106 | X : (S, B) array_like 107 | Input array 108 | axis : int, optional 109 | Axis for normalization. Default: 0 110 | 111 | Returns 112 | ------- 113 | normed : (S, B) `numpy.ndarray` 114 | Normalized `X` 115 | """ 116 | 117 | normed = np.array(X) 118 | normal_base = np.linalg.norm(normed, axis=axis, keepdims=True) 119 | # avoid divide-by-zero errors 120 | zero_items = np.where(normal_base == 0) 121 | normal_base[zero_items] = 1 122 | # normalize and re-set zero_items to 0 123 | normed = normed / normal_base 124 | normed[zero_items] = 0 125 | 126 | return normed 127 | 128 | 129 | def rescale_test(X_train, X_test, Y_train, U, V): 130 | """ 131 | Generates out-of-sample predicted `Y` values 132 | 133 | Parameters 134 | ---------- 135 | X_train : (S1, B) array_like 136 | Data matrix, where `S1` is observations and `B` is features 137 | X_test : (S2, B) array_like 138 | Data matrix, where `S2` is observations and `B` is features 139 | Y_train : (S1, T) array_like 140 | Behavioral matrix, where `S1` is observations and `T` is features U : (B, L) array_like Left singular vectors from the training-data decomposition V : (T, L) array_like Right singular vectors from the training-data decomposition 141 | 142 | Returns 143 | ------- 144 | Y_pred : (S2, T) `numpy.ndarray` 145 | Behavioral matrix, where `S2` is observations and `T` is features 146 | """ 147 | 148 | X_resc = zmap(X_test, compare=X_train, ddof=1) 149 | Y_pred = (X_resc @ U @ V.T) + Y_train.mean(axis=0, keepdims=True) 150 | 151 | return Y_pred 152 | 153 | 154 | def perm_sig(orig, perm): 155 | """ 156 | Calculates significance of `orig` values against `perm` distributions 157 | 158 | Compares amplitude of each singular value to distribution created via 159 | permutation in `perm` 160 | 161 | Parameters 162 | ---------- 163 | orig : (L, L) array_like 164 | Diagonal matrix of singular values for `L` latent variables 165 | perm : (L, P) array_like 166 | Distribution of singular values from permutation testing where `P` is 167 | the number of permutations 168 | 169 | Returns 170 | ------- 171 | sprob : (L,) `numpy.ndarray` 172 | Number of permutations where singular values exceeded original data 173 | decomposition for each of `L` latent variables normalized by the total 174 | number of permutations. Can be interpreted as the statistical 175 | significance of the latent variables (i.e., non-parametric p-value).
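    Examples
    --------
    A toy two-component example (values chosen purely for illustration):

    >>> import numpy as np
    >>> from pyls.compute import perm_sig
    >>> orig = np.diag([5., 3.])
    >>> perm = np.array([[6., 4., 2.],
    ...                  [1., 1., 1.]])
    >>> perm_sig(orig, perm)
    array([0.5 , 0.25])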
176 | """ 177 | 178 | sp = np.sum(perm > np.diag(orig)[:, None], axis=1) + 1 179 | sprob = sp / (perm.shape[-1] + 1) 180 | 181 | return sprob 182 | 183 | 184 | def boot_ci(boot, ci=95): 185 | """ 186 | Generates CI for bootstrapped values `boot` 187 | 188 | Parameters 189 | ---------- 190 | boot : (G, L, B) array_like 191 | Singular vectors, where `G` is features, `L` is components, and `B` is 192 | bootstraps 193 | ci : (0, 100) float, optional 194 | Confidence interval bounds to be calculated. Default: 95 195 | 196 | Returns 197 | ------- 198 | lower : (G, L) `numpy.ndarray` 199 | Lower bound of CI for singular vectors in `boot` 200 | upper : (G, L) `numpy.ndarray` 201 | Upper bound of CI for singular vectors in `boot` 202 | """ 203 | 204 | low = (100 - ci) / 2 205 | prc = [low, 100 - low] 206 | 207 | lower, upper = np.percentile(boot, prc, axis=-1) 208 | 209 | return lower, upper 210 | 211 | 212 | def boot_rel(orig, u_sum, u_square, n_boot): 213 | """ 214 | Determines bootstrap ratios (BSR) of saliences from bootstrap distributions 215 | 216 | Parameters 217 | ---------- 218 | orig : (G, L) array_like 219 | Original singular vectors 220 | u_sum : (G, L) array_like 221 | Sum of bootstrapped singular vectors 222 | u_square : (G, L) array_like 223 | Sum of squared bootstraped singular vectors 224 | n_boot : int 225 | Number of bootstraps used in generating `u_sum` and `u_square` 226 | 227 | Returns 228 | ------- 229 | bsr : (G, L) `numpy.ndarray` 230 | Bootstrap ratios for provided singular vectors 231 | """ 232 | 233 | u_sum2 = (u_sum ** 2) / n_boot 234 | u_se = np.sqrt(np.abs(u_square - u_sum2) / (n_boot - 1)) 235 | bsr = orig / u_se 236 | 237 | return bsr, u_se 238 | 239 | 240 | def procrustes(original, permuted, singular): 241 | """ 242 | Performs Procrustes rotation on `permuted` to align with `original` 243 | 244 | `original` and `permuted` should be either left *or* right singular 245 | vector from two SVDs. `singular` should be the diagonal matrix of 246 | singular values from the SVD that generated `original` 247 | 248 | Parameters 249 | ---------- 250 | original : array_like 251 | permuted : array_like 252 | singular : array_like 253 | 254 | Returns 255 | ------- 256 | resamp : `numpy.ndarray` 257 | Singular values of rotated `permuted` matrix 258 | """ 259 | 260 | temp = original.T @ permuted 261 | N, _, P = randomized_svd(temp, n_components=min(temp.shape)) 262 | resamp = permuted @ singular @ (P.T @ N.T) 263 | 264 | return resamp 265 | 266 | 267 | def get_group_mean(X, Y, n_cond=1, mean_centering=0): 268 | """ 269 | Parameters 270 | ---------- 271 | X : (S, B) array_like 272 | Input data matrix, where `S` is observations and `B` is features 273 | Y : (S, T) array_like, optional 274 | Dummy coded input array, where `S` is observations and `T` 275 | corresponds to the number of different groups x conditions. A value 276 | of 1 indicates that an observation belongs to a specific group or 277 | condition. 278 | n_cond : int, optional 279 | Number of conditions in dummy coded `Y` array. Default: 1 280 | mean_centering : {0, 1, 2}, optional 281 | Mean-centering method. 
Default: 0 282 | 283 | Returns 284 | ------- 285 | group_mean : (T, B) `numpy.ndarray` 286 | Means to be removed from `X` during centering 287 | """ 288 | 289 | if mean_centering == 0: 290 | # we want means of GROUPS, collapsing across conditions 291 | inds = slice(0, Y.shape[-1], n_cond) 292 | groups = utils.dummy_code(Y[:, inds].sum(axis=0).astype(int) * n_cond) 293 | elif mean_centering == 1: 294 | # we want means of CONDITIONS, collapsing across groups 295 | groups = Y.copy() 296 | elif mean_centering == 2: 297 | # we want the overall mean of the entire dataset 298 | groups = np.ones((len(X), 1)) 299 | else: 300 | raise ValueError("Mean centering type must be in [0, 1, 2].") 301 | 302 | # get mean of data over grouping variable 303 | group_mean = np.row_stack([X[grp].mean(axis=0)[None] for grp in 304 | groups.T.astype(bool)]) 305 | 306 | # we want group_mean to have the same number of rows as Y does columns 307 | # that way, we can easily subtract it for mean centering the data 308 | # and generating the matrix for SVD 309 | if mean_centering == 0: 310 | group_mean = np.repeat(group_mean, n_cond, axis=0) 311 | elif mean_centering == 1: 312 | group_mean = group_mean.reshape(-1, n_cond, X.shape[-1]).mean(axis=0) 313 | group_mean = np.tile(group_mean.T, int(Y.shape[-1] / n_cond)).T 314 | else: 315 | group_mean = np.repeat(group_mean, Y.shape[-1], axis=0) 316 | 317 | return group_mean 318 | 319 | 320 | def get_mean_center(X, Y, n_cond=1, mean_centering=0, means=True): 321 | """ Mean-centers `X` based on the groups/conditions coded in `Y` 322 | Parameters 323 | ---------- 324 | X : (S, B) array_like 325 | Input data matrix, where `S` is observations and `B` is features 326 | Y : (S, T) array_like 327 | Dummy coded input array, where `S` is observations and `T` 328 | corresponds to the number of different groups x conditions. A value 329 | of 1 indicates that an observation belongs to a specific group or 330 | condition. 331 | n_cond : int, optional 332 | Number of conditions in dummy coded `Y` array. Default: 1 333 | mean_centering : {0, 1, 2}, optional 334 | Mean-centering method. Default: 0 335 | means : bool, optional 336 | Whether to return demeaned averages instead of demeaned data.
Default: 337 | True 338 | 339 | Returns 340 | ------- 341 | mean_centered : {(T, B), (S, B)} `numpy.ndarray` 342 | If `means` is True, returns array with shape (T, B); otherwise, returns 343 | (S, B) 344 | """ 345 | 346 | mc = get_group_mean(X, Y, n_cond=n_cond, mean_centering=mean_centering) 347 | 348 | if means: 349 | # take mean of groups and subtract relevant mean_centering entry 350 | mean_centered = np.row_stack([X[grp].mean(axis=0) - mc[n] for (n, grp) 351 | in enumerate(Y.T.astype(bool))]) 352 | else: 353 | # subtract relevant mean_centering entry from each observation 354 | mean_centered = np.row_stack([X[grp] - mc[n][None] for (n, grp) 355 | in enumerate(Y.T.astype(bool))]) 356 | 357 | return mean_centered 358 | 359 | 360 | def efficient_corr(x, y): 361 | """ 362 | Computes correlation of matching columns in `x` and `y` 363 | 364 | Parameters 365 | ---------- 366 | x, y : (N, M) array_like 367 | Input data arrays 368 | 369 | Returns 370 | ------- 371 | corr : (M,) numpy.ndarray 372 | Correlations of columns in `x` and `y` 373 | """ 374 | 375 | # we need 2D arrays 376 | x, y = np.vstack(x), np.vstack(y) 377 | 378 | # check shapes 379 | if x.shape != y.shape: 380 | if x.shape[-1] != 1 and y.shape[-1] != 1: 381 | raise ValueError('Provided inputs x and y must either have ' 382 | 'matching shapes or one must be a column ' 383 | 'vector.\nProvided data:\n\tx: {}\n\ty: {}' 384 | .format(x.shape, y.shape)) 385 | 386 | corr = np.sum(zscore(x, ddof=1) * zscore(y, ddof=1), axis=0) / (len(x) - 1) 387 | 388 | # fix rounding errors 389 | corr = np.clip(corr, -1, 1) 390 | 391 | return corr 392 | 393 | 394 | def varexp(singular): 395 | """ 396 | Calculates the variance explained by values in `singular` 397 | 398 | Parameters 399 | ---------- 400 | singular : (L, L) array_like 401 | Singular values from singular value decomposition 402 | 403 | Returns 404 | ------- 405 | varexp : (L, L) `numpy.ndarray` 406 | Variance explained 407 | """ 408 | 409 | if singular.ndim != 2: 410 | raise ValueError('Provided `singular` array must be a square diagonal ' 411 | 'matrix, not array of shape {}' 412 | .format(singular.shape)) 413 | 414 | return np.diag(np.diag(singular)**2 / np.sum(np.diag(singular)**2)) 415 | -------------------------------------------------------------------------------- /pyls/examples/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['available_datasets', 'load_dataset', 'query_dataset'] 2 | 3 | from .datasets import available_datasets, load_dataset, query_dataset 4 | -------------------------------------------------------------------------------- /pyls/examples/datasets.json: -------------------------------------------------------------------------------- 1 | { 2 | "linnerud": { 3 | "description": "These data come from a toy example demonstrating the relationship between exercise ability and physiological fitness.", 4 | "reference": "Tenenhaus, M. (1998). La regression PLS: theorie et pratique. 
Editions Technip.", 5 | "urls": [ 6 | "https://raw.githubusercontent.com/rmarkello/pyls/3f5e79227d2f9f80887e80bea107a9c7e6b0e0c2/data/linnerud_exercise.csv", 7 | "https://raw.githubusercontent.com/rmarkello/pyls/3f5e79227d2f9f80887e80bea107a9c7e6b0e0c2/data/linnerud_physio.csv" 8 | ], 9 | "X": "linnerud_exercise.csv", 10 | "Y": "linnerud_physio.csv", 11 | "n_perm": 1000, 12 | "n_boot": 1000 13 | }, 14 | "mirchi_2018": { 15 | "description": "Study examining the relationship between changes in functional brain connectivity derived from resting-state functional magnetic resonance imaging (rsfMRI) and behavioral mood scores using the MyConnectome database.", 16 | "reference": "Mirchi, N., Betzel, R. F., Bernhardt, B. C., Dagher, A., & Mišić, B. (2018). Tracking mood fluctuations with functional network patterns. Social Cognitive and Affective Neuroscience.", 17 | "urls": [ 18 | "https://www.dropbox.com/s/29pmo4uf19go442/myconnectome_fc.npy?dl=1", 19 | "https://www.dropbox.com/s/w7px20kxwvqx1d1/myconnectome_panas.csv?dl=1", 20 | "http://web.stanford.edu/group/poldracklab/myconnectome-data/base/parcellation/parcel_data.txt" 21 | ], 22 | "X": "myconnectome_fc.npy", 23 | "Y": "myconnectome_panas.csv", 24 | "n_perm": 10000, 25 | "n_boot": 10000, 26 | "test_size": 0.25, 27 | "test_split": 100, 28 | "parcellation": "parcel_data.txt" 29 | }, 30 | "wine": { 31 | "description": "These data are the results of a chemical analysis of wines grown in the same region in Italy but derived from three different cultivars. The analysis determined the quantities of 13 constituents found in each of the three types of wines.", 32 | "reference": "Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.", 33 | "urls": [ 34 | "https://raw.githubusercontent.com/rmarkello/pyls/3f5e79227d2f9f80887e80bea107a9c7e6b0e0c2/data/wine.csv" 35 | ], 36 | "X": "wine.csv", 37 | "n_perm": 1000, 38 | "n_boot": 1000, 39 | "groups": [ 40 | 59, 41 | 71, 42 | 48 43 | ] 44 | }, 45 | "whitaker_vertes_2016": { 46 | "description": "Study examining the relationship between developmental brain changes derived from structural magnetic resonance imaging (sMRI) and genetic expression in the brain using the NeuroScience in Psychiatry Network (NSPN) dataset", 47 | "reference": "Whitaker, K. J., Vértes, P. E., Romero-Garcia, R., Váša, F., Moutoussis, M., Prabhu, G., Weiskopf, N., Callaghan, M. F., Wagstyl, K., Rittman, T., Tait, R., Ooi, C., Suckling, J., Inkster, B., Fonagy, P., Dolan, R. J., Jones, P. B., Goodyer, I. M., Bullmore, E. T. (2016). Adolescence is associated with genomically patterned consolidation of the hubs of the human brain connectome. 
Proceedings of the National Academy of Sciences, 113(32), 9105-9110.", 48 | "urls": [ 49 | "https://raw.githubusercontent.com/KirstieJane/NSPN_WhitakerVertes_PNAS2016/master/DATA/PLS_gene_predictor_vars.csv", 50 | "https://raw.githubusercontent.com/KirstieJane/NSPN_WhitakerVertes_PNAS2016/master/CT_MT_ANALYSES/COMPLETE/PLS/COVARS_none/PLS_MRI_response_vars.csv" 51 | ], 52 | "X": "PLS_gene_predictor_vars.csv", 53 | "Y": "PLS_MRI_response_vars.csv", 54 | "n_perm": 1000, 55 | "n_boot": 1000, 56 | "n_components": 2 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /pyls/examples/datasets.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Functions and utilities for getting datasets for PLS examples 4 | """ 5 | 6 | import json 7 | import os 8 | from pkg_resources import resource_filename 9 | import urllib 10 | 11 | import numpy as np 12 | 13 | from ..structures import PLSInputs 14 | 15 | try: 16 | import pandas as pd 17 | pandas_avail = True 18 | except ImportError: 19 | pandas_avail = False 20 | 21 | with open(resource_filename('pyls', 'examples/datasets.json'), 'r') as src: 22 | _DATASETS = json.load(src) 23 | 24 | 25 | def available_datasets(name=None): 26 | """ 27 | Lists available datasets to download 28 | 29 | Returns 30 | ------- 31 | datasets : list 32 | List of available datasets 33 | """ 34 | 35 | if name is not None: 36 | if name not in _DATASETS.keys(): 37 | raise ValueError('Provided dataset {} is not available. Dataset ' 38 | 'must be one of: {}.' 39 | .format(name, available_datasets())) 40 | else: 41 | return name 42 | 43 | return list(_DATASETS.keys()) 44 | 45 | 46 | def query_dataset(name, key='description'): 47 | """ 48 | Queries dataset `name` for information specified by `key` 49 | 50 | Parameters 51 | ---------- 52 | name : str 53 | Name of dataset. Must be in :func:`pyls.examples.available_datasets()` 54 | key : str, optional 55 | Key to query from `name`. If not specified will return a list of 56 | available keys. Default: 'description' 57 | 58 | Returns 59 | ------- 60 | value 61 | Value specified by `key` for dataset `name` 62 | """ 63 | 64 | name = available_datasets(name) 65 | if key is None: 66 | return list(_DATASETS.get(name).keys()) 67 | 68 | value = _DATASETS.get(name).get(key, None) 69 | if value is None: 70 | raise KeyError('Provided key {} not specified for dataset {}. ' 71 | 'Available keys are {}' 72 | .format(key, name, list(_DATASETS.get(name).keys()))) 73 | 74 | return value 75 | 76 | 77 | def _get_data_dir(data_dir=None): 78 | """ 79 | Gets path to pyls data directory 80 | 81 | Parameters 82 | ---------- 83 | data_dir : str, optional 84 | Path to use as data directory. If not specified, will check for 85 | environmental variable 'PYLS_DATA'; if that is not set, will use 86 | `~/pyls-data` instead. Default: None 87 | 88 | Returns 89 | ------- 90 | data_dir : str 91 | Path to use as data directory 92 | """ 93 | 94 | if data_dir is None: 95 | data_dir = os.environ.get('PYLS_DATA', os.path.join('~', 'pyls-data')) 96 | data_dir = os.path.expanduser(data_dir) 97 | if not os.path.exists(data_dir): 98 | os.makedirs(data_dir) 99 | 100 | return data_dir 101 | 102 | 103 | def load_dataset(name, data_dir=None, verbose=1, return_reference=False): 104 | """ 105 | Loads dataset provided by `name` into a :obj:`PLSInputs` object 106 | 107 | Parameters 108 | ---------- 109 | name : str 110 | Name of dataset.
Must be in :func:`pyls.examples.available_datasets()` 111 | data_dir : str, optional 112 | Path to use as data directory to store dataset. If not specified, will 113 | check for environmental variable 'PYLS_DATA'; if that is not set, will 114 | use `~/pyls-data` instead. Default: None 115 | verbose : int, optional 116 | Level of verbosity for status messages about fetching/loading dataset. 117 | Set to 0 for no updates. Default: 1 118 | return_reference : bool, optional 119 | Whether to return APA-style reference for dataset specified by `name`. 120 | Default: False 121 | 122 | Returns 123 | ------- 124 | dataset : :obj:`~.structures.PLSInputs` 125 | PLSInputs object containing pre-loaded data ready to run PLS analysis. 126 | Rerun the analysis by calling :func:`pyls.behavioral_pls(**dataset)` or 127 | :func:`pyls.meancentered_pls(**dataset)`, as appropriate 128 | """ 129 | 130 | name = available_datasets(name) 131 | data_dir = _get_data_dir(data_dir) 132 | _get_dataset(name, data_dir, verbose=verbose) 133 | 134 | dataset = PLSInputs() 135 | for key, value in _DATASETS.get(name, {}).items(): 136 | if isinstance(value, str) and key in PLSInputs.allowed: 137 | fname = os.path.join(data_dir, name, value) 138 | if fname.endswith('.csv') or fname.endswith('.txt'): 139 | if pandas_avail: 140 | value = pd.read_csv(fname, index_col=0) 141 | else: 142 | value = np.genfromtxt(fname, skip_header=True, 143 | delimiter=',')[:, 1:] 144 | elif fname.endswith('.npy'): 145 | value = np.load(fname) 146 | else: 147 | raise ValueError('Cannot recognize datatype of {}. Please ' 148 | 'create an issue on GitHub with dataset you ' 149 | 'are trying to load ({})'.format(fname, name)) 150 | dataset[key] = value 151 | 152 | # make some dataset-specific corrections 153 | if name == 'whitaker_vertes_2016': 154 | dataset.X = dataset.X.T 155 | 156 | if return_reference: 157 | return dataset, query_dataset(name, 'reference') 158 | 159 | return dataset 160 | 161 | 162 | def _get_dataset(name, data_dir=None, verbose=1): 163 | """ 164 | Downloads dataset defined by `name` 165 | 166 | Parameters 167 | ---------- 168 | name : str 169 | Name of dataset. 
Must be in :func:`pyls.examples.available_datasets()` 170 | data_dir : str 171 | Path to use as data directory to store dataset 172 | """ 173 | 174 | data_dir = os.path.join(_get_data_dir(data_dir), name) 175 | os.makedirs(data_dir, exist_ok=True) 176 | 177 | for url in _DATASETS.get(name, {}).get('urls', []): 178 | parse = urllib.parse.urlparse(url) 179 | fname = os.path.join(data_dir, os.path.basename(parse.path)) 180 | 181 | if not os.path.exists(fname): 182 | out = urllib.request.urlopen(url) 183 | with open(fname, 'wb') as dest: 184 | dest.write(out.read()) 185 | -------------------------------------------------------------------------------- /pyls/io.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Functions for saving and loading PLS data objects 4 | """ 5 | 6 | import h5py 7 | import numpy as np 8 | 9 | from .structures import PLSResults 10 | 11 | 12 | def save_results(fname, results): 13 | """ 14 | Saves PLS `results` to hdf5 file `fname` 15 | 16 | If `fname` does not end with '.hdf5' it will be appended 17 | 18 | Parameters 19 | ---------- 20 | fname : str 21 | Filepath to where hdf5 file should be created and `results` stored 22 | results : :obj:`pyls.structures.PLSResults` 23 | PLSResults object to be saved 24 | 25 | Returns 26 | ------- 27 | fname : str 28 | Filepath to created file 29 | """ 30 | 31 | def _recursive_save(h5file, obj, group='/results'): 32 | """ 33 | Recursively saves `obj` to `h5file` in `group` 34 | 35 | Parameters 36 | ---------- 37 | h5file : :obj:`h5py.File` 38 | obj : dict 39 | group : str, optional 40 | Group in `h5file` in which to create datasets 41 | """ 42 | 43 | grp = h5file.create_group(group) 44 | for key, item in obj.items(): 45 | if isinstance(item, dict): 46 | _recursive_save(h5file, item, group=group + '/' + key) 47 | elif isinstance(item, np.ndarray): 48 | grp.create_dataset(key, item.shape, item.dtype)[...] 
= item 49 | else: 50 | if item is not None: 51 | grp.attrs[key] = item 52 | else: 53 | grp.attrs[key] = 'None' 54 | 55 | if not isinstance(fname, str): 56 | fname = str(fname) 57 | 58 | if not fname.endswith('.hdf5'): 59 | fname += '.hdf5' 60 | 61 | with h5py.File(fname, 'w') as h5: 62 | _recursive_save(h5, results, group='/results') 63 | 64 | return fname 65 | 66 | 67 | def load_results(fname): 68 | """ 69 | Load PLS results stored in `fname`, generated by `pyls.save_results()` 70 | 71 | Parameters 72 | ---------- 73 | fname : str 74 | Filepath to HDF5 file containing PLS results 75 | 76 | Returns 77 | ------- 78 | results : :obj:`pyls.structures.PLSResults` 79 | Loaded PLS results 80 | """ 81 | 82 | def _recursive_load(h5file, group='/results'): 83 | """ 84 | Recursively loads data from `h5file` 85 | 86 | Parameters 87 | ---------- 88 | h5file : :obj:`h5py.File` 89 | group : str, optional 90 | Group in `h5file` from which to load datasets 91 | 92 | Returns 93 | ------- 94 | results : dict 95 | Dictionary containing loaded data 96 | """ 97 | 98 | results = dict() 99 | for key, item in h5file[group].items(): 100 | if isinstance(item, h5py.Dataset): 101 | results[key] = item[()] 102 | elif isinstance(item, h5py.Group): 103 | results[key] = _recursive_load(h5file, group=group + '/' + key) 104 | for key, value in h5file[group].attrs.items(): 105 | if isinstance(value, str) and value == 'None': 106 | value = None 107 | results[key] = value 108 | 109 | return results 110 | 111 | if not isinstance(fname, str): 112 | fname = str(fname) 113 | 114 | if not fname.endswith('.hdf5'): 115 | fname += '.hdf5' 116 | 117 | if not h5py.is_hdf5(fname): 118 | raise TypeError('Provided file {} is not valid HDF5 format.' 119 | .format(fname)) 120 | 121 | with h5py.File(fname, 'r') as h5file: 122 | return PLSResults(**_recursive_load(h5file, '/results')) 123 | -------------------------------------------------------------------------------- /pyls/matlab/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Utilities for handling PLS results generated using the Matlab PLS toolbox 4 | """ 5 | 6 | __all__ = ['import_matlab_result'] 7 | 8 | from .io import import_matlab_result 9 | -------------------------------------------------------------------------------- /pyls/matlab/io.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from collections.abc import MutableMapping 4 | 5 | import numpy as np 6 | import scipy.io as sio 7 | 8 | from ..structures import PLSResults 9 | 10 | _result_mapping = ( 11 | ('u', 'x_weights'), 12 | ('s', 'singvals'), 13 | ('v', 'y_weights'), 14 | ('usc', 'x_scores'), 15 | ('vsc', 'y_scores'), 16 | ('lvcorrs', 'y_loadings'), 17 | # permres 18 | ('perm_result_sprob', 'pvals'), 19 | ('perm_result_permsamp', 'permsamples'), 20 | # bootres 21 | ('boot_result_compare_u', 'x_weights_normed'), 22 | ('boot_result_u_se', 'x_weights_stderr'), 23 | ('boot_result_bootsamp', 'bootsamples'), 24 | # splitres 25 | ('perm_splithalf_orig_ucorr', 'ucorr'), 26 | ('perm_splithalf_orig_vcorr', 'vcorr'), 27 | ('perm_splithalf_ucorr_prob', 'ucorr_pvals'), 28 | ('perm_splithalf_vcorr_prob', 'vcorr_pvals'), 29 | ('perm_splithalf_ucorr_ul', 'ucorr_uplim'), 30 | ('perm_splithalf_vcorr_ul', 'vcorr_uplim'), 31 | ('perm_splithalf_ucorr_ll', 'ucorr_lolim'), 32 | ('perm_splithalf_vcorr_ll', 'vcorr_lolim'), 33 | # inputs 34 | ('inputs_X', 'X'), 35 | ('stacked_behavdata', 'Y'), 36
| ('num_subj_lst', 'groups'), 37 | ('num_conditions', 'n_cond'), 38 | ('perm_result_num_perm', 'n_perm'), 39 | ('boot_result_num_boot', 'n_boot'), 40 | ('perm_splithalf_num_split', 'n_split'), 41 | ('boot_result_clim', 'ci'), 42 | ('other_input_meancentering_type', 'mean_centering'), 43 | ('method', 'method') 44 | ) 45 | 46 | _mean_centered_mapping = ( 47 | ('boot_result_orig_usc', 'contrast'), 48 | ('boot_result_distrib', 'contrast_boot'), 49 | ('boot_result_ulusc', 'contrast_ci_up'), 50 | ('boot_result_llusc', 'contrast_ci_lo'), 51 | ) 52 | 53 | _behavioral_mapping = ( 54 | ('boot_result_orig_corr', 'y_loadings'), 55 | ('boot_result_distrib', 'y_loadings_boot'), 56 | ('boot_result_ulcorr', 'y_loadings_ci_up'), 57 | ('boot_result_llcorr', 'y_loadings_ci_lo'), 58 | ) 59 | 60 | 61 | def _coerce_void(value): 62 | """ 63 | Converts `value` to an instance of `value.dtype` 64 | 65 | Parameters 66 | ---------- 67 | value : array_like 68 | 69 | Returns 70 | ------- 71 | value : dtype 72 | `Value` coerced to `dtype` 73 | """ 74 | 75 | if np.squeeze(value).ndim == 0: 76 | return value.dtype.type(value.squeeze()) 77 | else: 78 | return np.squeeze(value) 79 | 80 | 81 | def _flatten(d, parent_key='', sep='_'): 82 | """ 83 | Flattens nested dictionary `d` into single dictionary with new keyset 84 | 85 | Parameters 86 | ---------- 87 | d : dict 88 | Dictionary to be flattened 89 | parent_key : str, optional 90 | Key of parent dictionary of `d`. Default: '' 91 | sep : str, optional 92 | How to join keys of `d` with `parent_key`, if provided. Default: '_' 93 | 94 | Returns 95 | ------- 96 | flat : dict 97 | Flattened input dictionary `d` 98 | 99 | Notes 100 | ----- 101 | Taken directly from https://stackoverflow.com/a/6027615 102 | """ 103 | 104 | items = [] 105 | for k, v in d.items(): 106 | new_key = parent_key + sep + k if parent_key else k 107 | if isinstance(v, MutableMapping): 108 | items.extend(_flatten(v, new_key, sep=sep).items()) 109 | else: 110 | items.append((new_key, v)) 111 | return dict(items) 112 | 113 | 114 | def _rename_keys(d, mapping): 115 | """ 116 | Renames keys in dictionary `d` based on tuples in `mapping` 117 | 118 | Parameters 119 | ---------- 120 | d : dict 121 | Dictionary with keys to be renamed 122 | mapping : list of tuples 123 | List of (oldkey, newkey) pairs to rename entries in `d` 124 | 125 | Returns 126 | ------- 127 | renamed : dict 128 | Input dictionary `d` with keys renamed 129 | """ 130 | 131 | new_dict = d.copy() 132 | for oldkey, newkey in mapping: 133 | try: 134 | new_dict[newkey] = new_dict.pop(oldkey) 135 | except KeyError: 136 | pass 137 | 138 | return new_dict 139 | 140 | 141 | def import_matlab_result(fname, datamat='datamat_lst'): 142 | """ 143 | Imports `fname` PLS result from Matlab 144 | 145 | Parameters 146 | ---------- 147 | fname : str 148 | Filepath to output mat file obtained from Matlab PLS toolbox. Should 149 | contain at least a result struct object. 150 | datamat : str, optional 151 | Variable name of datamat ('X' array) provided to original PLS if it 152 | exists in `fname`. By default the datamat is not stored in the PLS results 153 | structure, but if it was saved in `fname` it can be loaded and 154 | cached in the returned results object.
Default: 'datamat_lst' 155 | 156 | Returns 157 | ------- 158 | results : :obj:`~.structures.PLSResults` 159 | Matlab results in a Python-friendly format 160 | """ 161 | 162 | def get_labels(fields): 163 | labels = [k for k, v in sorted(fields.items(), 164 | key=lambda x: x[-1][-1])] 165 | return labels 166 | 167 | # load mat file using scipy.io 168 | matfile = sio.loadmat(fname) 169 | 170 | # if 'result' key is missing then consider this a malformed PLS result mat 171 | try: 172 | result = matfile.get('result')[0, 0] 173 | except (IndexError, TypeError): 174 | raise ValueError('Cannot get result struct from provided mat file') 175 | 176 | # convert result structure to a dictionary using dtypes as keys 177 | labels = get_labels(result.dtype.fields) 178 | result = {labels[n]: value for n, value in enumerate(result)} 179 | 180 | # convert sub-structures to dictionaries using dtypes as keys 181 | struct = ['boot_result', 'perm_result', 'perm_splithalf', 'other_input'] 182 | for attr in struct: 183 | if result.get(attr) is not None: 184 | labels = get_labels(result[attr].dtype.fields) 185 | result[attr] = {labels[n]: _coerce_void(value) for n, value 186 | in enumerate(result[attr][0, 0])} 187 | 188 | # get input data from results file, if it exists 189 | X = matfile.get(datamat) 190 | result['inputs'] = dict(X=np.vstack(X[:, 0])) if X is not None else dict() 191 | 192 | # squeeze all the values so they're a bit more interpretable 193 | for key, val in result.items(): 194 | if isinstance(val, np.ndarray): 195 | result[key] = _coerce_void(val) 196 | 197 | # flatten the dictionary and rename the keys according to our mapping 198 | result = _rename_keys(_flatten(result), _result_mapping) 199 | if result['method'] == 3: 200 | result = _rename_keys(result, _behavioral_mapping) 201 | if 'y_loadings_ci_up' in result: 202 | result['y_loadings_ci'] = np.stack([ 203 | result['y_loadings_ci_lo'], result['y_loadings_ci_up'] 204 | ], axis=-1) 205 | else: 206 | result = _rename_keys(result, _mean_centered_mapping) 207 | if 'contrast_ci_up' in result: 208 | result['contrast_ci'] = np.stack([ 209 | result['contrast_ci_lo'], result['contrast_ci_up'] 210 | ], axis=-1) 211 | 212 | # index arrays - 1 to account for Matlab vs Python 1- vs 0-indexing 213 | for key in ['bootsamples', 'permsamples']: 214 | try: 215 | result[key] -= 1 216 | except KeyError: 217 | continue 218 | 219 | if result.get('n_split', None) is None: 220 | result['n_split'] = None 221 | 222 | # pack it into a `PLSResults` class instance for easy attribute access 223 | results = PLSResults(**result) 224 | 225 | return results 226 | -------------------------------------------------------------------------------- /pyls/plotting/meancentered.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Functions for plotting results from a mean-centered PLS 4 | """ 5 | 6 | import numpy as np 7 | import pandas as pd 8 | import seaborn as sns 9 | 10 | 11 | def _set_group_lvls(n_conds, n_grps, grp_lvls=None): 12 | """ 13 | Derives a pandas data series of group labels 14 | 15 | Parameters 16 | ---------- 17 | n_conds : int 18 | Number of conditions in the analysis 19 | n_grps : int 20 | Number of groups in the analysis 21 | grp_lvls : list, optional 22 | List of group labels 23 | 24 | Returns 25 | ------- 26 | labels : pd.Series 27 | Series of group labels aligned to the input data structure 28 | """ 29 | 30 | grping = [] 31 | if grp_lvls is None: 32 | for i in range(n_grps): 33 | grping += 
["Group" + str(i)] * n_conds 34 | else: 35 | for i in range(n_grps): 36 | grping.extend([grp_lvls[i]] * n_conds) 37 | return pd.Series(grping, name='Group') 38 | 39 | 40 | def _set_cond_lvls(n_conds, n_grps, cond_lvls=None): 41 | """ 42 | Derives a pandas series of condition labels 43 | 44 | Parameters 45 | ---------- 46 | n_conds : int 47 | Number of conditions in the analysis 48 | n_grps : int 49 | Number of groups in the analysis 50 | cond_lvls : list, optional 51 | List of condition labels 52 | 53 | Returns 54 | ------- 55 | labels : pd.Series 56 | Series of condition labels aligned to the input data structure 57 | """ 58 | 59 | if cond_lvls is None: 60 | cond_lvls = ["Condition" + str(i) for i in range(n_conds)] * n_grps 61 | else: 62 | cond_lvls = cond_lvls * n_grps 63 | 64 | return pd.Series(cond_lvls, name='Condition') 65 | 66 | 67 | def _define_vars(results, cond_lvls=None, grp_lvls=None): 68 | """ 69 | Create a pandas data frame from `results` for easy plotting 70 | 71 | Uses the result dictionary returned by PLS as well as user-supplied 72 | condition and group label(s). 73 | 74 | Parameters 75 | ---------- 76 | results : :obj:pyls.PLSResults 77 | The PLS result dictionary 78 | cond_lvls : list, optional 79 | List of condition labels 80 | grp_lvls : list, optional 81 | List of group labels 82 | 83 | Returns 84 | ------- 85 | df : pd.DataFrame 86 | A pandas DataFrame with derived estimates (and upper- and lower- 87 | estimated error) for all latent variables 88 | """ 89 | 90 | estimate = results.bootres.contrast 91 | ul = results.bootres.contrast_uplim 92 | ll = results.bootres.contrast_lolim 93 | 94 | n_grps = len(results.inputs.groups) 95 | n_conds = estimate.shape[1] // n_grps 96 | cond = _set_cond_lvls(n_conds, n_grps, cond_lvls=cond_lvls) 97 | grp = _set_group_lvls(n_conds, n_grps, grp_lvls=grp_lvls) 98 | 99 | num_est = estimate.shape[1] + 1 # for 1-based indexing in plots 100 | colnames = [] 101 | for itm in ['Estimate_LV', 'UL_LV', 'LL_LV']: 102 | for i in range(1, num_est): 103 | colnames.append(itm + str(i)) 104 | 105 | df = pd.DataFrame(np.hstack((estimate, ul, ll)), columns=colnames) 106 | df = pd.concat([df, cond, grp], axis=1) 107 | return df 108 | 109 | 110 | def _rearrange_df(df, plot_order): 111 | """ 112 | Rearranged `df` according to `plot_order` 113 | 114 | In examining plots, users may wish to rearrange the order in which 115 | conditions are presented in order to ease visual interpretation. This 116 | function reorders the dataframe as desired 117 | 118 | Parameters 119 | ---------- 120 | df : pandas.DataFrame 121 | Dataframe containing condition, group labels, and PLS results 122 | plot_order : list 123 | User-defined order in which to plot conditions 124 | 125 | Returns 126 | ------- 127 | df : pd.DataFrame 128 | Provided dataframe `df` with re-ordered conditions 129 | """ 130 | 131 | sorter_idx = dict(zip(plot_order, range(len(plot_order)))) 132 | df['Cond_Arrange'] = df['Condition'].map(sorter_idx) 133 | df = df.sort_values(by=['Group', 'Cond_Arrange'], ascending=[False, True]) 134 | return df.drop(columns=['Cond_Arrange']) 135 | 136 | 137 | def plot_contrast(results, lv=0, cond_labels=None, group_labels=None, 138 | cond_order=None, **kwargs): 139 | """ 140 | Plots group / condition contrast from `results` for a provided `lv` 141 | 142 | Parameters 143 | ---------- 144 | results : :obj:pyls.PLSResults 145 | The PLS result dictionary 146 | lv : int, optional 147 | Index of desired latent variable to plot. 
Uses zero-indexing, so the 148 | first latent variable is `lv=0`. Default: 0 149 | cond_labels : list, optional 150 | List of condition labels as they were supplied to the original PLS. 151 | If not supplied, uses "ConditionX" as label. Default: None 152 | group_labels : list, optional 153 | List of group labels as they were supplied to the original PLS. If 154 | not supplied, uses "GroupX" as label. Default: None 155 | cond_order : list, optional 156 | Desired order for plotting conditions. If not supplied, plots 157 | conditions in order they were provided to original PLS. Default: None 158 | **kwargs : key, value mappings 159 | Keyword arguments passed to :obj:seaborn.barplot 160 | 161 | Returns 162 | ------- 163 | ax : matplotlib.axes.Axes 164 | A matplotlib axes object for saving or modifying 165 | """ 166 | 167 | df = _define_vars(results, cond_lvls=cond_labels, grp_lvls=group_labels) 168 | if cond_order is not None: 169 | df = _rearrange_df(df, cond_order) 170 | num_sig = (len(df.columns) - 2) // 3 171 | ax = sns.barplot(x="Group", y=df[df.columns[lv]], hue="Condition", 172 | data=df, capsize=0.1, errwidth=1.25, alpha=0.25, ci=None, 173 | **kwargs) 174 | ax.legend(bbox_to_anchor=(1.1, 1.05)) 175 | x = [r.get_x() for r in ax.patches] 176 | nx = np.sort(x) 177 | abs_err = np.abs([df[df.columns[lv + (num_sig * 2)]].to_numpy(), 178 | df[df.columns[lv + num_sig]].to_numpy()] 179 | - df[df.columns[lv]].to_numpy()) 180 | ax.errorbar(x=nx + (np.diff(nx).min() / 2), 181 | y=df[df.columns[lv]], fmt='none', yerr=abs_err, ecolor='black') 182 | 183 | return ax 184 | -------------------------------------------------------------------------------- /pyls/structures.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Data structures to hold PLS inputs and results objects 4 | """ 5 | 6 | from multiprocessing import cpu_count 7 | from textwrap import dedent 8 | from .utils import ResDict 9 | 10 | _pls_input_docs = dict( 11 | decomposition_narrative=dedent("""\ 12 | The singular value decomposition generates mutually orthogonal latent 13 | variables (LVs), comprised of left and right singular vectors and a 14 | diagonal matrix of singular values. The `i`-th pair of singular vectors 15 | details the contributions of individual input features to an overall, 16 | multivariate pattern (the `i`-th LV), and the singular values explain the 17 | amount of variance captured by that pattern. 18 | 19 | Statistical significance of the LVs is determined via permutation testing. 20 | Bootstrap resampling is used to examine the contribution and reliability of 21 | the input features to each LV. Split-half resampling can optionally be used 22 | to assess the reliability of the LVs. Cross-validation can optionally 23 | be used to examine how well the decomposition performs when employed 24 | in a predictive framework.\ 25 | """), 26 | input_matrix=dedent("""\ 27 | X : (S, B) array_like 28 | Input data matrix, where `S` is samples and `B` is features\ 29 | """), 30 | groups=dedent("""\ 31 | groups : (G,) list of int 32 | List with the number of subjects present in each of `G` groups. Input 33 | data should be organized as subjects within groups (i.e., groups should 34 | be vertically stacked). If there is only one group this can be left 35 | blank.\ 36 | """), 37 | conditions=dedent("""\ 38 | n_cond : int 39 | Number of conditions observed in data. Note that all subjects must have 40 | the same number of conditions.
If both conditions and groups are 41 | present then the input data should be organized as subjects within 42 | conditions within groups (i.e., g1c1s[1-S], g1c2s[1-S], g2c1s[1-S], 43 | g2c2s[1-S]).\ 44 | """), 45 | mean_centering=dedent("""\ 46 | mean_centering : {0, 1, 2}, optional 47 | Mean-centering method to use. This will determine how the mean-centered 48 | matrix is generated and what effects are "boosted" during the SVD. 49 | Default: 0\ 50 | """), 51 | # perms / resampling / crossval 52 | stat_test=dedent("""\ 53 | n_perm : int, optional 54 | Number of permutations to use for testing significance of components. 55 | Default: 5000 56 | n_boot : int, optional 57 | Number of bootstraps to use for testing reliability of data features. 58 | Default: 5000\ 59 | """), 60 | split_half=dedent("""\ 61 | n_split : int, optional 62 | Number of split-half resamples to assess during permutation testing. 63 | Default: 0\ 64 | """), 65 | cross_val=dedent("""\ 66 | test_split : int, optional 67 | Number of splits for generating test sets during cross-validation. 68 | Default: 100 69 | test_size : [0, 1) float, optional 70 | Proportion of data to partition to test set during cross-validation. 71 | Default: 0.25\ 72 | """), 73 | covariance=dedent("""\ 74 | covariance : bool, optional 75 | Whether to use the cross-covariance matrix instead of the cross- 76 | correlation during the decomposition. Only set if you are sure this is 77 | what you want as many of the results may become more difficult to 78 | interpret (i.e., :py:attr:`~.structures.PLSResults.behavcorr` will no 79 | longer be interpretable as Pearson correlation values). Default: False\ 80 | """), 81 | rotate=dedent("""\ 82 | rotate : bool, optional 83 | Whether to perform Procrustes rotations during permutation testing. Can 84 | inflate false-positive rates; see Kovacevic et al. (2013) for more 85 | information. Default: True\ 86 | """), 87 | ci=dedent("""\ 88 | ci : [0, 100] float, optional 89 | Confidence interval to use for assessing bootstrap results. This 90 | roughly corresponds to an alpha rate; e.g., the 95%ile CI is 91 | approximately equivalent to a two-tailed p <= 0.05. Default: 95\ 92 | """), 93 | proc_options=dedent("""\ 94 | seed : {int, :obj:`numpy.random.RandomState`, None}, optional 95 | Seed to use for random number generation. Helps ensure reproducibility 96 | of results. Default: None 97 | verbose : bool, optional 98 | Whether to show progress bars as the analysis runs. Note that progress 99 | bars will not persist after the analysis is completed. Default: True 100 | n_proc : int, optional 101 | How many processes to use for parallelizing permutation testing and 102 | bootstrap resampling. If not specified will default to serialized 103 | processing (i.e., one processor). Can optionally specify 'max' to use 104 | all available processors. Default: None\ 105 | """), 106 | pls_results=dedent("""\ 107 | results : :obj:`pyls.structures.PLSResults` 108 | Dictionary-like object containing results from the PLS analysis\ 109 | """), 110 | resamples=dedent("""\ 111 | permsamples : array_like, optional 112 | Resampling array to be used during permutation testing. 113 | If not specified a set of unique permutations will be generated. 114 | Default: None 115 | permindices : bool, optional 116 | Flag indicating how `permsamples` should be interpreted during 117 | permutation testing (if n_perm > 0): 118 | whether `permsamples` is an array of indices to permute or a pre-permuted 119 | array.
Useful when permuting with methods like BrainSMASH or Eigenstrapping. 120 | Default: True 121 | bootsamples : array_like, optional 122 | Resampling array to be used during bootstrap resampling (if n_boot > 123 | 0). If not specified a set of unique bootstraps will be generated. 124 | Default: None\ 125 | """), 126 | references=dedent("""\ 127 | McIntosh, A. R., Bookstein, F. L., Haxby, J. V., & Grady, C. L. (1996). 128 | Spatial pattern analysis of functional brain images using partial least 129 | squares. NeuroImage, 3(3), 143-157. 130 | 131 | McIntosh, A. R., & Lobaugh, N. J. (2004). Partial least squares analysis of 132 | neuroimaging data: applications and advances. NeuroImage, 23, S250-S263. 133 | 134 | Krishnan, A., Williams, L. J., McIntosh, A. R., & Abdi, H. (2011). Partial 135 | Least Squares (PLS) methods for neuroimaging: a tutorial and review. 136 | NeuroImage, 56(2), 455-475. 137 | 138 | Kovacevic, N., Abdi, H., Beaton, D., & McIntosh, A. R. (2013). Revisiting 139 | PLS resampling: comparing significance versus reliability across range of 140 | simulations. In New Perspectives in Partial Least Squares and Related 141 | Methods (pp. 159-170). Springer, New York, NY.\ 142 | """) 143 | ) 144 | 145 | 146 | class PLSInputs(ResDict): 147 | allowed = [ 148 | 'X', 'Y', 'groups', 'n_cond', 'n_perm', 'n_boot', 'n_split', 149 | 'test_split', 'test_size', 'mean_centering', 'covariance', 'rotate', 150 | 'ci', 'seed', 'verbose', 'n_proc', 'bootsamples', 'permsamples', 151 | 'method', 'n_components', 'aggfunc', 'permindices' 152 | ] 153 | 154 | def __init__(self, *args, **kwargs): 155 | super().__init__(*args, **kwargs) 156 | if self.get('n_split') == 0: 157 | self['n_split'] = None 158 | 159 | if self.get('test_split') == 0: 160 | self['test_split'] = None 161 | 162 | if self.get('n_proc') is not None: 163 | n_proc = self.get('n_proc') 164 | if n_proc == 'max' or n_proc == -1: 165 | self['n_proc'] = cpu_count() 166 | elif n_proc < 0: 167 | self['n_proc'] = cpu_count() + 1 + n_proc 168 | 169 | ts = self.get('test_size') 170 | if ts is not None and (ts < 0 or ts >= 1): 171 | raise ValueError('test_size must be in [0, 1). Provided value: {}' 172 | .format(ts)) 173 | 174 | 175 | PLSInputs.__doc__ = dedent("""\ 176 | PLS input information 177 | 178 | Attributes 179 | ---------- 180 | X : (S, B) array_like 181 | Input data matrix, where `S` is observations and `B` is features. 182 | Y : (S, T) array_like 183 | Behavioral matrix, where `S` is observations and `T` is features. 184 | If from :obj:`.behavioral_pls`, this is the provided behavior matrix; 185 | if from :obj:`.meancentered_pls`, this is a dummy-coded group/condition 186 | matrix.
187 | {groups} 188 | {conditions} 189 | {mean_centering} 190 | {covariance} 191 | {stat_test} 192 | {rotate} 193 | {ci} 194 | {proc_options} 195 | """).format(**_pls_input_docs) 196 | 197 | 198 | class PLSResults(ResDict): 199 | r""" 200 | Dictionary-like object containing results of PLS analysis 201 | 202 | Attributes 203 | ---------- 204 | x_weights : (B, L) `numpy.ndarray` 205 | Weights of `B` features used to project `X` matrix into PLS-derived 206 | component space 207 | y_weights : (J, L) `numpy.ndarray` 208 | Weights of `J` features used to project `Y` matrix into PLS-derived 209 | component space; not available with :func:`.pls_regression` 210 | x_scores : (S, L) `numpy.ndarray` 211 | Projection of `X` matrix into PLS-derived component space 212 | y_scores : (S, L) `numpy.ndarray` 213 | Projection of `Y` matrix into PLS-derived component space 214 | y_loadings : (J, L) `numpy.ndarray` 215 | Covariance of features in `Y` with projected `x_scores` 216 | singvals : (L, L) `numpy.ndarray` 217 | Singular values for PLS-derived component space; not available with 218 | :func:`.pls_regression` 219 | varexp : (L,) `numpy.ndarray` 220 | Variance explained in each of the PLS-derived components 221 | permres : :obj:`~.structures.PLSPermResults` 222 | Results of permutation testing, as applicable 223 | bootres : :obj:`~.structures.PLSBootResults` 224 | Results of bootstrap resampling, as applicable 225 | splitres : :obj:`~.structures.PLSSplitHalfResults` 226 | Results of split-half resampling, as applicable 227 | cvres : :obj:`~.structures.PLSCrossValidationResults` 228 | Results of cross-validation testing, as applicable 229 | inputs : :obj:`~.structures.PLSInputs` 230 | Inputs provided to original PLS 231 | """ 232 | allowed = [ 233 | 'x_weights', 'y_weights', 'x_scores', 'y_scores', 234 | 'y_loadings', 'singvals', 'varexp', 235 | 'permres', 'bootres', 'splitres', 'cvres', 'inputs' 236 | ] 237 | 238 | def __init__(self, **kwargs): 239 | super().__init__(**kwargs) 240 | # create all sub-dictionaries 241 | self.inputs = PLSInputs(**kwargs.get('inputs', kwargs)) 242 | self.bootres = PLSBootResults(**kwargs.get('bootres', kwargs)) 243 | self.permres = PLSPermResults(**kwargs.get('permres', kwargs)) 244 | self.splitres = PLSSplitHalfResults(**kwargs.get('splitres', kwargs)) 245 | self.cvres = PLSCrossValidationResults(**kwargs.get('cvres', kwargs)) 246 | 247 | 248 | class PLSBootResults(ResDict): 249 | """ 250 | Dictionary-like object containing results of PLS bootstrap resampling 251 | 252 | Attributes 253 | ---------- 254 | x_weights_normed : (B, L) `numpy.ndarray` 255 | `x_weights` normalized by their standard error, obtained from bootstrap 256 | resampling (see `x_weights_stderr`) 257 | x_weights_stderr : (B, L) `numpy.ndarray` 258 | Standard error of `x_weights`, used to generate `x_weights_normed` 259 | y_loadings : (J, L) `numpy.ndarray` 260 | Covariance of features in `Y` with projected `x_scores`; not available 261 | with :func:`.meancentered_pls` 262 | y_loadings_boot : (J, L, R) `numpy.ndarray` 263 | Distribution of `y_loadings` across all bootstrap resamples; not 264 | available with :func:`.meancentered_pls` 265 | y_loadings_ci : (J, L, 2) `numpy.ndarray` 266 | Lower (..., 0) and upper (..., 1) bounds of confidence interval for 267 | `y_loadings`; not available with :func:`.meancentered_pls` 268 | contrast : (J, L) `numpy.ndarray` 269 | Group x condition averages of :attr:`brainscores_demeaned`. Can be 270 | treated as a contrast indicating group x condition differences.
Only 271 | obtained from :obj:`.meancentered_pls`. 272 | contrast_boot : (J, L, R) `numpy.ndarray` 273 | Bootstrapped distribution of `contrast`; only available with 274 | :func:`.meancentered_pls` 275 | contrast_ci : (J, L, 2) `numpy.ndarray` 276 | Lower (..., 0) and upper (..., 1) bounds of confidence interval for 277 | `contrast`; only available with :func:`.meancentered_pls` 278 | bootsamples : (S, R) `numpy.ndarray` 279 | Indices of bootstrapped samples `S` across `R` resamples. 280 | """ 281 | allowed = [ 282 | 'x_weights_normed', 'x_weights_stderr', 'bootsamples', 283 | 'y_loadings', 'y_loadings_boot', 'y_loadings_ci', 284 | 'contrast', 'contrast_boot', 'contrast_ci' 285 | ] 286 | 287 | 288 | class PLSPermResults(ResDict): 289 | """ 290 | Dictionary-like object containing results of PLS permutation testing 291 | 292 | Attributes 293 | ---------- 294 | pvals : (L,) `numpy.ndarray` 295 | Non-parametric p-values used to examine whether components from 296 | original decomposition explain more variance than permuted components 297 | permsamples : (S, P) `numpy.ndarray` 298 | Resampling array used to permute `S` samples over `P` permutations 299 | """ 300 | allowed = [ 301 | 'pvals', 'permsamples', 'perm_singval' 302 | ] 303 | 304 | 305 | class PLSSplitHalfResults(ResDict): 306 | """ 307 | Dictionary-like object containing results of PLS split-half resampling 308 | 309 | Attributes 310 | ---------- 311 | ucorr, vcorr : (L,) `numpy.ndarray` 312 | Average correlations between split-half resamples in original (non- 313 | permuted) data for left/right singular vectors. Can be interpreted 314 | as reliability of `L` latent variables 315 | ucorr_pvals, vcorr_pvals : (L,) `numpy.ndarray` 316 | Number of permutations where correlation between split-half 317 | resamples exceeded original correlations, normalized by the total 318 | number of permutations. 
Can be interpreted as the statistical 319 | significance of the reliability of `L` latent variables 320 | ucorr_uplim, vcorr_uplim : (L,) `numpy.ndarray` 321 | Upper bound of confidence interval for correlations between split 322 | halves for left/right singular vectors 323 | ucorr_lolim, vcorr_lolim : (L,) `numpy.ndarray` 324 | Lower bound of confidence interval for correlations between split 325 | halves for left/right singular vectors 326 | """ 327 | allowed = [ 328 | 'ucorr', 'vcorr', 329 | 'ucorr_pvals', 'vcorr_pvals', 330 | 'ucorr_uplim', 'vcorr_uplim', 331 | 'ucorr_lolim', 'vcorr_lolim' 332 | ] 333 | 334 | 335 | class PLSCrossValidationResults(ResDict): 336 | """ 337 | Dictionary-like object containing results of PLS cross-validation testing 338 | 339 | Attributes 340 | ---------- 341 | r_squared : (T, I) `numpy.ndarray` 342 | R-squared (coefficient of determination) for each of `T` predicted 343 | behavioral scores against true behavioral scores across `I` train / 344 | test splits 345 | pearson_r : (T, I) `numpy.ndarray` 346 | Pearson's correlation for each of `T` predicted behavioral scores 347 | against true behavioral scores across `I` train / test splits 348 | """ 349 | allowed = [ 350 | 'pearson_r', 'r_squared' 351 | ] 352 | -------------------------------------------------------------------------------- /pyls/tests/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['compare_python_matlab', 'assert_matlab_equivalence'] 2 | 3 | from .matlab import compare_python_matlab, assert_matlab_equivalence 4 | -------------------------------------------------------------------------------- /pyls/tests/conftest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pytest 5 | import pyls 6 | 7 | 8 | @pytest.fixture(scope='session') 9 | def testdir(tmpdir_factory): 10 | data_dir = tmpdir_factory.mktemp('data') 11 | return str(data_dir) 12 | 13 | 14 | @pytest.fixture(scope='session') 15 | def mpls_results(): 16 | Xf = 1000 17 | subj = 100 18 | rs = np.random.RandomState(1234) 19 | return pyls.meancentered_pls(rs.rand(subj, Xf), n_cond=2, 20 | n_perm=10, n_boot=10, n_split=10) 21 | 22 | 23 | @pytest.fixture(scope='session') 24 | def bpls_results(): 25 | Xf = 1000 26 | Yf = 100 27 | subj = 100 28 | rs = np.random.RandomState(1234) 29 | return pyls.behavioral_pls(rs.rand(subj, Xf), rs.rand(subj, Yf), 30 | n_perm=10, n_boot=10, n_split=10) 31 | 32 | 33 | @pytest.fixture(scope='session') 34 | def pls_inputs(): 35 | return dict(X=np.random.rand(100, 1000), Y=np.random.rand(100, 100), 36 | groups=[50, 50], n_cond=1, mean_centering=0, 37 | n_perm=10, n_boot=10, n_split=5, 38 | test_size=0.25, test_split=100, 39 | rotate=True, ci=95, seed=1234, verbose=True, 40 | permsamples=10, bootsamples=10) 41 | -------------------------------------------------------------------------------- /pyls/tests/data/bpls_onegroup_onecond_nosplit.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/data/bpls_onegroup_onecond_nosplit.mat -------------------------------------------------------------------------------- /pyls/tests/data/bpls_onegroup_onecond_split.mat: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/data/bpls_onegroup_onecond_split.mat -------------------------------------------------------------------------------- /pyls/tests/data/empty.mat: -------------------------------------------------------------------------------- 1 | MATLAB 5.0 MAT-file Platform: posix, Created on: Mon Mar 19 11:24:37 2018IM -------------------------------------------------------------------------------- /pyls/tests/data/mpls_multigroup_onecond_nosplit.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/data/mpls_multigroup_onecond_nosplit.mat -------------------------------------------------------------------------------- /pyls/tests/data/mpls_multigroup_onecond_split.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/data/mpls_multigroup_onecond_split.mat -------------------------------------------------------------------------------- /pyls/tests/data/resultonly.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/data/resultonly.mat -------------------------------------------------------------------------------- /pyls/tests/matlab.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pyls 5 | 6 | 7 | def assert_num_equiv(a, b, atol=1e-4): 8 | """ 9 | Asserts numerical equivalence of `a` and `b` 10 | 11 | Compares numerical equivalence of `a` and `b`, accounting for potential 12 | sign flips. Uses :func:`numpy.allclose` for assessing equivalence once 13 | sign flips have been considered. 14 | 15 | Parameters 16 | ---------- 17 | a, b : array_like 18 | Arrays to compare for numerical equivalence 19 | atol : float, optional 20 | Absolute tolerance for differences in `a` and `b`. Default: 1e-4 21 | 22 | Raises 23 | ------ 24 | AssertionError 25 | If `a` and `b` are not numerically equivalent to `atol` 26 | """ 27 | 28 | # signs may be flipped so adjust accordingly 29 | flip = 1 * np.all(np.sign(b / a) == 1, axis=0, keepdims=True) 30 | flip[flip == 0] = -1 31 | diff = a - (b * flip) 32 | 33 | assert np.allclose(diff, 0, atol=atol) 34 | 35 | 36 | def assert_func_equiv(a, b, corr=0.975, ftol=0.01): 37 | """ 38 | Asserts "functional" equivalence of `a` and `b` 39 | 40 | Given the numerical instabilities of SVD between Matlab and Python we 41 | cannot always assume numerical equivalence, especially when permutation 42 | testing and bootstrap resampling are considered. This function thus 43 | considers whether results are "functionally" equivalent, where functional 44 | equivalence is defined by the correlation of `a` and `b` (if both are one- 45 | dimensional) or the correlation of columns of `a` and `b` (if both are two- 46 | dimensional). Correlations must surpass provided `corr` to be considered 47 | functionally equivalent. 48 | 49 | Parameters 50 | ---------- 51 | a, b : array_like 52 | Arrays to compare for functional equivalence 53 | corr : [0, 1] float, optional 54 | Correlation that must be surpassed in order to achieve functional 55 | equivalence between `a` and `b`. 
Default: 0.975 56 | ftol : float, optional 57 | If len(a) and len(b) <= 2, the correlation cannot be used to assess 58 | functional equivalence. Instead, this specifies the numerical tolerance 59 | permitted between corresponding values in the two vectors. 60 | 61 | Raises 62 | ------ 63 | AssertionError 64 | If `a` and `b` are not functionally equivalent 65 | """ 66 | 67 | if len(a) == 1 and len(b) == 1: # can't do anything here, really... 68 | return 69 | elif len(a) <= 2 and len(b) <= 2: # can't correlate length 2 array... 70 | assert np.allclose(np.sign(a), np.sign(b)) 71 | if ftol is not None: 72 | assert np.all(np.abs(a - b) < ftol) 73 | return 74 | 75 | if a.ndim > 1: 76 | corrs = pyls.compute.efficient_corr(a, b) 77 | else: 78 | corrs = np.corrcoef(a, b)[0, 1] 79 | 80 | assert np.all(np.abs(corrs) >= corr) 81 | 82 | 83 | def assert_pvals_equiv(a, b, alpha=0.05): 84 | """ 85 | Asserts that p-values in `a` and `b` achieve same statistical significance 86 | 87 | Uses `alpha` to determine significance threshold and ensures that 88 | corresponding p-values in `a` and `b` both reject or fail to reject the 89 | null hypothesis. 90 | 91 | Parameters 92 | ---------- 93 | a, b : array_like 94 | Arrays of p-values to be considered 95 | alpha : [0, 1] float, optional 96 | Alpha to set statistical significance threshold. Default: 0.05 97 | 98 | Raises 99 | ------ 100 | AssertionError 101 | If p-values in `a` and `b` do not achieve identical statistical 102 | significance thresholds 103 | """ 104 | 105 | assert np.all((a < alpha) == (b < alpha)) 106 | 107 | 108 | def compare_python_matlab(python, matlab, *, atol=1e-4, corr=0.975, alpha=0.05, 109 | ftol=0.01): 110 | """ 111 | Compares PLS results generated from `python` and `matlab` 112 | 113 | Due to floating point differences in linear algebra routines like SVD that 114 | propagate through permutation testing and bootstrap resampling, we cannot 115 | expect that PLS results from Python and Matlab will generate _exactly_ 116 | the same results. This function compares the numerical equivalence of 117 | results we do expect to be exact, and assesses the functional equivalence 118 | of the remaining results using correlations and alpha testing, as 119 | appropriate. 120 | 121 | Parameters 122 | ---------- 123 | python : :obj:`pyls.structures.PLSResults` 124 | PLSResults object generated from Python 125 | matlab : :obj:`pyls.structures.PLSResults` 126 | PLSResults object generated from Matlab 127 | atol : float, optional 128 | Absolute tolerance permitted between `python` and `matlab` results 129 | that should have numerical equivalency. Default: 1e-4 130 | corr : [0, 1] float, optional 131 | Minimum correlation expected between `python` and `matlab` results 132 | that can't be expected to retain numerical equivalency. Default: 0.975 133 | alpha : [0, 1] float, optional 134 | Alpha level for assessing significance of latent variables, used to 135 | compare whether `python` and `matlab` results retain same functional 136 | significance. Default: 0.05 137 | ftol : float, optional 138 | If len(a) and len(b) <= 2, the correlation (`corr`) cannot be used to 139 | assess functional equivalence. Instead, this value specifies the 140 | numerical tolerance allowed between corresponding values in the two 141 | vectors.
Default: 0.01 142 | 143 | Returns 144 | ------- 145 | equivalent : bool 146 | Whether PLSResults objects stored in `python` and `matlab` are 147 | functionally (not necessarily exactly numerically) equivalent 148 | reason : str 149 | If `equivalent=False`, reason for failure; otherwise, empty string 150 | """ 151 | 152 | if not isinstance(python, pyls.PLSResults): 153 | raise ValueError('Provided `python` object must be a pyls.PLSResults ' 154 | 'instance, not {}.'.format(type(python))) 155 | if not isinstance(matlab, pyls.PLSResults): 156 | raise ValueError('Provided `matlab` object must be a pyls.PLSResults ' 157 | 'instance, not {}.'.format(type(matlab))) 158 | 159 | # singular values close to 0 cannot be considered because they're random 160 | keep = ~np.isclose(python['singvals'], 0) 161 | 162 | # check top-level results (only for shared keys) 163 | for k in python.keys(): 164 | if isinstance(python[k], np.ndarray) and (k in matlab): 165 | a, b = python[k][..., keep], matlab[k][..., keep] 166 | try: 167 | assert_num_equiv(a, b, atol=atol) 168 | except AssertionError: 169 | return False, k 170 | 171 | # check pvals for functional equivalence 172 | if matlab.get('permres', {}).get('pvals') is not None: 173 | a = python['permres']['pvals'][keep] 174 | b = matlab['permres']['pvals'][keep] 175 | try: 176 | assert_func_equiv(a, b, corr, ftol=ftol) 177 | assert_pvals_equiv(a, b, alpha) 178 | except AssertionError: 179 | return False, 'permres.pvals' 180 | 181 | # check bootstraps for functional equivalence 182 | if matlab.get('bootres', {}).get('x_weights_normed') is not None: 183 | a = python['bootres']['x_weights_normed'][..., keep] 184 | b = matlab['bootres']['x_weights_normed'][..., keep] 185 | try: 186 | assert_func_equiv(a, b, corr, ftol=ftol) 187 | except AssertionError: 188 | return False, 'bootres.x_weights_normed' 189 | 190 | # check splitcorr for functional equivalence 191 | if matlab.get('splitres', {}).get('ucorr') is not None: 192 | a, b = python['splitres'], matlab['splitres'] 193 | try: 194 | for k in ['ucorr', 'vcorr']: 195 | assert_func_equiv(a[k][keep], b[k][keep], corr, ftol=ftol) 196 | except AssertionError: 197 | return False, 'splitres.{}'.format(k) 198 | 199 | return True, '' 200 | 201 | 202 | def assert_matlab_equivalence(fname, method=None, *, atol=1e-4, corr=0.975, 203 | alpha=0.05, ftol=0.01, **kwargs): 204 | """ 205 | Compares Matlab PLS results stored in `fname` with Python-generated results 206 | 207 | Loads `fname` using :func:`pyls.import_matlab_result`, re-runs analysis, 208 | and then compares results using :func:`pyls.tests.compare_python_matlab`. 209 | 210 | Parameters 211 | ---------- 212 | fname : str 213 | Path to Matlab PLS results 214 | method : function, optional 215 | PLS function to use to re-run analysis from `fname`. If not specified 216 | will try to determine method from `fname`. Default: None 217 | atol : float, optional 218 | Absolute tolerance permitted between `python` and `matlab` results 219 | that should have numerical equivalency. Default: 1e-4 220 | corr : [0, 1] float, optional 221 | Minimum correlation expected between `python` and `matlab` results 222 | that can't be expected to retain numerical equivalency. Default: 0.975 223 | alpha : [0, 1] float, optional 224 | Alpha level for assessing significance of latent variables, used to 225 | compare whether `python` and `matlab` results retain same functional 226 | significance.
Default: 0.05 227 | ftol : float, optional 228 | If len(a) and len(b) <= 2, the correlation (`corr`) cannot be used to 229 | assess functional equivalence. Instead, this value specifies the 230 | numerical tolerance allowed between corresponding values in the two 231 | vectors. Default: 0.01 232 | kwargs : optional 233 | Key-value arguments to provide to PLS analysis. May override arguments 234 | specified in `fname` 235 | 236 | Raises 237 | ------ 238 | AssertionError 239 | If PLS results generated by Python are not the same as those stored in 240 | `fname` 241 | """ 242 | # load matlab result 243 | matlab = pyls.matlab.import_matlab_result(fname) 244 | 245 | # fix n_split default (if not specified in matlab, assume none was used) 246 | if 'n_split' not in matlab['inputs']: 247 | matlab['inputs']['n_split'] = None 248 | 249 | # get PLS method 250 | fcn = None 251 | if method is None: 252 | if matlab['inputs']['method'] == 1: 253 | fcn = pyls.meancentered_pls 254 | elif matlab['inputs']['method'] == 3: 255 | fcn = pyls.behavioral_pls 256 | elif isinstance(method, str): 257 | if method == 'meancentered': 258 | fcn = pyls.meancentered_pls 259 | elif method == 'behavioral': 260 | fcn = pyls.behavioral_pls 261 | elif callable(method): 262 | if method in [pyls.meancentered_pls, pyls.behavioral_pls]: 263 | fcn = method 264 | 265 | if fcn is None: 266 | raise ValueError('Cannot determine PLS method used to generate {} ' 267 | 'from file. Please provide `method` argument.' 268 | .format(fname)) 269 | 270 | # use seed for reproducibility of re-analysis 271 | matlab['inputs']['seed'] = 1234 272 | matlab['inputs']['verbose'] = False 273 | # don't update n_split if it was previously set to None 274 | if matlab['inputs']['n_split'] is None: 275 | if 'n_split' in kwargs: 276 | kwargs.pop('n_split') 277 | matlab['inputs'].update(kwargs) 278 | 279 | # run PLS 280 | python = fcn(**matlab['inputs']) 281 | equiv, reason = compare_python_matlab(python, matlab, atol=atol, corr=corr, 282 | alpha=alpha, ftol=ftol) 283 | 284 | if not equiv: 285 | raise AssertionError('compare_python_matlab failed: {}'.format(reason)) 286 | -------------------------------------------------------------------------------- /pyls/tests/test_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pytest 5 | import pyls 6 | 7 | 8 | # tests for gen_permsamp(), gen_bootsamp(), and gen_splits() are all very 9 | # similar because the code behind them is, in many senses, redundant. 10 | # that being said, the differences between the functions are intricate enough 11 | # that extracting the shared functionality would be more difficult than anyone 12 | # has time for right now. 13 | # thus, we get repetitive tests to make sure that nothing is screwed up!
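# as a concrete illustration of the shared contract these tests pin down (a
# sketch drawn from the assertions below, not an additional test): each
# generator returns an (S, n) array whose columns are resamples of the
# S = sum(groups) * n_cond rows -- integer indices for gen_permsamp() and
# gen_bootsamp(), boolean split masks for gen_splits(). For example:
#
#   >>> perms = pyls.base.gen_permsamp([10, 10], 2, seed=1234, n_perm=10)
#   >>> perms.shape
#   (40, 10)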
14 | def test_gen_permsamp(): 15 | # test to make sure that there are no duplicates generated given a 16 | # sufficiently large number of samples / conditions to work with 17 | unique_perms = pyls.base.gen_permsamp([10, 10], 2, seed=1234, n_perm=10) 18 | assert unique_perms.shape == (40, 10) 19 | for n, perm in enumerate(unique_perms.T[::-1], 1): 20 | assert not (perm[:, None] == unique_perms[:, :-n]).all(axis=0).any() 21 | 22 | # test that random state works and gives equivalent permutations when 23 | # the same number of groups / conditions / permutations are provided 24 | same_perms = pyls.base.gen_permsamp([10, 10], 2, seed=1234, n_perm=10) 25 | assert same_perms.shape == (40, 10) 26 | assert np.all(unique_perms == same_perms) 27 | 28 | # test that, given a small number of samples and requesting a large number 29 | # of permutations, duplicate samples are given (and a warning is raised!) 30 | with pytest.warns(UserWarning): 31 | dupe_perms = pyls.base.gen_permsamp([2, 2], 1, n_perm=25) 32 | assert dupe_perms.shape == (4, 25) 33 | dupe = False 34 | for n, perm in enumerate(dupe_perms.T[::-1], 1): 35 | dupe = dupe or (perm[:, None] == dupe_perms[:, :-n]).all(axis=0).any() 36 | assert dupe 37 | 38 | # test that subject conditions are kept together during permutations 39 | # that is, each subject has two conditions so we want to make sure that 40 | # when we permute subject order both conditions for a given subject are 41 | # moved together 42 | cond_perms = pyls.base.gen_permsamp([10], 2, n_perm=10) 43 | assert cond_perms.shape == (20, 10) 44 | for n in range(10): 45 | comp = np.array([f + 10 if f < 10 else f - 10 for f in cond_perms[n]]) 46 | assert np.all(comp == cond_perms[n + 10]) 47 | 48 | # test that subjects are permuted between groups 49 | # that is, no permutation should result in a group having the same subjects 50 | group_perms = pyls.base.gen_permsamp([10, 10], 1, n_perm=10) 51 | g1, g2 = np.sort(group_perms[:10], 0), np.sort(group_perms[10:], 0) 52 | comp = np.arange(0, 10)[:, None] 53 | assert not np.any(np.all(comp == g1, axis=0)) 54 | assert not np.any(np.all((comp + 10) == g2, axis=0)) 55 | 56 | # test that permutations with groups and conditions are appropriate 57 | # we'll use unique_perms since that has 2 groups and 2 conditions already 58 | # we want to confirm that (1) subject conditions are permuted together, and 59 | # (2) subjects are permuted between groups 60 | g1, g2 = unique_perms[:20], unique_perms[20:] 61 | # confirm subject conditions are permuted together 62 | for g in [g1, g2]: 63 | for n in range(10): 64 | comp = [f + 10 if f < 10 or (f >= 20 and f < 30) else f - 10 65 | for f in g[n]] 66 | assert np.all(comp == g[n + 10]) 67 | # confirm subjects are permuted between groups 68 | comp = np.arange(0, 20)[:, None] 69 | assert not np.any(np.all(comp == np.sort(g1, axis=0), axis=0)) 70 | assert not np.any(np.all((comp + 20) == np.sort(g2, axis=0), axis=0)) 71 | 72 | 73 | def test_gen_bootsamp(): 74 | # test to make sure that there are no duplicates generated given a 75 | # sufficiently large number of samples / conditions to work with 76 | unique_boots = pyls.base.gen_bootsamp([10, 10], 2, seed=1234, n_boot=10) 77 | assert unique_boots.shape == (40, 10) 78 | for n, perm in enumerate(unique_boots.T[::-1], 1): 79 | assert not (perm[:, None] == unique_boots[:, :-n]).all(axis=0).any() 80 | 81 | # test that random state works and gives equivalent bootstraps when 82 | # the same number of groups / conditions / bootstraps are provided 83 | same_boots =
pyls.base.gen_bootsamp([10, 10], 2, seed=1234, n_boot=10) 84 | assert same_boots.shape == (40, 10) 85 | assert np.all(unique_boots == same_boots) 86 | 87 | # test that, given a small number of samples and requesting a large number 88 | # of bootstraps, duplicate samples are given (and a warning is raised!) 89 | with pytest.warns(UserWarning): 90 | dupe_boots = pyls.base.gen_bootsamp([5], 1, n_boot=125) 91 | assert dupe_boots.shape == (5, 125) 92 | dupe = False 93 | for n, perm in enumerate(dupe_boots.T[::-1], 1): 94 | dupe = dupe or (perm[:, None] == dupe_boots[:, :-n]).all(axis=0).any() 95 | assert dupe 96 | 97 | # test that bootstraps all have the minimum number of unique subjects 98 | # that is, since we are always bootstrapping within groups/conditions, we 99 | # want to ensure that there is never a case where e.g., an entire group is 100 | # replaced with ONE subject (unless there are only two subjects, but then 101 | # what are you really doing?) 102 | # we set a minimum subject threshold equal to 1/2 the number of samples in 103 | # the smallest group; thus, with e.g., groups of [10, 20, 30], the minimum 104 | # number of unique subjects in any given group for any given bootstrap 105 | # should be 5 (=10/2) 106 | for grp in np.split(unique_boots, 4, axis=0): 107 | for boot in grp.T: 108 | assert np.unique(boot).size >= 5 109 | 110 | # make sure that when we're resampling subjects we're doing it for all 111 | # conditions; this is a much easier check than for permutations! 112 | for n in range(10): 113 | assert np.all(unique_boots[n] + 10 == unique_boots[n + 10]) 114 | for n in range(20, 30): 115 | assert np.all(unique_boots[n] + 10 == unique_boots[n + 10]) 116 | 117 | 118 | def test_gen_splitsamp(): 119 | # test to make sure that there are no duplicates generated given a 120 | # sufficiently large number of samples / conditions to work with 121 | unique_splits = pyls.base.gen_splits([10, 10], 2, seed=1234, n_split=10) 122 | assert unique_splits.shape == (40, 10) 123 | for n, perm in enumerate(unique_splits.T[::-1], 1): 124 | assert not (perm[:, None] == unique_splits[:, :-n]).all(axis=0).any() 125 | 126 | # test that random state works and gives equivalent splits when 127 | # the same number of groups / conditions / splits are provided 128 | same_splits = pyls.base.gen_splits([10, 10], 2, seed=1234, n_split=10) 129 | assert same_splits.shape == (40, 10) 130 | assert np.all(unique_splits == same_splits) 131 | 132 | # test that, given a small number of samples and requesting a large number 133 | # of splits, duplicate samples are given (and a warning is raised!) 134 | with pytest.warns(UserWarning): 135 | dupe_splits = pyls.base.gen_splits([5], 1, n_split=125) 136 | assert dupe_splits.shape == (5, 125) 137 | dupe = False 138 | for n, perm in enumerate(dupe_splits.T[::-1], 1): 139 | dupe = dupe or (perm[:, None] == dupe_splits[:, :-n]).all(axis=0).any() 140 | assert dupe 141 | 142 | # make sure that each group is split independently!
143 | for grp in np.split(unique_splits, 4, axis=0): 144 | assert np.all(np.sum(grp, axis=0) == 5) 145 | 146 | # make sure that `test_size` works as expected, too 147 | # `test_size` should determine the proportion of values set to False in 148 | # each group x condition 149 | # by default, `test_size` is 0.5, so the split is half-and-half, but if we 150 | # change it to e.g., 0.2, then there should be `0.2 * n_samples` False 151 | # values in each group x condition 152 | test_splits = pyls.base.gen_splits([10, 10], 2, n_split=10, test_size=0.2) 153 | for grp in np.split(test_splits, 4, axis=0): 154 | assert np.all(np.sum(grp, axis=0) == 8) 155 | 156 | 157 | def test_BasePLS(pls_inputs): 158 | # test that BasePLS accepts all inputs and stores them correctly 159 | basepls = pyls.base.BasePLS(**pls_inputs) 160 | for key in pls_inputs.keys(): 161 | assert hasattr(basepls.inputs, key) 162 | assert np.all(basepls.inputs[key] == pls_inputs[key]) 163 | 164 | # test that groups are handled correctly 165 | X, n_samples = pls_inputs['X'], len(pls_inputs['X']) 166 | # when not provided, should be calculated 167 | basepls = pyls.base.BasePLS(X, n_cond=2) 168 | assert basepls.inputs.groups == [n_samples // 2] 169 | # when provided as an int, should be coerced into a list 170 | basepls = pyls.base.BasePLS(X, groups=n_samples // 2, n_cond=2) 171 | assert basepls.inputs.groups == [n_samples // 2] 172 | # when they don't match the number of samples in the input data, error 173 | with pytest.raises(ValueError): 174 | basepls = pyls.base.BasePLS(X, groups=[100, 100]) 175 | 176 | # ensure errors are raised for not implemented 177 | with pytest.raises(NotImplementedError): 178 | basepls.gen_covcorr(pls_inputs['X'], pls_inputs['Y']) 179 | with pytest.raises(NotImplementedError): 180 | basepls.gen_distrib(pls_inputs['X'], pls_inputs['Y']) 181 | -------------------------------------------------------------------------------- /pyls/tests/test_compute.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pytest 5 | import pyls 6 | 7 | rs = np.random.RandomState(1234) 8 | 9 | 10 | def test_normalize(): 11 | X = rs.rand(10, 10) 12 | out = pyls.compute.normalize(X, axis=0) 13 | assert np.allclose(np.sum(out**2, axis=0), 1) 14 | 15 | out = pyls.compute.normalize(X, axis=1) 16 | assert np.allclose(np.sum(out**2, axis=1), 1) 17 | 18 | 19 | def test_xcorr(): 20 | X = rs.rand(20, 200) 21 | Y = rs.rand(20, 25) 22 | 23 | xcorr = pyls.compute.xcorr(X, Y) 24 | assert xcorr.shape == (25, 200) 25 | xcorr = pyls.compute.xcorr(X, Y, norm=True) 26 | assert xcorr.shape == (25, 200) 27 | 28 | with pytest.raises(ValueError): 29 | pyls.compute.xcorr(X[:, 0], Y) 30 | with pytest.raises(ValueError): 31 | pyls.compute.xcorr(X[:, 0], Y[:, 0]) 32 | with pytest.raises(ValueError): 33 | pyls.compute.xcorr(X[0:10], Y) 34 | 35 | 36 | def test_efficient_corr(): 37 | x, y = rs.rand(100), rs.rand(100, 10) 38 | assert pyls.compute.efficient_corr(x, y).shape == (10,) 39 | x = rs.rand(100, 10) 40 | assert pyls.compute.efficient_corr(x, y).shape == (10,) 41 | 42 | x = rs.rand(100, 2) 43 | with pytest.raises(ValueError): 44 | pyls.compute.efficient_corr(x, y) 45 | 46 | x, y = np.ones((100, 2)), np.ones((100, 2)) * 5 47 | x[50:, 0], y[50:, 0] = 2, 6 48 | x[50:, 1], y[50:, 1] = 2, 4 49 | assert np.allclose(pyls.compute.efficient_corr(x, y), np.array([1., -1.])) 50 | -------------------------------------------------------------------------------- 
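The tests above pin down the contract of `pyls.compute.efficient_corr`: it
returns the column-wise Pearson correlation between paired columns of `x` and
`y`, broadcasting a one-dimensional `x` against every column of `y`. A minimal
NumPy sketch of those expected semantics (an illustration only, not the
library's actual implementation; the helper name `columnwise_corr` is made up
here):

import numpy as np

def columnwise_corr(x, y):
    # Pearson correlation of each column of x with the matching column of y;
    # a 1-D x is tiled against every column of y
    x = x.reshape(len(x), -1).astype(float)
    y = y.reshape(len(y), -1).astype(float)
    if x.shape[1] == 1 and y.shape[1] > 1:
        x = np.repeat(x, y.shape[1], axis=1)
    xz = (x - x.mean(axis=0)) / x.std(axis=0, ddof=1)
    yz = (y - y.mean(axis=0)) / y.std(axis=0, ddof=1)
    return (xz * yz).sum(axis=0) / (len(x) - 1)

rs = np.random.RandomState(1234)
a, b = rs.rand(100), rs.rand(100, 10)
assert columnwise_corr(a, b).shape == (10,)  # mirrors the shape checks above

--------------------------------------------------------------------------------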
/pyls/tests/test_examples.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import pytest 5 | import pyls.examples 6 | 7 | DATASETS = [ 8 | 'mirchi_2018', 'whitaker_vertes_2016', 'wine', 'linnerud' 9 | ] 10 | 11 | 12 | def test_available_datasets(): 13 | # make sure we get a list of strings when called with no arguments 14 | avail = pyls.examples.available_datasets() 15 | assert isinstance(avail, list) 16 | assert all([isinstance(f, str) for f in avail]) 17 | 18 | # check that we get all expected datasets back 19 | assert len(set(DATASETS) - set(avail)) == 0 20 | 21 | # check that we can supply dataset names to function to confirm validity 22 | for f in DATASETS: 23 | assert f == pyls.examples.available_datasets(f) 24 | 25 | # check that providing an invalid dataset name errors 26 | for f in ['thisisnotadataset', 10]: 27 | with pytest.raises(ValueError): 28 | pyls.examples.available_datasets(f) 29 | 30 | 31 | @pytest.mark.parametrize(('dataset', 'keys'), [ 32 | ('linnerud', [ 33 | 'description', 'reference', 'urls', 'X', 'Y', 'n_perm', 'n_boot' 34 | ]), 35 | ('mirchi_2018', [ 36 | 'description', 'reference', 'urls', 'X', 'Y', 37 | 'n_perm', 'n_boot', 'test_size', 'test_split', 'parcellation' 38 | ]), 39 | ('wine', [ 40 | 'description', 'reference', 'urls', 'X', 'n_perm', 'n_boot', 'groups' 41 | ]), 42 | ('whitaker_vertes_2016', [ 43 | 'description', 'reference', 'urls', 'X', 'Y', 'n_perm', 'n_boot', 44 | 'n_components' 45 | ]) 46 | ]) 47 | def test_query_dataset(dataset, keys): 48 | # check that the default return value is the description string 49 | assert isinstance(pyls.examples.query_dataset(dataset), str) 50 | # check that supplying None returns all available keys 51 | assert set(pyls.examples.query_dataset(dataset, None)) == set(keys) 52 | # check that all valid keys return something 53 | for k in keys: 54 | assert pyls.examples.query_dataset(dataset, k) is not None 55 | # check nonsense keys 56 | for k in ['notakey', 10, 20.5132]: 57 | with pytest.raises(KeyError): 58 | pyls.examples.query_dataset(dataset, k) 59 | 60 | 61 | def test_get_data_dir(tmpdir): 62 | # check that default (no arguments) returns valid default directory 63 | data_dir = pyls.examples.datasets._get_data_dir() 64 | assert isinstance(data_dir, str) 65 | assert os.path.exists(data_dir) 66 | assert os.path.basename(data_dir) == 'pyls-data' 67 | 68 | # check supplying directory returns same directory 69 | assert pyls.examples.datasets._get_data_dir(str(tmpdir)) == str(tmpdir) 70 | assert os.path.exists(str(tmpdir)) 71 | 72 | # check that _get_data_dir() pulls from environmental variable correctly 73 | os.environ['PYLS_DATA'] = str(tmpdir) 74 | assert pyls.examples.datasets._get_data_dir() == str(tmpdir) 75 | 76 | 77 | @pytest.mark.parametrize(('dataset', 'keys'), [ 78 | ('linnerud', ['X', 'Y', 'n_perm', 'n_boot']), 79 | ('mirchi_2018', ['X', 'Y', 'n_perm', 'n_boot', 'test_size', 'test_split']), 80 | ('wine', ['X', 'groups', 'n_perm', 'n_boot']), 81 | ('whitaker_vertes_2016', ['X', 'Y', 'n_perm', 'n_boot', 'n_components']) 82 | ]) 83 | def test_load_dataset(tmpdir, dataset, keys): 84 | ds = pyls.examples.load_dataset(dataset, str(tmpdir)) 85 | assert isinstance(ds, pyls.structures.PLSInputs) 86 | for k in keys: 87 | assert hasattr(ds, k) and getattr(ds, k) is not None 88 | ds, ref = pyls.examples.load_dataset(dataset, str(tmpdir), 89 | return_reference=True) 90 | assert isinstance(ref, str) 91 |
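Taken together, these dataset tests imply a simple end-to-end workflow:
`load_dataset` returns a `PLSInputs` mapping whose keys (e.g., `X`, `Y`,
`n_perm`, and `n_boot` for 'linnerud') line up with the keyword arguments of
the analysis functions. A hedged usage sketch (assuming, as the dict-like
`ResDict` base suggests, that a `PLSInputs` object can be unpacked with `**`):

import pyls

# fetch the packaged example dataset (downloaded and cached on first use)
ds = pyls.examples.load_dataset('linnerud')

# splat the bundled inputs straight into a behavioral PLS
results = pyls.behavioral_pls(**ds)
print(results.varexp)  # variance explained by each latent variable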
-------------------------------------------------------------------------------- /pyls/tests/test_io.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os.path as op 4 | import h5py 5 | import pytest 6 | import pyls 7 | 8 | 9 | def test_load_save(testdir, mpls_results, bpls_results): 10 | for res, fn in zip([mpls_results, bpls_results], ['mpls', 'bpls']): 11 | fname = pyls.save_results(op.join(testdir, fn), res) 12 | assert op.isfile(fname) 13 | assert h5py.is_hdf5(fname) 14 | assert pyls.load_results(fname) == res 15 | 16 | with pytest.raises(TypeError): 17 | pyls.load_results(testdir) 18 | -------------------------------------------------------------------------------- /pyls/tests/test_matlab.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os.path as op 4 | import pkg_resources 5 | import pytest 6 | import pyls 7 | 8 | data_dir = pkg_resources.resource_filename('pyls', 'tests/data') 9 | EXAMPLES = ['mpls_multigroup_onecond_nosplit.mat', 10 | 'mpls_multigroup_onecond_split.mat', 11 | 'bpls_onegroup_onecond_nosplit.mat', 12 | 'bpls_onegroup_onecond_split.mat', 13 | 'resultonly.mat'] 14 | 15 | attrs = [ 16 | 'x_weights', 'singvals', 'y_weights', 'x_scores', 'permres', 'bootres', 17 | 'inputs' 18 | ] 19 | 20 | 21 | @pytest.mark.parametrize('fname', EXAMPLES) 22 | def test_import_matlab(fname): 23 | res = pyls.matlab.import_matlab_result(op.join(data_dir, fname)) 24 | # make sure the mat file was cast appropriately 25 | assert isinstance(res, pyls.structures.PLSResults) 26 | # make sure all the attributes are there (don't check outputs) 27 | for attr in attrs: 28 | assert hasattr(res, attr) 29 | if '_split' in fname: 30 | assert hasattr(res, 'splitres') 31 | 32 | 33 | def test_errors(): 34 | with pytest.raises(ValueError): 35 | pyls.matlab.import_matlab_result(op.join(data_dir, 'empty.mat')) 36 | -------------------------------------------------------------------------------- /pyls/tests/test_structures.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import multiprocessing as mp 4 | import numpy as np 5 | import pytest 6 | from pyls import structures 7 | 8 | 9 | def test_PLSInputs(pls_inputs): 10 | # check correct handling of all available PLSInputs keys 11 | pls_inputs = structures.PLSInputs(**pls_inputs) 12 | for key in pls_inputs.keys(): 13 | assert hasattr(pls_inputs, key) 14 | assert np.all(getattr(pls_inputs, key) == pls_inputs[key]) 15 | 16 | # test_split and n_split should be None when set to 0 17 | assert structures.PLSInputs(n_split=0).n_split is None 18 | assert structures.PLSInputs(test_split=0).test_split is None 19 | 20 | # confirm n_proc inputs are handled appropriately 21 | assert structures.PLSInputs(n_proc=1).n_proc == 1 22 | for n_proc in ['max', -1]: 23 | assert structures.PLSInputs(n_proc=n_proc).n_proc == mp.cpu_count() 24 | assert structures.PLSInputs(n_proc=-2).n_proc == mp.cpu_count() - 1 25 | 26 | # check input checking for test_size 27 | with pytest.raises(ValueError): 28 | structures.PLSInputs(test_size=1) 29 | with pytest.raises(ValueError): 30 | structures.PLSInputs(test_size=-0.5) 31 | 32 | # check that PLSInputs rejects disallowed keys 33 | assert structures.PLSInputs(notakey=10).get('notakey') is None 34 | 35 | 36 | @pytest.mark.xfail 37 | def test_PLSResults(): 38 | assert False 39 | 40 | 41 | @pytest.mark.xfail 42 | def
test_PLSBootResults(): 43 | assert False 44 | 45 | 46 | @pytest.mark.xfail 47 | def test_PLSPermResults(): 48 | assert False 49 | 50 | 51 | @pytest.mark.xfail 52 | def test_PLSSplitHalfResults(): 53 | assert False 54 | 55 | 56 | @pytest.mark.xfail 57 | def test_PLSCrossValidationResults(): 58 | assert False 59 | -------------------------------------------------------------------------------- /pyls/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | from pyls import utils 5 | import pytest 6 | import tqdm 7 | 8 | 9 | def test_empty_dict(): 10 | assert utils._empty_dict({}) 11 | assert utils._empty_dict(dict()) 12 | assert not utils._empty_dict(dict(d=10)) 13 | assert not utils._empty_dict(dict(d=dict(d=dict(d=10)))) 14 | assert not utils._empty_dict([]) 15 | assert not utils._empty_dict(None) 16 | assert not utils._empty_dict('test') 17 | assert not utils._empty_dict(10) 18 | assert not utils._empty_dict(10.0) 19 | assert not utils._empty_dict(set()) 20 | 21 | 22 | def test_not_empty_keys(): 23 | assert utils._not_empty_keys(dict()) == set() 24 | assert utils._not_empty_keys(dict(test=10)) == {'test'} 25 | assert utils._not_empty_keys(dict(test=10, temp=None)) == {'test'} 26 | assert utils._not_empty_keys(dict(test=10, temp={})) == {'test'} 27 | 28 | with pytest.raises(TypeError): 29 | utils._not_empty_keys([10, 20, 30]) 30 | 31 | 32 | def test_ResDict(): 33 | # toy example with some allowed keys 34 | class TestDict(utils.ResDict): 35 | allowed = ['test', 'temp'] 36 | 37 | # confirm string representations work 38 | d = utils.ResDict() 39 | assert str(d) == 'ResDict()' 40 | assert str(TestDict(test={})) == 'TestDict()' 41 | assert str(TestDict(test=None)) == 'TestDict()' 42 | assert d != TestDict() 43 | 44 | # confirm general key checking works 45 | test1 = TestDict(test=10) 46 | test2 = TestDict(test=11) 47 | test3 = TestDict(test=10, temp=11) 48 | assert str(test1) == 'TestDict(test)' 49 | assert str(test2) == 'TestDict(test)' 50 | assert str(test3) == 'TestDict(test, temp)' 51 | assert test1 == test1 52 | assert test1 != test2 53 | assert test1 != test3 54 | 55 | # confirm numpy array comparisons work 56 | test1 = TestDict(test=np.arange(9)) 57 | test2 = TestDict(test=np.arange(9) + 1e-6) # should work 58 | test3 = TestDict(test=np.arange(9) + 1e-5) # too high 59 | test4 = TestDict(test=np.arange(10)) # totally different 60 | assert test1 == test1 61 | assert test1 == test2 62 | assert test1 != test3 63 | assert test1 != test4 64 | 65 | # confirm nested dictionary comparisons work 66 | test1 = TestDict(test=test1) 67 | test2 = TestDict(test=test3) 68 | assert test1 == test1 69 | assert test1 != test2 70 | 71 | # confirm item assignment holds 72 | test1.temp = 10 73 | assert test1.temp == 10 74 | assert test1 == test1 75 | assert test1 != test2 76 | 77 | # confirm rejection of item assignment not in cls.allowed 78 | test1.blargh = 10 79 | assert not hasattr(test1, 'blargh') 80 | 81 | test1.temp = None 82 | test2.temp = None 83 | assert test1 != test2 84 | 85 | 86 | def test_trange(): 87 | # test that verbose=False generates a range object 88 | out = utils.trange(1000, verbose=False, desc='Test tqdm') 89 | assert [f for f in out] == list(range(1000)) 90 | # test that function will accept arbitrary kwargs and overwrite defaults 91 | out = utils.trange(1000, desc='Test tqdm', mininterval=0.5, ascii=False) 92 | assert isinstance(out, tqdm.tqdm) 93 | 94 | 95 | def test_dummy_label(): 96 
| groups = [10, 12, 11] 97 | expected = [[10, 12, 11], [10, 10, 12, 12, 11, 11]] 98 | for n_cond in range(1, 3): 99 | dummy = utils.dummy_label(groups, n_cond=n_cond) 100 | assert dummy.shape == (np.sum(groups) * n_cond,) 101 | assert np.unique(dummy).size == len(groups) * n_cond 102 | for n, grp in enumerate(np.unique(dummy)): 103 | assert np.sum(dummy == grp) == expected[n_cond - 1][n] 104 | 105 | 106 | def test_dummy_code(): 107 | groups = [10, 12, 11] 108 | expected = [[10, 12, 11], [10, 10, 12, 12, 11, 11]] 109 | for n_cond in range(1, 3): 110 | dummy = utils.dummy_code(groups, n_cond=n_cond) 111 | assert dummy.shape == (np.sum(groups) * n_cond, len(groups) * n_cond) 112 | assert np.all(np.unique(dummy) == [0, 1]) 113 | for n, grp in enumerate(dummy.T): 114 | assert grp.sum() == expected[n_cond - 1][n] 115 | 116 | 117 | def test_permute_cols(): 118 | x = np.arange(9).reshape(3, 3) 119 | expected = np.array([[0, 1, 5], [6, 4, 2], [3, 7, 8]]) 120 | 121 | out = utils.permute_cols(x, seed=np.random.RandomState(1234)) 122 | assert not np.all(out == x) and np.all(out == expected) 123 | 124 | # don't accept 1D arrays 125 | with pytest.raises(ValueError): 126 | utils.permute_cols(np.arange(9)) 127 | 128 | 129 | def test_unravel(): 130 | expected = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 131 | assert utils._unravel()(range(10)) == expected 132 | expected = [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] 133 | assert utils._unravel()(x ** 2 for x in range(10)) == expected 134 | 135 | # test context manager status and arbitrary argument acceptance 136 | with utils._unravel(10, test=20) as cm: 137 | assert cm(x**2 for x in range(10)) == expected 138 | 139 | 140 | def test_get_par_func(): 141 | def fcn(x): 142 | return x 143 | assert fcn(10) == 10 144 | assert fcn([10, 10]) == [10, 10] 145 | 146 | if utils.joblib_avail: 147 | import joblib 148 | with utils.get_par_func(1000, fcn) as (par, func): 149 | assert isinstance(par, joblib.Parallel) 150 | assert par.n_jobs == 1000 151 | assert not fcn == func 152 | 153 | utils.joblib_avail = False 154 | with utils.get_par_func(1000, fcn) as (par, func): 155 | assert isinstance(par, utils._unravel) 156 | assert fcn == func 157 | -------------------------------------------------------------------------------- /pyls/tests/types/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netneurolab/pypyls/e0ff056fe59bbc8d0334d63bbba316708eede75c/pyls/tests/types/__init__.py -------------------------------------------------------------------------------- /pyls/tests/types/test_regression.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pytest 5 | import pyls 6 | 7 | Xf = 1000 8 | Yf = 100 9 | subj = 50 10 | rs = np.random.RandomState(1234) 11 | 12 | 13 | class PLSRegressionTests(): 14 | defaults = pyls.structures.PLSInputs(X=rs.rand(subj, Xf), 15 | Y=rs.rand(subj, Yf), 16 | n_perm=20, n_boot=10, 17 | ci=95, seed=rs, verbose=False) 18 | 19 | def __init__(self, n_components=None, **kwargs): 20 | params = self.defaults.copy() 21 | params.update(kwargs) 22 | self.inputs = pyls.structures.PLSInputs(**params) 23 | self.inputs['n_components'] = n_components 24 | self.output = pyls.pls_regression(**self.inputs) 25 | self.confirm_outputs() 26 | 27 | def make_outputs(self): 28 | """ 29 | Used to make list of expected attributes and shapes for PLS outputs 30 | 31 | Returns 32 | ------- 33 | attrs : list-of-tuples 34 | Each 
entry in the list is a tuple with the attribute name and 35 | expected shape 36 | """ 37 | 38 | if self.inputs['n_components'] is None: 39 | num_lv = subj - 1 40 | else: 41 | num_lv = self.inputs['n_components'] 42 | 43 | attrs = [ 44 | ('x_weights', (Xf, num_lv)), 45 | ('x_scores', (subj, num_lv)), 46 | ('y_scores', (subj, num_lv)), 47 | ('y_loadings', (Yf, num_lv)), 48 | ('varexp', (num_lv,)), 49 | ] 50 | 51 | return attrs 52 | 53 | def confirm_outputs(self): 54 | """ Confirms generated outputs are of expected shape / size """ 55 | for (attr, shape) in self.make_outputs(): 56 | assert attr in self.output 57 | assert self.output[attr].shape == shape 58 | 59 | 60 | @pytest.mark.parametrize('n_components', [ 61 | None, 2, 5, 10, 15 62 | ]) 63 | def test_regression_onegroup_onecondition(n_components): 64 | PLSRegressionTests(n_components=n_components) 65 | 66 | 67 | @pytest.mark.parametrize('aggfunc', [ 68 | 'mean', 'median', 'sum' 69 | ]) 70 | def test_regression_3dbootstrap(aggfunc): 71 | # confirm providing 3D arrays works 72 | Y = rs.rand(subj, Yf, 100) 73 | PLSRegressionTests(Y=Y, n_components=2, aggfunc=aggfunc) 74 | 75 | # confirm providing valid bootsamples for 3D array works 76 | sboot = pyls.base.gen_bootsamp([subj], 1, n_boot=10) 77 | nboot = pyls.base.gen_bootsamp([100], 1, n_boot=10) 78 | bootsamples = np.array(list(zip(sboot.T, nboot.T))).T 79 | PLSRegressionTests(Y=Y, n_components=2, aggfunc=aggfunc, 80 | bootsamples=bootsamples, n_boot=10) 81 | 82 | 83 | def test_regression_missingdata(): 84 | X = rs.rand(subj, Xf) 85 | X[10] = np.nan 86 | PLSRegressionTests(X=X, n_components=2) 87 | X[20] = np.nan 88 | PLSRegressionTests(X=X, n_components=2) 89 | Y = rs.rand(subj, Yf) 90 | Y[11] = np.nan 91 | PLSRegressionTests(X=X, Y=Y, n_components=2) 92 | 93 | 94 | def test_errors(): 95 | with pytest.raises(ValueError): 96 | PLSRegressionTests(n_components=1000) 97 | with pytest.raises(ValueError): 98 | PLSRegressionTests(Y=rs.rand(subj - 1, Yf)) 99 | with pytest.raises(ValueError): 100 | PLSRegressionTests(Y=rs.rand(subj, Yf, 10), aggfunc='notafunc') 101 | with pytest.raises(TypeError): 102 | PLSRegressionTests(Y=rs.rand(subj, Yf, 10), aggfunc=lambda x: x) 103 | with pytest.raises(ValueError): 104 | PLSRegressionTests(Y=rs.rand(subj, Yf, 10), bootsamples=[[10], [10]]) 105 | -------------------------------------------------------------------------------- /pyls/tests/types/test_svd.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pytest 5 | import pyls 6 | 7 | Xf = 1000 8 | Yf = 100 9 | subj = 100 10 | rs = np.random.RandomState(1234) 11 | 12 | 13 | class PLSSVDTest(): 14 | defaults = pyls.structures.PLSInputs(X=rs.rand(subj, Xf), 15 | Y=rs.rand(subj, Yf), 16 | groups=None, n_cond=1, 17 | mean_centering=0, rotate=True, 18 | n_perm=20, n_boot=10, n_split=None, 19 | ci=95, seed=rs, verbose=False) 20 | funcs = dict(meancentered=pyls.meancentered_pls, 21 | behavioral=pyls.behavioral_pls) 22 | 23 | def __init__(self, plstype, **kwargs): 24 | self.inputs = pyls.structures.PLSInputs(**{key: kwargs.get(key, val) 25 | for (key, val) in 26 | self.defaults.items()}) 27 | self.output = self.funcs.get(plstype)(**self.inputs) 28 | self.type = plstype 29 | self.confirm_outputs() 30 | 31 | def make_outputs(self): 32 | """ 33 | Used to make list of expected attributes and shapes for PLS outputs 34 | 35 | Returns 36 | ------- 37 | attrs : list-of-tuples 38 | Each entry in the list is a tuple with the 
attribute name and 39 | expected shape 40 | """ 41 | 42 | dummy = len(self.output.inputs.groups) * self.output.inputs.n_cond 43 | if self.type == 'behavioral': 44 | behavior = Yf * dummy 45 | num_lv = min([f for f in [Xf, behavior] if f != 1]) 46 | else: 47 | behavior = num_lv = dummy 48 | 49 | attrs = [ 50 | ('x_weights', (Xf, num_lv)), 51 | ('y_weights', (behavior, num_lv)), 52 | ('singvals', (num_lv,)), 53 | ('varexp', (num_lv,)), 54 | ('x_scores', (subj, num_lv)), 55 | ('y_scores', (subj, num_lv)), 56 | ] 57 | 58 | return attrs 59 | 60 | def confirm_outputs(self): 61 | """ Confirms generated outputs are of expected shape / size """ 62 | for (attr, shape) in self.make_outputs(): 63 | assert attr in self.output 64 | assert self.output[attr].shape == shape 65 | 66 | 67 | @pytest.mark.parametrize(('n_split', 'rotate'), [ 68 | (None, True), (None, False), (5, True), (5, False) 69 | ]) 70 | def test_behavioral_onegroup_onecondition(n_split, rotate): 71 | PLSSVDTest('behavioral', groups=None, n_cond=1, n_split=n_split, 72 | rotate=rotate) 73 | 74 | 75 | @pytest.mark.parametrize(('n_split', 'rotate'), [ 76 | (None, True), (None, False), (5, True), (5, False) 77 | ]) 78 | def test_behavioral_multigroup_onecondition(n_split, rotate): 79 | PLSSVDTest('behavioral', groups=[33, 34, 33], n_cond=1, n_split=n_split, 80 | rotate=rotate) 81 | 82 | 83 | @pytest.mark.parametrize(('n_split', 'rotate'), [ 84 | (None, True), (None, False), (5, True), (5, False) 85 | ]) 86 | def test_behavioral_onegroup_multicondition(n_split, rotate): 87 | PLSSVDTest('behavioral', groups=subj // 4, n_cond=4, n_split=n_split, 88 | rotate=rotate) 89 | 90 | 91 | @pytest.mark.parametrize(('n_split', 'rotate'), [ 92 | (None, True), (None, False), (5, True), (5, False) 93 | ]) 94 | def test_behavioral_multigroup_multicondition(n_split, rotate): 95 | PLSSVDTest('behavioral', groups=[25, 25], n_cond=2, n_split=n_split, 96 | rotate=rotate) 97 | 98 | 99 | @pytest.mark.parametrize(('mean_centering', 'n_split', 'rotate'), [ 100 | (1, None, True), (1, None, False), (1, 5, True), (1, 5, False), 101 | (2, None, True), (2, None, False), (2, 5, True), (2, 5, False) 102 | ]) 103 | def test_meancentered_multigroup_onecondition(mean_centering, n_split, rotate): 104 | PLSSVDTest('meancentered', groups=[33, 34, 33], n_cond=1, n_split=n_split, 105 | mean_centering=mean_centering, rotate=rotate) 106 | 107 | 108 | @pytest.mark.parametrize(('mean_centering', 'n_split', 'rotate'), [ 109 | (0, None, True), (0, None, False), (0, 5, True), (0, 5, False), 110 | (2, None, True), (2, None, False), (2, 5, True), (2, 5, False) 111 | ]) 112 | def test_meancentered_onegroup_multicondition(mean_centering, n_split, rotate): 113 | PLSSVDTest('meancentered', groups=subj // 2, n_cond=2, n_split=n_split, 114 | mean_centering=mean_centering, rotate=rotate) 115 | 116 | 117 | @pytest.mark.parametrize(('mean_centering', 'n_split', 'rotate'), [ 118 | (0, None, True), (0, None, False), (0, 5, True), (0, 5, False), 119 | (1, None, True), (1, None, False), (1, 5, True), (1, 5, False), 120 | (2, None, True), (2, None, False), (2, 5, True), (2, 5, False) 121 | ]) 122 | def test_meancentered_multigroup_multicondition(mean_centering, n_split, 123 | rotate): 124 | PLSSVDTest('meancentered', groups=[25, 25], n_cond=2, n_split=n_split, 125 | mean_centering=mean_centering, rotate=rotate) 126 | 127 | 128 | def test_warnings(): 129 | with pytest.warns(UserWarning): 130 | PLSSVDTest('meancentered', groups=[50, 50], mean_centering=0) 131 | with pytest.warns(UserWarning): 132 | 
PLSSVDTest('meancentered', n_cond=2, mean_centering=1) 133 | 134 | 135 | def test_errors(): 136 | with pytest.raises(ValueError): 137 | PLSSVDTest('meancentered', groups=[50, 50], mean_centering=3) 138 | with pytest.raises(ValueError): 139 | PLSSVDTest('meancentered', groups=[subj]) 140 | with pytest.raises(ValueError): 141 | PLSSVDTest('meancentered', n_cond=3) 142 | with pytest.raises(ValueError): 143 | PLSSVDTest('behavioral', Y=rs.rand(subj - 1, Yf)) 144 | -------------------------------------------------------------------------------- /pyls/types/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | The primary PLS decomposition methods for use in conducting PLS analyses 4 | """ 5 | 6 | __all__ = ['behavioral_pls', 'meancentered_pls', 'pls_regression'] 7 | 8 | from .behavioral import behavioral_pls 9 | from .meancentered import meancentered_pls 10 | from .regression import pls_regression 11 | -------------------------------------------------------------------------------- /pyls/types/behavioral.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | from sklearn.metrics import r2_score 5 | from ..base import BasePLS, gen_splits 6 | from ..structures import _pls_input_docs 7 | from .. import compute, utils 8 | 9 | 10 | class BehavioralPLS(BasePLS): 11 | def __init__(self, X, Y, *, groups=None, n_cond=1, n_perm=5000, 12 | n_boot=5000, n_split=100, test_size=0.25, test_split=100, 13 | covariance=False, rotate=True, ci=95, permsamples=None, 14 | bootsamples=None, seed=None, verbose=True, n_proc=None, 15 | **kwargs): 16 | 17 | super().__init__(X=np.asarray(X), Y=np.asarray(Y), groups=groups, 18 | n_cond=n_cond, n_perm=n_perm, n_boot=n_boot, 19 | n_split=n_split, test_size=test_size, 20 | test_split=test_split, covariance=covariance, 21 | rotate=rotate, ci=ci, permsamples=permsamples, 22 | bootsamples=bootsamples, seed=seed, verbose=verbose, 23 | n_proc=n_proc, **kwargs) 24 | 25 | self.results = self.run_pls(self.inputs.X, self.inputs.Y) 26 | 27 | def gen_covcorr(self, X, Y, groups, **kwargs): 28 | """ 29 | Computes cross-covariance matrix from `X` and `Y` 30 | 31 | Parameters 32 | ---------- 33 | X : (S, B) array_like 34 | Input data matrix, where `S` is observations and `B` is features 35 | Y : (S, T) array_like 36 | Input data matrix, where `S` is observations and `T` is features 37 | groups : (S, J) array_like 38 | Dummy coded input array, where `S` is observations and `J` 39 | corresponds to the number of different groups x conditions. A value 40 | of 1 indicates that an observation belongs to a specific group or 41 | condition. 
42 | 43 | Returns 44 | ------- 45 | crosscov : (J*T, B) np.ndarray 46 | Cross-covariance matrix 47 | """ 48 | 49 | return np.row_stack([ 50 | compute.xcorr(X[grp], Y[grp], covariance=self.inputs.covariance) 51 | for grp in groups.T.astype(bool) 52 | ]) 53 | 54 | def gen_distrib(self, X, Y, original, groups, *args, **kwargs): 55 | """ 56 | Finds behavioral correlations for single bootstrap resample 57 | 58 | Parameters 59 | ---------- 60 | X : (S, B) array_like 61 | Input data matrix, where `S` is observations and `B` is features 62 | Y : (S, T) array_like 63 | Input data matrix, where `S` is observations and `T` is features 64 | original : (B, L) array_like 65 | Left singular vectors from bootstrap 66 | groups : (S, J) array_like 67 | Dummy coded input array, where `S` is observations and `J` 68 | corresponds to the number of different groups x conditions. A value 69 | of 1 indicates that an observation belongs to a specific group or 70 | condition. 71 | 72 | Returns 73 | ------- 74 | distrib : (T, L) 75 | Behavioral correlations for single bootstrap resample 76 | """ 77 | 78 | tusc = X @ compute.normalize(original) 79 | 80 | return self.gen_covcorr(tusc, Y, groups=groups) 81 | 82 | def crossval(self, X, Y, groups=None, seed=None): 83 | """ 84 | Performs cross-validation of SVD of `X` and `Y` 85 | 86 | Parameters 87 | ---------- 88 | X : (S, B) array_like 89 | Input data matrix, where `S` is observations and `B` is features 90 | Y : (S, T) array_like 91 | Input data matrix, where `S` is observations and `T` is features 92 | seed : {int, :obj:`numpy.random.RandomState`, None}, optional 93 | Seed for random number generation. Default: None 94 | 95 | Returns 96 | ------- 97 | r_scores : (C,) np.ndarray 98 | R (Pearson correlation) scores across train-test splits 99 | r2_scores : (C,) np.ndarray 100 | R^2 (coefficient of determination) scores across train-test splits 101 | """ 102 | 103 | if groups is None: 104 | groups = utils.dummy_code(self.inputs.groups, self.inputs.n_cond) 105 | 106 | # use gen_splits to handle grouping/condition vars in train/test split 107 | splits = gen_splits(self.inputs.groups, 108 | self.inputs.n_cond, 109 | self.inputs.test_split, 110 | seed=seed, 111 | test_size=self.inputs.test_size) 112 | 113 | gen = utils.trange(self.inputs.test_split, verbose=self.inputs.verbose, 114 | desc='Running cross-validation') 115 | with utils.get_par_func(self.inputs.n_proc, 116 | self.__class__._single_crossval) as (par, 117 | func): 118 | out = par( 119 | func(self, X=X, Y=Y, inds=splits[:, i], groups=groups, seed=i) 120 | for i in gen 121 | ) 122 | r_scores, r2_scores = [np.stack(o, axis=-1) for o in zip(*out)] 123 | 124 | return r_scores, r2_scores 125 | 126 | def _single_crossval(self, X, Y, inds, groups=None, seed=None): 127 | """ 128 | Generates single cross-validated r and r^2 score 129 | 130 | Parameters 131 | ---------- 132 | X : (S, B) array_like 133 | Input data matrix, where `S` is observations and `B` is features 134 | Y : (S, T) array_like 135 | Input data matrix, where `S` is observations and `T` is features 136 | inds : (S,) array_like 137 | Train-test split, where train = True and test = False 138 | groups : (S, J) array_like, optional 139 | Dummy coded input array, where `S` is observations and `J` 140 | corresponds to the number of different groups x conditions. A value 141 | of 1 indicates that an observation belongs to a specific group or 142 | condition. If not specified, will be generated on-the-fly.
Default: 143 | None 144 | seed : {int, :obj:`numpy.random.RandomState`, None}, optional 145 | Seed for random number generation. Default: None 146 | """ 147 | 148 | if groups is None: 149 | groups = utils.dummy_code(self.inputs.groups, self.inputs.n_cond) 150 | 151 | X_train, Y_train, dummy_train = X[inds], Y[inds], groups[inds] 152 | X_test, Y_test, dummy_test = X[~inds], Y[~inds], groups[~inds] 153 | # perform initial decomposition on train set 154 | U, d, V = self.svd(X_train, Y_train, groups=dummy_train, seed=seed) 155 | 156 | # rescale the test set based on the training set 157 | Y_pred = [] 158 | for n, V_spl in enumerate(np.split(V, groups.shape[-1])): 159 | tr_grp = dummy_train[:, n].astype(bool) 160 | te_grp = dummy_test[:, n].astype(bool) 161 | rescaled = compute.rescale_test(X_train[tr_grp], X_test[te_grp], 162 | Y_train[tr_grp], U, V_spl) 163 | Y_pred.append(rescaled) 164 | Y_pred = np.row_stack(Y_pred) 165 | 166 | # calculate r & r-squared from comp of rescaled test & true values 167 | r_scores = compute.efficient_corr(Y_test, Y_pred) 168 | r2_scores = r2_score(Y_test, Y_pred, multioutput='raw_values') 169 | 170 | return r_scores, r2_scores 171 | 172 | def run_pls(self, X, Y): 173 | """ 174 | Runs PLS analysis 175 | 176 | Parameters 177 | ---------- 178 | X : (S, B) array_like 179 | Input data matrix, where `S` is observations and `B` is features 180 | Y : (S, T) array_like 181 | Input data matrix, where `S` is observations and `T` is features 182 | """ 183 | 184 | res = super().run_pls(X, Y) 185 | 186 | # mechanism for splitting outputs along group / condition indices 187 | grps = np.repeat(res['inputs']['groups'], res['inputs']['n_cond']) 188 | res['y_scores'] = np.vstack([ 189 | y @ v for (y, v) in zip(np.split(Y, np.cumsum(grps)[:-1]), 190 | np.split(res['y_weights'], len(grps))) 191 | ]) 192 | 193 | # get lvcorrs 194 | groups = utils.dummy_code(self.inputs.groups, self.inputs.n_cond) 195 | res['y_loadings'] = self.gen_covcorr(res['x_scores'], Y, groups) 196 | 197 | if self.inputs.n_boot > 0: 198 | # compute bootstraps 199 | distrib, u_sum, u_square = self.bootstrap(X, Y, self.rs) 200 | 201 | # add original scaled singular vectors back in 202 | bs = res['x_weights'] @ res['singvals'] 203 | u_sum, u_square = u_sum + bs, u_square + (bs ** 2) 204 | 205 | # calculate bootstrap ratios and confidence intervals 206 | bsrs, uboot_se = compute.boot_rel(bs, u_sum, u_square, 207 | self.inputs.n_boot + 1) 208 | corrci = np.stack(compute.boot_ci(distrib, ci=self.inputs.ci), -1) 209 | 210 | # update results.boot_result dictionary 211 | res['bootres'].update(dict(x_weights_normed=bsrs, 212 | x_weights_stderr=uboot_se, 213 | y_loadings=res['y_loadings'].copy(), 214 | y_loadings_boot=distrib, 215 | y_loadings_ci=corrci, 216 | bootsamples=self.bootsamp)) 217 | 218 | # compute cross-validated prediction-based metrics 219 | if self.inputs.test_split is not None and self.inputs.test_size > 0: 220 | r, r2 = self.crossval(X, Y, groups=self.dummy, seed=self.rs) 221 | res['cvres'].update(dict(pearson_r=r, r_squared=r2)) 222 | 223 | # get rid of the stupid diagonal matrix 224 | res['varexp'] = np.diag(compute.varexp(res['singvals'])) 225 | res['singvals'] = np.diag(res['singvals']) 226 | 227 | return res 228 | 229 | 230 | # let's make it a function 231 | def behavioral_pls(X, Y, *, groups=None, n_cond=1, n_perm=5000, n_boot=5000, 232 | n_split=0, test_size=0.25, test_split=100, 233 | covariance=False, rotate=True, ci=95, permsamples=None, 234 | bootsamples=None, seed=None, verbose=True, 
n_proc=None, 235 | **kwargs): 236 | pls = BehavioralPLS(X=X, Y=Y, groups=groups, n_cond=n_cond, 237 | n_perm=n_perm, n_boot=n_boot, n_split=n_split, 238 | test_size=test_size, test_split=test_split, 239 | covariance=covariance, rotate=rotate, ci=ci, 240 | permsamples=permsamples, bootsamples=bootsamples, 241 | seed=seed, verbose=verbose, n_proc=n_proc, **kwargs) 242 | return pls.results 243 | 244 | 245 | behavioral_pls.__doc__ = r""" 246 | Performs behavioral PLS on `X` and `Y`. 247 | 248 | Behavioral PLS is a multivariate statistical approach that relates two sets 249 | of variables together. Traditionally, one of these arrays 250 | represents a set of brain features (e.g., functional connectivity 251 | estimates) and the other represents a set of behavioral variables; however, 252 | these arrays can be any two sets of features belonging to a common group of 253 | samples. 254 | 255 | Using a singular value decomposition, behavioral PLS attempts to find 256 | linear combinations of features from the provided arrays that maximally 257 | covary with each other. The decomposition is performed on the cross- 258 | covariance matrix :math:`R`, where :math:`R = Y^{{T}} \times X`, which 259 | represents the covariation of all the input features across samples. 260 | 261 | Parameters 262 | ---------- 263 | {input_matrix} 264 | Y : (S, T) array_like 265 | Input data matrix, where `S` is samples and `T` is features 266 | {groups} 267 | {conditions} 268 | {stat_test} 269 | {split_half} 270 | {cross_val} 271 | {covariance} 272 | {rotate} 273 | {ci} 274 | {resamples} 275 | {proc_options} 276 | 277 | Returns 278 | ---------- 279 | {pls_results} 280 | 281 | Notes 282 | ----- 283 | {decomposition_narrative} 284 | 285 | References 286 | ---------- 287 | 288 | {references} 289 | 290 | Misic, B., Betzel, R. F., de Reus, M. A., van den Heuvel, M.P., 291 | Berman, M. G., McIntosh, A. R., & Sporns, O. (2016). Network level 292 | structure-function relationships in human neocortex. Cerebral Cortex, 293 | 26, 3285-96. 294 | """.format(**_pls_input_docs) 295 | -------------------------------------------------------------------------------- /pyls/types/meancentered.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import warnings 4 | import numpy as np 5 | from ..base import BasePLS 6 | from ..structures import _pls_input_docs 7 | from .. import compute, utils 8 | 9 | 10 | class MeanCenteredPLS(BasePLS): 11 | def __init__(self, X, groups=None, n_cond=1, mean_centering=0, n_perm=5000, 12 | n_boot=5000, n_split=100, rotate=True, ci=95, 13 | permsamples=None, bootsamples=None, seed=None, 14 | verbose=True, n_proc=None, **kwargs): 15 | 16 | # check that groups and conditions are set appropriately 17 | if groups is None: 18 | if len(X) // n_cond != len(X) / n_cond: 19 | raise ValueError('Provided `X` matrix with {} samples is not ' 20 | 'evenly divisible into {} conditions. Please ' 21 | 'confirm inputs are correct and try again. ' 22 | .format(len(X), n_cond)) 23 | groups = [len(X) // n_cond] 24 | elif not isinstance(groups, (list, np.ndarray)): 25 | groups = [groups] 26 | 27 | # check inputs for validity 28 | if n_cond == 1 and len(groups) == 1: 29 | raise ValueError('Cannot perform PLS with only one group and one ' 30 | 'condition. Please confirm inputs are correct.') 31 | if n_cond == 1 and mean_centering == 0: 32 | warnings.warn('Cannot set mean_centering to 0 when there is only ' 33 | 'one condition. 
Resetting mean_centering to 1.') 34 | mean_centering = 1 35 | elif len(groups) == 1 and mean_centering == 1: 36 | warnings.warn('Cannot set mean_centering to 1 when there is only ' 37 | 'one group. Resetting mean_centering to 0.') 38 | mean_centering = 0 39 | 40 | # instantiate base class, generate dummy array, and run PLS analysis 41 | super().__init__(X=np.asarray(X), groups=groups, n_cond=n_cond, 42 | mean_centering=mean_centering, n_perm=n_perm, 43 | n_boot=n_boot, n_split=n_split, rotate=rotate, ci=ci, 44 | permsamples=permsamples, bootsamples=bootsamples, 45 | seed=seed, verbose=verbose, n_proc=n_proc, **kwargs) 46 | self.inputs.Y = utils.dummy_code(self.inputs.groups, 47 | self.inputs.n_cond) 48 | self.results = self.run_pls(self.inputs.X, self.inputs.Y) 49 | 50 | def gen_covcorr(self, X, Y, **kwargs): 51 | """ 52 | Computes mean-centered matrix from `X` and `Y` 53 | 54 | Parameters 55 | ---------- 56 | X : (S, B) array_like 57 | Input data matrix, where `S` is observations and `B` is features 58 | Y : (S, T) array_like 59 | Dummy coded input array, where `S` is observations and `T` 60 | corresponds to the number of different groups x conditions. A value 61 | of 1 indicates that an observation belongs to a specific group or 62 | condition. 63 | 64 | Returns 65 | ------- 66 | mean_centered : (T, B) np.ndarray 67 | Mean-centered matrix 68 | """ 69 | 70 | mean_centered = compute.get_mean_center(X, Y, self.inputs.n_cond, 71 | self.inputs.mean_centering, 72 | means=True) 73 | return mean_centered 74 | 75 | def gen_distrib(self, X, Y, original, *args, **kwargs): 76 | """ 77 | Finds contrast for single bootstrap resample 78 | 79 | Parameters 80 | ---------- 81 | X : (S, B) array_like 82 | Input data matrix, where `S` is observations and `B` is features 83 | Y : (S, T) array_like 84 | Dummy coded input array, where `S` is observations and `T` 85 | corresponds to the number of different groups x conditions. A value 86 | of 1 indicates that an observation belongs to a specific group or 87 | condition. 88 | original : (B, L) array_like 89 | Left singular vectors from bootstrap 90 | 91 | Returns 92 | ------- 93 | distrib : (T, L) 94 | Contrast for single bootstrap resample 95 | """ 96 | 97 | usc = compute.get_mean_center(X, Y, self.inputs.n_cond, 98 | self.inputs.mean_centering, 99 | means=False) 100 | usc = usc @ compute.normalize(original) 101 | 102 | return np.row_stack([usc[g].mean(axis=0) for g in Y.T.astype(bool)]) 103 | 104 | def make_permutation(self, X, Y, perminds): 105 | """ 106 | Permutes `X` according to `perminds`, leaving `Y` un-permuted 107 | 108 | Parameters 109 | ---------- 110 | X : (S, B) array_like 111 | Input data matrix, where `S` is observations and `B` is features 112 | Y : (S, T) array_like 113 | Input data matrix, where `S` is observations and `T` is features 114 | perminds : (S,) array_like 115 | Array by which to permute `X` 116 | 117 | Returns 118 | ------- 119 | Xp : (S, B) array_like 120 | `X`, permuted according to `perminds` 121 | Yp : (S, T) array_like 122 | Identical to `Y` 123 | """ 124 | 125 | return X[perminds], Y 126 | 127 | def run_pls(self, X, Y): 128 | """ 129 | Runs PLS analysis 130 | 131 | Parameters 132 | ---------- 133 | X : (S, B) array_like 134 | Input data matrix, where `S` is observations and `B` is features 135 | Y : (S, T) array_like, optional 136 | Dummy coded input array, where `S` is observations and `T` 137 | corresponds to the number of different groups x conditions. 
A value 138 | of 1 indicates that an observation belongs to a specific group or 139 | condition. 140 | 141 | Returns 142 | ------- 143 | res : :obj:`pyls.structures.PLSResults` 144 | PLS results object 145 | """ 146 | 147 | res = super().run_pls(X, Y) 148 | res['y_scores'] = Y @ res['y_weights'] 149 | 150 | # get normalized brain scores and contrast 151 | brainscores_dm = compute.get_mean_center(X, Y, self.inputs.n_cond, 152 | self.inputs.mean_centering, 153 | False) @ res['x_weights'] 154 | contrast = np.row_stack([brainscores_dm[grp].mean(axis=0) for grp 155 | in Y.T.astype(bool)]) 156 | 157 | if self.inputs.n_boot > 0: 158 | # compute bootstraps 159 | distrib, u_sum, u_square = self.bootstrap(X, Y, self.rs) 160 | 161 | # calculate bootstrap ratios and confidence intervals 162 | bs = res['x_weights'] @ res['singvals'] 163 | bsrs, uboot_se = compute.boot_rel(bs, u_sum, u_square, 164 | self.inputs.n_boot) 165 | corrci = np.stack(compute.boot_ci(distrib, ci=self.inputs.ci), -1) 166 | 167 | # update results.boot_result dictionary 168 | res['bootres'].update(dict(x_weights_normed=bsrs, 169 | x_weights_stderr=uboot_se, 170 | bootsamples=self.bootsamp, 171 | contrast=contrast, 172 | contrast_boot=distrib, 173 | contrast_ci=corrci)) 174 | 175 | # get rid of the stupid diagonal matrix 176 | res['varexp'] = np.diag(compute.varexp(res['singvals'])) 177 | res['singvals'] = np.diag(res['singvals']) 178 | 179 | return res 180 | 181 | 182 | def meancentered_pls(X, *, groups=None, n_cond=1, mean_centering=0, 183 | n_perm=5000, n_boot=5000, n_split=0, rotate=True, ci=95, 184 | permsamples=None, bootsamples=None, seed=None, 185 | verbose=True, n_proc=None, **kwargs): 186 | pls = MeanCenteredPLS(X=X, groups=groups, n_cond=n_cond, 187 | mean_centering=mean_centering, 188 | n_perm=n_perm, n_boot=n_boot, n_split=n_split, 189 | rotate=rotate, ci=ci, permsamples=permsamples, 190 | bootsamples=bootsamples, seed=seed, verbose=verbose, 191 | n_proc=n_proc, **kwargs) 192 | return pls.results 193 | 194 | 195 | meancentered_pls.__doc__ = r""" 196 | Performs mean-centered PLS on `X`, sorted into `groups` and `conditions`. 197 | 198 | Mean-centered PLS is a multivariate statistical approach that attempts to 199 | find sets of variables in a matrix which maximally discriminate between 200 | subgroups within the matrix. 201 | 202 | While it carries the name PLS, mean-centered PLS is perhaps more related to 203 | principal components analysis than it is to :obj:`pyls.behavioral_pls`. In 204 | contrast to behavioral PLS, mean-centered PLS does not construct a cross- 205 | covariance matrix. Instead, it operates by averaging the provided data 206 | (`X`) within groups and/or conditions. The resultant matrix :math:`M` is 207 | mean-centered, generating a new matrix :math:`R_{{mean\_centered}}` which 208 | is submitted to singular value decomposition. 
209 | 210 | Parameters 211 | ---------- 212 | {input_matrix} 213 | {groups} 214 | {conditions} 215 | {mean_centering} 216 | {stat_test} 217 | {split_half} 218 | {rotate} 219 | {ci} 220 | {resamples} 221 | {proc_options} 222 | 223 | Returns 224 | ---------- 225 | {pls_results} 226 | 227 | Notes 228 | ----- 229 | The provided `mean_centering` argument can be changed to highlight or 230 | "boost" potential group / condition differences by modifying how 231 | :math:`R_{{mean\_centered}}` is generated: 232 | 233 | - `mean_centering=0` will remove group means collapsed across conditions, 234 | emphasizing potential differences between conditions while removing 235 | overall group differences 236 | - `mean_centering=1` will remove condition means collapsed across groups, 237 | emphasizing potential differences between groups while removing overall 238 | condition differences 239 | - `mean_centering=2` will remove the grand mean collapsed across both 240 | groups _and_ conditions, permitting investigation of the full spectrum of 241 | potential group and condition effects. 242 | 243 | {decomposition_narrative} 244 | 245 | References 246 | ---------- 247 | {references} 248 | """.format(**_pls_input_docs) 249 | -------------------------------------------------------------------------------- /pyls/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from contextlib import contextmanager 4 | 5 | import numpy as np 6 | import tqdm 7 | from sklearn.utils import Bunch 8 | from sklearn.utils.validation import check_array, check_random_state 9 | try: 10 | from joblib import Parallel, delayed 11 | joblib_avail = True 12 | except ImportError: 13 | joblib_avail = False 14 | 15 | 16 | class ResDict(Bunch): 17 | """ 18 | Subclass of `sklearn.utils.Bunch` that only accepts keys in `cls.allowed` 19 | 20 | Also edits string representation to show non-empty keys 21 | """ 22 | 23 | allowed = [] 24 | 25 | def __init__(self, **kwargs): 26 | # only keep allowed keys 27 | i = {key: val for key, val in kwargs.items() if key in 28 | self.__class__.allowed} 29 | super().__init__(**i) 30 | 31 | def __str__(self): 32 | # override dict built-in string repr to display only non-empty keys 33 | items = [k for k in self.__class__.allowed 34 | if k in _not_empty_keys(self)] 35 | return '{name}({keys})'.format(name=self.__class__.__name__, 36 | keys=', '.join(items)) 37 | 38 | def __setitem__(self, key, val): 39 | # legit we only want keys that are allowed 40 | if key in self.__class__.allowed: 41 | super().__setitem__(key, val) 42 | 43 | def __eq__(self, value): 44 | # easy check -- are objects the same class? 45 | if not isinstance(value, self.__class__): 46 | return False 47 | # another easy check -- are the non-empty keys different? 
48 | if _not_empty_keys(self) != _not_empty_keys(value): 49 | return False 50 | # harder check -- iterate through everything and check item equality 51 | # potentially recursive checks if sub-items are dictionaries 52 | for k, v in self.items(): 53 | v2 = value.get(k, None) 54 | if v is None and v2 is None: 55 | continue 56 | # recursive dictionary comparison 57 | if isinstance(v, dict) and isinstance(v2, dict): 58 | if v != v2: 59 | return False 60 | # compare using numpy testing suite 61 | # this is because arrays may be different size and numpy testing 62 | # is way more solid than anything we could come up with 63 | else: 64 | try: 65 | np.testing.assert_array_almost_equal(v, v2) 66 | except (TypeError, AssertionError): 67 | return False 68 | 69 | return True 70 | 71 | def __ne__(self, value): 72 | return not self == value 73 | 74 | __repr__ = __str__ 75 | 76 | 77 | def _not_empty_keys(dictionary): 78 | """ 79 | Returns set of non-empty keys in `dictionary` 80 | 81 | Non-empty keys are defined as (1) not being None-type and (2) not being an 82 | empty dictionary itself 83 | 84 | Parameters 85 | ---------- 86 | dictionary : dict 87 | Object to query for non-empty keys 88 | 89 | Returns 90 | ------- 91 | keys : set 92 | Non-empty keys in `dictionary` 93 | """ 94 | 95 | if not isinstance(dictionary, dict): 96 | raise TypeError('Provided input must be type dict, not {}' 97 | .format(type(dictionary))) 98 | 99 | keys = [] 100 | for key, value in dictionary.items(): 101 | if value is not None and not _empty_dict(value): 102 | keys.append(key) 103 | 104 | return set(keys) 105 | 106 | 107 | def _empty_dict(dobj): 108 | """ 109 | Returns True if `dobj` is an empty dictionary; otherwise, returns False 110 | 111 | Parameters 112 | ---------- 113 | dobj 114 | Any Python object 115 | 116 | Returns 117 | ------- 118 | empty : bool 119 | Whether `dobj` is an empty dictionary-like object 120 | """ 121 | 122 | try: 123 | return len(dobj.keys()) == 0 124 | except (AttributeError, TypeError): 125 | return False 126 | 127 | 128 | def trange(n_iter, verbose=True, **kwargs): 129 | """ 130 | Wrapper for :obj:`tqdm.trange` with some default options set 131 | 132 | Parameters 133 | ---------- 134 | n_iter : int 135 | Number of iterations for progress bar 136 | verbose : bool, optional 137 | Whether to return an :obj:`tqdm.tqdm` progress bar instead of a range 138 | generator. Default: True 139 | kwargs 140 | Key-value arguments provided to :func:`tqdm.trange` 141 | 142 | Returns 143 | ------- 144 | progbar : :obj:`tqdm.tqdm` 145 | """ 146 | 147 | form = ('{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt}' 148 | ' | {elapsed}<{remaining}') 149 | defaults = dict(ascii=True, leave=False, bar_format=form) 150 | defaults.update(kwargs) 151 | 152 | return tqdm.trange(n_iter, disable=not verbose, **defaults) 153 | 154 | 155 | def dummy_code(groups, n_cond=1): 156 | """ 157 | Dummy codes `groups` and `n_cond` 158 | 159 | Parameters 160 | ---------- 161 | groups : (G,) list 162 | List with number of subjects in each of `G` groups 163 | n_cond : int, optional 164 | Number of conditions for each subject.
Default: 1 165 | 166 | Returns 167 | ------- 168 | Y : (S, F) `numpy.ndarray` 169 | Dummy-coded group array 170 | """ 171 | 172 | labels = dummy_label(groups, n_cond) 173 | dummy = np.column_stack([labels == g for g in np.unique(labels)]) 174 | 175 | return dummy.astype(int) 176 | 177 | 178 | def dummy_label(groups, n_cond=1): 179 | """ 180 | Generates group labels for `groups` and `n_cond` 181 | 182 | Parameters 183 | ---------- 184 | groups : (G,) list 185 | List with number of subjects in each of `G` groups 186 | n_cond : int, optional 187 | Number of conditions, for each subject. Default: 1 188 | 189 | Returns 190 | ------- 191 | Y : (S,) `numpy.ndarray` 192 | Dummy-label group array 193 | """ 194 | 195 | num_labels = len(groups) * n_cond 196 | 197 | return np.repeat(np.arange(num_labels) + 1, np.repeat(groups, n_cond)) 198 | 199 | 200 | def permute_cols(x, seed=None): 201 | """ 202 | Permutes the rows for each column in `x` separately 203 | 204 | Taken from https://stackoverflow.com/a/27489131 205 | 206 | Parameters 207 | ---------- 208 | x : (S, B) array_like 209 | Input array to be permuted 210 | seed : {int, :obj:`numpy.random.RandomState`, None}, optional 211 | Seed for random number generation. Default: None 212 | 213 | Returns 214 | ------- 215 | permuted : `numpy.ndarray` 216 | Permuted array 217 | """ 218 | 219 | # can't permute row with only 1 sample... 220 | x = check_array(x) 221 | rs = check_random_state(seed) 222 | ix_i = rs.random_sample(x.shape).argsort(axis=0) 223 | ix_j = np.tile(np.arange(x.shape[1]), (x.shape[0], 1)) 224 | return x[ix_i, ix_j] 225 | 226 | 227 | class _unravel(): 228 | """ 229 | Small utility to unravel generator object into a list 230 | 231 | Parameters 232 | ---------- 233 | x : generator 234 | 235 | Returns 236 | ------- 237 | y : list 238 | """ 239 | def __init__(self, *args, **kwargs): 240 | pass 241 | 242 | def __call__(self, x): 243 | return [f for f in x] 244 | 245 | def __enter__(self, *args, **kwargs): 246 | return self 247 | 248 | def __exit__(self, *args, **kwargs): 249 | pass 250 | 251 | 252 | @contextmanager 253 | def get_par_func(n_proc, func, **kwargs): 254 | """ 255 | Creates joblib-style parallelization function if joblib is available 256 | 257 | Parameters 258 | ---------- 259 | n_proc : int 260 | Number of processors (i.e., jobs) to use for parallelization 261 | func : function 262 | Function to parallelize 263 | 264 | Returns 265 | ------- 266 | parallel : :obj:`joblib.Parallel` object 267 | Object to parallelize over `func` 268 | func : :obj:`joblib.delayed` object 269 | Provided `func` wrapped in `joblib.delayed` 270 | """ 271 | 272 | if joblib_avail: 273 | func = delayed(func) 274 | with Parallel(n_jobs=n_proc, max_nbytes=1e6, 275 | mmap_mode='r+', **kwargs) as parallel: 276 | yield parallel, func 277 | else: 278 | parallel = _unravel() 279 | yield parallel, func 280 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | h5py 2 | numpy 3 | scikit-learn 4 | scipy 5 | tqdm 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = pyls 3 | url = https://github.com/rmarkello/pyls 4 | download_url = https://github.com/rmarkello/pyls 5 | author = pyls developers 6 | maintainer = Ross Markello 7 | maintainer_email = rossmarkello@gmail.com 8 | classifiers = 9 | 
Development Status :: 3 - Alpha 10 | Intended Audience :: Science/Research 11 | License :: OSI Approved :: GNU General Public License v2 (GPLv2) 12 | Programming Language :: Python :: 3.5 13 | Programming Language :: Python :: 3.6 14 | Programming Language :: Python :: 3.7 15 | license = BSD-3 16 | description = A toolbox for performing multivariate PLS decomposition analyses 17 | long_description = file:README.md 18 | long_description_content_type = text/markdown; charset=UTF-8 19 | platforms = OS Independent 20 | provides = 21 | pyls 22 | 23 | [options] 24 | python_requires = >=3.5.1 25 | install_requires = 26 | h5py 27 | numpy 28 | scikit-learn 29 | scipy 30 | tqdm 31 | tests_require = 32 | pytest >=3.6 33 | test_suite = pytest 34 | zip_safe = False 35 | packages = find: 36 | include_package_data = True 37 | 38 | [options.extras_require] 39 | doc = 40 | sphinx >=2.0 41 | sphinx-argparse 42 | sphinx_rtd_theme 43 | plotting = 44 | pandas 45 | seaborn 46 | style = 47 | flake8 48 | test = 49 | pytest-cov 50 | all = 51 | %(doc)s 52 | %(plotting)s 53 | %(style)s 54 | %(test)s 55 | 56 | [options.package_data] 57 | pyls = 58 | data/* 59 | pyls/examples/*json 60 | pyls/tests/data/* 61 | 62 | [coverage:run] 63 | omit = 64 | */pyls/tests/matlab.py 65 | */pyls/_version.py 66 | 67 | [flake8] 68 | doctests = True 69 | exclude = 70 | *build/* 71 | *sphinx* 72 | */__init__.py 73 | ignore = E402, W503 74 | max-line-length = 79 75 | 76 | [tool:pytest] 77 | doctest_optionflags = NORMALIZE_WHITESPACE 78 | xfail_strict = true 79 | addopts = -rx 80 | 81 | [versioneer] 82 | VCS = git 83 | style = pep440 84 | versionfile_source = pyls/_version.py 85 | versionfile_build = pyls/_version.py 86 | tag_prefix = 87 | parentdir_prefix = 88 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | 4 | from setuptools import setup 5 | import versioneer 6 | 7 | SETUP_REQUIRES = ['setuptools >= 30.3.0'] 8 | SETUP_REQUIRES += ['wheel'] if 'bdist_wheel' in sys.argv else [] 9 | 10 | if __name__ == "__main__": 11 | setup(name='pyls', 12 | setup_requires=SETUP_REQUIRES, 13 | version=versioneer.get_version(), 14 | cmdclass=versioneer.get_cmdclass()) 15 | --------------------------------------------------------------------------------
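A short end-to-end sketch tying the modules above together (a sketch on synthetic data, assuming only the public API shown in pyls/types/*.py; the reduced n_perm / n_boot values are for speed only, the package defaults being 5000 each):

    import numpy as np
    import pyls

    rs = np.random.RandomState(1234)
    X = rs.rand(100, 1000)  # 100 samples x 1000 (e.g., brain) features
    Y = rs.rand(100, 10)    # 100 samples x 10 behavioral measures

    # behavioral PLS decomposes the cross-covariance R = Y.T @ X;
    # test_split=0 is coerced to None (see test_structures.py), which
    # skips the cross-validation loop
    bres = pyls.behavioral_pls(X, Y, n_perm=100, n_boot=50,
                               test_split=0, seed=rs)

    # mean-centered PLS discriminates the two groups of 50 samples each;
    # with a single condition, mean_centering=0 would be reset to 1 with
    # a warning (see MeanCenteredPLS.__init__), so use 1 or 2 directly
    mres = pyls.meancentered_pls(X, groups=[50, 50], mean_centering=2,
                                 n_perm=100, n_boot=50, seed=rs)

    print(bres.varexp, mres.varexp)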