├── .appveyor.yml ├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE.md ├── MANIFEST.in ├── README.md ├── setup.cfg ├── setup.py ├── struct_lmm ├── __init__.py ├── _lmm.py ├── _testit.py └── test │ ├── __init__.py │ └── test_structlmm.py └── version.py /.appveyor.yml: -------------------------------------------------------------------------------- 1 | build: off 2 | clone_depth: 4 3 | platform: x64 4 | configuration: Release 5 | 6 | environment: 7 | PKG_NAME: struct_lmm 8 | PYTHON: "C:\\Python36-x64" 9 | ARCH: x64 10 | 11 | build_script: 12 | - set PATH=%PYTHON%;%PYTHON%\Scripts;%PATH% 13 | - powershell -Command "(New-Object Net.WebClient).DownloadFile('https://raw.githubusercontent.com/horta/ci/master/appveyor.bat', 'appveyor.bat')" && appveyor.bat 14 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: [3.6, 3.7, 3.8] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install flake8 pytest black isort setuptools --upgrade 30 | - name: Lint 31 | run: | 32 | flake8 . 33 | black --check . 34 | isort --check-only . 
35 | - name: Test with pytest 36 | run: | 37 | pip install -e . 38 | pytest 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .pytest_cache/ 2 | .asv/env/ 3 | .asv/html/ 4 | glimix-core/ 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | *.c 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *,cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | doc/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # IPython Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 
| repos: 2 | - repo: https://github.com/ambv/black 3 | rev: 20.8b1 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | - repo: https://github.com/timothycrosley/isort 8 | rev: 5.5.2 9 | hooks: 10 | - id: isort 11 | - repo: https://gitlab.com/pycqa/flake8 12 | rev: 3.8.3 13 | hooks: 14 | - id: flake8 15 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | ============== 3 | 4 | _Version 2.0, January 2004_ 5 | _<>_ 6 | 7 | ### Terms and Conditions for use, reproduction, and distribution 8 | 9 | #### 1. Definitions 10 | 11 | “License” shall mean the terms and conditions for use, reproduction, and 12 | distribution as defined by Sections 1 through 9 of this document. 13 | 14 | “Licensor” shall mean the copyright owner or entity authorized by the copyright 15 | owner that is granting the License. 16 | 17 | “Legal Entity” shall mean the union of the acting entity and all other entities 18 | that control, are controlled by, or are under common control with that entity. 19 | For the purposes of this definition, “control” means **(i)** the power, direct or 20 | indirect, to cause the direction or management of such entity, whether by 21 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the 22 | outstanding shares, or **(iii)** beneficial ownership of such entity. 23 | 24 | “You” (or “Your”) shall mean an individual or Legal Entity exercising 25 | permissions granted by this License. 26 | 27 | “Source” form shall mean the preferred form for making modifications, including 28 | but not limited to software source code, documentation source, and configuration 29 | files. 
30 | 31 | “Object” form shall mean any form resulting from mechanical transformation or 32 | translation of a Source form, including but not limited to compiled object code, 33 | generated documentation, and conversions to other media types. 34 | 35 | “Work” shall mean the work of authorship, whether in Source or Object form, made 36 | available under the License, as indicated by a copyright notice that is included 37 | in or attached to the work (an example is provided in the Appendix below). 38 | 39 | “Derivative Works” shall mean any work, whether in Source or Object form, that 40 | is based on (or derived from) the Work and for which the editorial revisions, 41 | annotations, elaborations, or other modifications represent, as a whole, an 42 | original work of authorship. For the purposes of this License, Derivative Works 43 | shall not include works that remain separable from, or merely link (or bind by 44 | name) to the interfaces of, the Work and Derivative Works thereof. 45 | 46 | “Contribution” shall mean any work of authorship, including the original version 47 | of the Work and any modifications or additions to that Work or Derivative Works 48 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 49 | by the copyright owner or by an individual or Legal Entity authorized to submit 50 | on behalf of the copyright owner. 
For the purposes of this definition, 51 | “submitted” means any form of electronic, verbal, or written communication sent 52 | to the Licensor or its representatives, including but not limited to 53 | communication on electronic mailing lists, source code control systems, and 54 | issue tracking systems that are managed by, or on behalf of, the Licensor for 55 | the purpose of discussing and improving the Work, but excluding communication 56 | that is conspicuously marked or otherwise designated in writing by the copyright 57 | owner as “Not a Contribution.” 58 | 59 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf 60 | of whom a Contribution has been received by Licensor and subsequently 61 | incorporated within the Work. 62 | 63 | #### 2. Grant of Copyright License 64 | 65 | Subject to the terms and conditions of this License, each Contributor hereby 66 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 67 | irrevocable copyright license to reproduce, prepare Derivative Works of, 68 | publicly display, publicly perform, sublicense, and distribute the Work and such 69 | Derivative Works in Source or Object form. 70 | 71 | #### 3. Grant of Patent License 72 | 73 | Subject to the terms and conditions of this License, each Contributor hereby 74 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 75 | irrevocable (except as stated in this section) patent license to make, have 76 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 77 | such license applies only to those patent claims licensable by such Contributor 78 | that are necessarily infringed by their Contribution(s) alone or by combination 79 | of their Contribution(s) with the Work to which such Contribution(s) was 80 | submitted. 
If You institute patent litigation against any entity (including a 81 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 82 | Contribution incorporated within the Work constitutes direct or contributory 83 | patent infringement, then any patent licenses granted to You under this License 84 | for that Work shall terminate as of the date such litigation is filed. 85 | 86 | #### 4. Redistribution 87 | 88 | You may reproduce and distribute copies of the Work or Derivative Works thereof 89 | in any medium, with or without modifications, and in Source or Object form, 90 | provided that You meet the following conditions: 91 | 92 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of 93 | this License; and 94 | * **(b)** You must cause any modified files to carry prominent notices stating that You 95 | changed the files; and 96 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute, 97 | all copyright, patent, trademark, and attribution notices from the Source form 98 | of the Work, excluding those notices that do not pertain to any part of the 99 | Derivative Works; and 100 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any 101 | Derivative Works that You distribute must include a readable copy of the 102 | attribution notices contained within such NOTICE file, excluding those notices 103 | that do not pertain to any part of the Derivative Works, in at least one of the 104 | following places: within a NOTICE text file distributed as part of the 105 | Derivative Works; within the Source form or documentation, if provided along 106 | with the Derivative Works; or, within a display generated by the Derivative 107 | Works, if and wherever such third-party notices normally appear. The contents of 108 | the NOTICE file are for informational purposes only and do not modify the 109 | License. 
You may add Your own attribution notices within Derivative Works that 110 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 111 | provided that such additional attribution notices cannot be construed as 112 | modifying the License. 113 | 114 | You may add Your own copyright statement to Your modifications and may provide 115 | additional or different license terms and conditions for use, reproduction, or 116 | distribution of Your modifications, or for any such Derivative Works as a whole, 117 | provided Your use, reproduction, and distribution of the Work otherwise complies 118 | with the conditions stated in this License. 119 | 120 | #### 5. Submission of Contributions 121 | 122 | Unless You explicitly state otherwise, any Contribution intentionally submitted 123 | for inclusion in the Work by You to the Licensor shall be under the terms and 124 | conditions of this License, without any additional terms or conditions. 125 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 126 | any separate license agreement you may have executed with Licensor regarding 127 | such Contributions. 128 | 129 | #### 6. Trademarks 130 | 131 | This License does not grant permission to use the trade names, trademarks, 132 | service marks, or product names of the Licensor, except as required for 133 | reasonable and customary use in describing the origin of the Work and 134 | reproducing the content of the NOTICE file. 135 | 136 | #### 7. Disclaimer of Warranty 137 | 138 | Unless required by applicable law or agreed to in writing, Licensor provides the 139 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, 140 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 141 | including, without limitation, any warranties or conditions of TITLE, 142 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are 143 | solely responsible for determining the appropriateness of using or 144 | redistributing the Work and assume any risks associated with Your exercise of 145 | permissions under this License. 146 | 147 | #### 8. Limitation of Liability 148 | 149 | In no event and under no legal theory, whether in tort (including negligence), 150 | contract, or otherwise, unless required by applicable law (such as deliberate 151 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 152 | liable to You for damages, including any direct, indirect, special, incidental, 153 | or consequential damages of any character arising as a result of this License or 154 | out of the use or inability to use the Work (including but not limited to 155 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 156 | any and all other commercial damages or losses), even if such Contributor has 157 | been advised of the possibility of such damages. 158 | 159 | #### 9. Accepting Warranty or Additional Liability 160 | 161 | While redistributing the Work or Derivative Works thereof, You may choose to 162 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 163 | other liability obligations and/or rights consistent with this License. However, 164 | in accepting such obligations, You may act only on Your own behalf and on Your 165 | sole responsibility, not on behalf of any other Contributor, and only if You 166 | agree to indemnify, defend, and hold each Contributor harmless for any liability 167 | incurred by, or claims asserted against, such Contributor by reason of your 168 | accepting any such warranty or additional liability. 
169 | 170 | _END OF TERMS AND CONDITIONS_ 171 | 172 | ### APPENDIX: How to apply the Apache License to your work 173 | 174 | To apply the Apache License to your work, attach the following boilerplate 175 | notice, with the fields enclosed by brackets `[]` replaced with your own 176 | identifying information. (Don't include the brackets!) The text should be 177 | enclosed in the appropriate comment syntax for the file format. We also 178 | recommend that a file or class name and description of purpose be included on 179 | the same “printed page” as the copyright notice for easier identification within 180 | third-party archives. 181 | 182 | Copyright 2018 C. Lippert, D. Horta, F. P. Casale, and O. Stegle 183 | 184 | Licensed under the Apache License, Version 2.0 (the "License"); 185 | you may not use this file except in compliance with the License. 186 | You may obtain a copy of the License at 187 | 188 | http://www.apache.org/licenses/LICENSE-2.0 189 | 190 | Unless required by applicable law or agreed to in writing, software 191 | distributed under the License is distributed on an "AS IS" BASIS, 192 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 193 | See the License for the specific language governing permissions and 194 | limitations under the License. 195 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.md 2 | include README.md 3 | include version.py 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Struct-LMM 2 | 3 | Structured Linear Mixed Model (StructLMM) is a computationally efficient method to 4 | test for and characterize loci that interact with multiple environments [1]. 
5 | 6 | This is a standalone module that implements the basic functionalities of StructLMM. 7 | However, we recommend using StructLMM via 8 | [LIMIX2](https://limix.readthedocs.io/en/2.0.x/index.html) as this additionally 9 | implements: 10 | 11 | - Multiple methods for GWAS; 12 | - Methods to characterize GxE at specific variants; 13 | - Command line interface. 14 | 15 | ## Install 16 | 17 | From the terminal, it can be installed using [pip](https://pypi.org/pypi/pip): 18 | 19 | ```bash 20 | pip install struct-lmm 21 | ``` 22 | 23 | ## Usage 24 | 25 | ```python 26 | >>> from numpy import ones, concatenate 27 | >>> from numpy.random import RandomState 28 | >>> 29 | >>> from struct_lmm import StructLMM 30 | >>> 31 | >>> random = RandomState(1) 32 | >>> n = 20 33 | >>> k = 4 34 | >>> y = random.randn(n, 1) 35 | >>> E = random.randn(n, k) 36 | >>> M = ones((n, 1)) 37 | >>> x = 1.0 * (random.rand(n, 1) < 0.2) 38 | >>> 39 | >>> lmm = StructLMM(y, M, E) 40 | >>> lmm.fit(verbose=False) 41 | >>> # Association test 42 | >>> pv = lmm.score_2dof_assoc(x) 43 | >>> print(pv) 44 | 0.8470017313426488 45 | >>> # Association test 46 | >>> pv, rho = lmm.score_2dof_assoc(x, return_rho=True) 47 | >>> print(pv) 48 | 0.8470017313426488 49 | >>> print(rho) 50 | 0 51 | >>> M = concatenate([M, x], axis=1) 52 | >>> lmm = StructLMM(y, M, E) 53 | >>> lmm.fit(verbose=False) 54 | >>> # Interaction test 55 | >>> pv = lmm.score_2dof_inter(x) 56 | >>> print(pv) 57 | 0.6781100453132024 58 | ``` 59 | 60 | ## Problems 61 | 62 | If you encounter any problem, please consider submitting a [new issue](https://github.com/limix/struct-lmm/issues/new). 
63 | 64 | ## Authors 65 | 66 | - [Danilo Horta](https://github.com/horta) 67 | - [Francesco Paolo Casale](https://github.com/fpcasale) 68 | - [Oliver Stegle](https://github.com/ostegle) 69 | - [Rachel Moore](https://github.com/rm18) 70 | 71 | ## License 72 | 73 | This project is licensed under the [MIT License](https://raw.githubusercontent.com/limix/struct-lmm/master/LICENSE.md). 74 | 75 | [1] Moore, R., Casale, F. P., Bonder, M. J., Horta, D., Franke, L., Barroso, I., & 76 | Stegle, O. (2018). [A linear mixed-model approach to study multivariate 77 | gene–environment interactions](https://www.nature.com/articles/s41588-018-0271-0) (p. 1). Nature Publishing Group. 78 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | author = D. Horta, P. Casale, R. Moore 3 | author_email = rm18@sanger.ac.uk 4 | classifiers = 5 | Development Status :: 5 - Production/Stable 6 | License :: OSI Approved :: MIT License 7 | Operating System :: OS Independent 8 | Programming Language :: Python 9 | description = Linear mixed model to study multivariate genotype-environment interactions 10 | download_url = https://github.com/limix/struct-lmm 11 | keywords = lmm, gwas, environment 12 | license = MIT 13 | long_description = file: README.md 14 | long_description_content_type = text/markdown 15 | maintainer = Danilo Horta 16 | platforms = Windows, MacOS, Linux 17 | maintainer_email = horta@ebi.ac.uk 18 | name = struct-lmm 19 | url = https://github.com/limix/struct-lmm 20 | version = attr: version.get 21 | 22 | [options] 23 | zip_safe = True 24 | include_package_data = True 25 | packages = find: 26 | setup_requires = 27 | pytest-runner>=5 28 | install_requires = 29 | chiscore>=0.2.2 30 | glimix-core>=3.1.11 31 | numpy-sugar>=1.5.1 32 | numpy>=1.17 33 | pytest-doctestplus>=0.8.0 34 | pytest>=5 35 | 36 | [aliases] 37 | test = pytest 38 | 39 | 
[tool:pytest] 40 | addopts = 41 | --doctest-plus 42 | --doctest-modules 43 | --text-file-format="rst" 44 | --doctest-glob='*.rst' 45 | --ignore="setup.py" 46 | --ignore="doc/conf.py" 47 | doctest_plus = enabled 48 | doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS ALLOW_UNICODE FLOAT_CMP 49 | doctest_plus_atol = 1e-03 50 | doctest_plus_rtol = 1e-03 51 | doctest_rst = enabled 52 | norecursedirs = .eggs .git *.egg-info build .ropeproject .undodir 53 | pep8ignore = E402 W0212 W0622 R0915 54 | 55 | [tool:isort] 56 | multi_line_output=3 57 | include_trailing_comma=True 58 | force_grid_wrap=0 59 | combine_as_imports=True 60 | line_length=88 61 | 62 | [pylint] 63 | disable = redefined-builtin,R0915 64 | 65 | [flake8] 66 | ignore = E501 E741 E203 W503 W0212 W0622 R0915 67 | 68 | [rstcheck] 69 | ignore_substitutions = today, version 70 | ignore_directives = plot, autofunction, command-output, autmodule, automodule, autoclass, autoattribute, automethod, doctest 71 | ignore_messages = Error in "math" directive 72 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | if __name__ == "__main__": 4 | setup() 5 | -------------------------------------------------------------------------------- /struct_lmm/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | StructLMM 3 | ========= 4 | 5 | Let n be the number of samples. 6 | StructLMM [1] extends the conventional linear mixed model by including an 7 | additional per-individual effect term that accounts for genotype-environment 8 | interaction, which can be represented as an n×1 vector, 𝛃. 9 | The model is given by 10 | 11 | 𝐲 = 𝙼𝛂 + 𝐠𝛽 + 𝐠⊙𝛃 + 𝐞 + 𝛆, 12 | 13 | where 14 | 15 | 𝛽 ∼ 𝓝(0, 𝓋₀⋅ρ), 𝛃 ∼ 𝓝(𝟎, 𝓋₀(1-ρ)𝙴𝙴ᵀ), 𝐞 ∼ 𝓝(𝟎, 𝓋₁𝚆𝚆ᵀ), and 𝛆 ∼ 𝓝(𝟎, 𝓋₂𝙸). 
16 | 17 | The vector 𝐲 is the outcome, matrix 𝙼 contains the covariates, and vector 𝐠 is the 18 | genetic variant. 19 | The matrices 𝙴 and 𝚆 are generally the same, and represent the environment 20 | configuration for each sample. 21 | The parameters 𝓋₀, 𝓋₁, and 𝓋₂ are the overall variances. 22 | The parameter ρ ∈ [𝟶, 𝟷] dictates the relevance of genotype-environment interaction 23 | versus the genotype effect alone. 24 | The term 𝐞 accounts for additive environment-only effects while 𝛆 accounts for 25 | noise effects. 26 | 27 | .. [1] Moore, R., Casale, F. P., Bonder, M. J., Horta, D., Franke, L., Barroso, I., & 28 | Stegle, O. (2018). A linear mixed-model approach to study multivariate 29 | gene–environment interactions (p. 1). Nature Publishing Group. 30 | """ 31 | from ._lmm import StructLMM 32 | from ._testit import test 33 | 34 | __version__ = "0.3.2" 35 | 36 | __all__ = ["StructLMM", "__version__", "test"] 37 | -------------------------------------------------------------------------------- /struct_lmm/_lmm.py: -------------------------------------------------------------------------------- 1 | from chiscore import davies_pvalue, optimal_davies_pvalue 2 | 3 | 4 | class StructLMM: 5 | r""" 6 | Structured linear mixed model that accounts for genotype-environment interactions. 7 | 8 | Let n be the number of samples. 9 | StructLMM [1] extends the conventional linear mixed model by including an 10 | additional per-individual effect term that accounts for genotype-environment 11 | interaction, which can be represented as an n×1 vector, 𝛃. 12 | The model is given by 13 | 14 | 𝐲 = 𝙼𝛂 + 𝐠𝛽 + 𝐠⊙𝛃 + 𝐞 + 𝛆, 15 | 16 | where 17 | 18 | 𝛽 ∼ 𝓝(0, 𝓋₀⋅ρ), 𝛃 ∼ 𝓝(𝟎, 𝓋₀(1-ρ)𝙴𝙴ᵀ), 𝐞 ∼ 𝓝(𝟎, 𝓋₁𝚆𝚆ᵀ), and 𝛆 ∼ 𝓝(𝟎, 𝓋₂𝙸). 19 | 20 | The vector 𝐲 is the outcome, matrix 𝙼 contains the covariates, and vector 𝐠 is the 21 | genetic variant. 22 | The matrices 𝙴 and 𝚆 are generally the same, and represent the environment 23 | configuration for each sample. 
24 | The parameters 𝓋₀, 𝓋₁, and 𝓋₂ are the overall variances. 25 | The parameter ρ ∈ [𝟶, 𝟷] dictates the relevance of genotype-environment interaction 26 | versus the genotype effect alone. 27 | The term 𝐞 accounts for additive environment-only effects while 𝛆 accounts for 28 | noise effects. 29 | 30 | The above model is equivalent to 31 | 32 | 𝐲 = 𝙼𝛂 + 𝐠⊙𝛃 + 𝐞 + 𝛆, 33 | 34 | where 35 | 36 | 𝛃 ∼ 𝓝(𝟎, 𝓋₀(ρ𝟏𝟏ᵀ + (1-ρ)𝙴𝙴ᵀ)), 𝐞 ∼ 𝓝(𝟎, 𝓋₁𝚆𝚆ᵀ), and 𝛆 ∼ 𝓝(𝟎, 𝓋₂𝙸). 37 | 38 | Its marginalised form is given by 39 | 40 | 𝐲 ∼ 𝓝(𝙼𝛂, 𝓋₀𝙳(ρ𝟏𝟏ᵀ + (1-ρ)𝙴𝙴ᵀ)𝙳 + 𝓋₁𝚆𝚆ᵀ + 𝓋₂𝙸), 41 | 42 | where 𝙳 = diag(𝐠). 43 | 44 | StructLMM method is used to perform two types of statistical tests. 45 | The association one compares the following hypothesis: 46 | 47 | 𝓗₀: 𝓋₀ = 0 48 | 𝓗₁: 𝓋₀ > 0 49 | 50 | 𝓗₀ denotes no genetic association, while 𝓗₁ models any genetic association. 51 | In particular, 𝓗₁ includes genotype-environment interaction as part of genetic 52 | association. 53 | The interaction test is slightly more complicated as the term 𝐠𝛽 is now considered 54 | a fixed one. In pratice, we include 𝐠 in the covariates matrix 𝙼 and set ρ = 0. 55 | We refer to this modified model as the interaction model. 56 | The compared hypothesis are: 57 | 58 | 𝓗₀: 𝓋₀ = 0 (given the interaction model) 59 | 𝓗₁: 𝓋₀ > 0 (given the interaction model) 60 | 61 | Implementation 62 | -------------- 63 | 64 | We employ the score-test statistic [2] for both tests 65 | 66 | 𝑄 = ½𝐲ᵀ𝙿(∂𝙺)𝙿𝐲, 67 | 68 | where 69 | 70 | 𝙿 = 𝙺⁻¹ - 𝙺⁻¹𝙼(𝙼ᵀ𝙺⁻¹𝙼)⁻¹𝙼ᵀ𝙺⁻¹ and cov(𝐲) = 𝙺 71 | 72 | for the REML-estimated parameters under the null hypothesis. 73 | The derivative is taken over the parameter being tested. 74 | 75 | Lets for now assume that ρ is given. 76 | In practice, we have 77 | 78 | 𝙺ᵨ = 𝓋₀𝙳(ρ𝟏𝟏ᵀ + (1-ρ)𝙴𝙴ᵀ)𝙳 + 𝓋₁𝚆𝚆ᵀ + 𝓋₂𝙸 79 | ∂𝙺ᵨ = 𝙳(ρ𝟏𝟏ᵀ + (1-ρ)𝙴𝙴ᵀ)𝙳 80 | 81 | for association test and 82 | 83 | 𝙺₀ = 𝓋₀𝙳𝙴𝙴ᵀ𝙳 + 𝓋₁𝚆𝚆ᵀ + 𝓋₂𝙸 84 | ∂𝙺₀ = 𝙳𝙴𝙴ᵀ𝙳 85 | 86 | for interaction test, for parameters estimated via REML. 
87 | The outcome distribution under null is 88 | 89 | 𝐲 ∼ 𝓝(𝙼𝛂, 𝓋₁𝚆𝚆ᵀ + 𝓋₂𝙸). 90 | 91 | It can be shown [2]_ that 92 | 93 | 𝑄 ∼ ∑ᵢ𝜆ᵢ𝜒²(1), 94 | 95 | where the weights 𝜆ᵢ are the non-zero eigenvalues of ½√𝙿(∂𝙺)√𝙿. 96 | We employ modified Liu approximation to 𝑄 proposed [3] and modified in [4]. 97 | 98 | References 99 | ---------- 100 | .. [1] Moore, R., Casale, F. P., Bonder, M. J., Horta, D., Franke, L., Barroso, 101 | I., & Stegle, O. (2018). A linear mixed-model approach to study multivariate 102 | gene–environment interactions (p. 1). Nature Publishing Group. 103 | .. [2] Lippert, C., Xiang, J., Horta, D., Widmer, C., Kadie, C., Heckerman, D., 104 | & Listgarten, J. (2014). Greater power and computational efficiency for 105 | kernel-based association testing of sets of genetic variants. Bioinformatics, 106 | 30(22), 3206-3214. 107 | .. [3] Liu, H., Tang, Y., & Zhang, H. H. (2009). A new chi-square approximation 108 | to the distribution of non-negative definite quadratic forms in non-central 109 | normal variables. Computational Statistics & Data Analysis, 53(4), 853-856. 110 | .. [4] Lee, Seunggeun, Michael C. Wu, and Xihong Lin. "Optimal tests for rare 111 | variant effects in sequencing association studies." Biostatistics 13.4 (2012): 112 | 762-775. 
113 | """ 114 | 115 | def __init__(self, y, M, E, W=None): 116 | from numpy import asarray, atleast_2d, sqrt 117 | from numpy_sugar import ddot 118 | 119 | self._y = atleast_2d(asarray(y, float).ravel()).T 120 | self._E = atleast_2d(asarray(E, float).T).T 121 | 122 | if W is None: 123 | self._W = self._E 124 | elif isinstance(W, tuple): 125 | # W must be an eigen-decomposition of 𝚆𝚆ᵀ 126 | self._W = ddot(W[0], sqrt(W[1])) 127 | else: 128 | self._W = atleast_2d(asarray(W, float).T).T 129 | 130 | self._M = atleast_2d(asarray(M, float).T).T 131 | 132 | nsamples = len(self._y) 133 | if nsamples != self._M.shape[0]: 134 | raise ValueError("Number of samples mismatch between y and M.") 135 | 136 | if nsamples != self._E.shape[0]: 137 | raise ValueError("Number of samples mismatch between y and E.") 138 | 139 | if nsamples != self._W.shape[0]: 140 | raise ValueError("Number of samples mismatch between y and W.") 141 | 142 | self._lmm = None 143 | self._rhos = [0.0, 0.1 ** 2, 0.2 ** 2, 0.3 ** 2, 0.4 ** 2, 0.5 ** 2, 0.5, 0.999] 144 | 145 | def fit(self, verbose=True): 146 | from glimix_core.lmm import Kron2Sum 147 | 148 | self._lmm = Kron2Sum(self._y, [[1]], self._M, self._W, restricted=True) 149 | self._lmm.fit(verbose=verbose) 150 | self._covarparam0 = self._lmm.C0[0, 0] 151 | self._covarparam1 = self._lmm.C1[0, 0] 152 | 153 | def _P(self, v): 154 | """ 155 | Let 𝙺 be the optimal covariance matrix under the null hypothesis. 156 | Given 𝐯, this method computes 157 | 158 | 𝙿𝐯 = 𝙺⁻¹𝐯 - 𝙺⁻¹𝙼(𝙼ᵀ𝙺⁻¹𝙼)⁻¹𝙼ᵀ𝙺⁻¹𝐯. 159 | """ 160 | from numpy_sugar.linalg import rsolve 161 | from scipy.linalg import cho_solve 162 | 163 | x = rsolve(self._lmm.covariance(), v) 164 | if self._lmm.X is not None: 165 | Lh = self._lmm._terms["Lh"] 166 | t = self._lmm.X @ cho_solve(Lh, self._lmm.M.T @ x) 167 | x -= rsolve(self._lmm.covariance(), t) 168 | 169 | return x 170 | 171 | def _score_stats(self, g, rhos): 172 | """ 173 | Let 𝙺 be the optimal covariance matrix under the null hypothesis. 
174 | For a given ρ, the score-based test statistic is given by 175 | 176 | 𝑄ᵨ = ½𝐲ᵀ𝙿ᵨ(∂𝙺ᵨ)𝙿ᵨ𝐲, 177 | 178 | where 179 | 180 | ∂𝙺ᵨ = 𝙳(ρ𝟏𝟏ᵀ + (1-ρ)𝙴𝙴ᵀ)𝙳 181 | 182 | and 𝙳 = diag(𝐠). 183 | """ 184 | from numpy import zeros 185 | from numpy_sugar import ddot 186 | 187 | Q = zeros(len(rhos)) 188 | DPy = ddot(g, self._P(self._y)) 189 | s = DPy.sum() 190 | l = s * s 191 | DPyE = DPy.T @ self._E 192 | r = DPyE @ DPyE.T 193 | for i, rho in enumerate(rhos): 194 | Q[i] = (rho * l + (1 - rho) * r) / 2 195 | 196 | return Q 197 | 198 | def _score_stats_null_dist(self, g): 199 | """ 200 | Under the null hypothesis, the score-based test statistic follows a weighted sum 201 | of random variables: 202 | 203 | 𝑄 ∼ ∑ᵢ𝜆ᵢχ²(1), 204 | 205 | where 𝜆ᵢ are the non-zero eigenvalues of ½√𝙿(∂𝙺)√𝙿. 206 | 207 | Note that 208 | 209 | ∂𝙺ᵨ = 𝙳(ρ𝟏𝟏ᵀ + (1-ρ)𝙴𝙴ᵀ)𝙳 = (ρ𝐠𝐠ᵀ + (1-ρ)𝙴̃𝙴̃ᵀ) 210 | 211 | for 𝙴̃ = 𝙳𝙴. 212 | By using SVD decomposition, one can show that the non-zero eigenvalues of 𝚇𝚇ᵀ 213 | are equal to the non-zero eigenvalues of 𝚇ᵀ𝚇. 214 | Therefore, 𝜆ᵢ are the non-zero eigenvalues of 215 | 216 | ½[√ρ𝐠 √(1-ρ)𝙴̃]𝙿[√ρ𝐠 √(1-ρ)𝙴̃]ᵀ. 217 | 218 | """ 219 | from math import sqrt 220 | 221 | from numpy import empty 222 | from numpy.linalg import eigvalsh 223 | from numpy_sugar import ddot 224 | 225 | Et = ddot(g, self._E) 226 | Pg = self._P(g) 227 | PEt = self._P(Et) 228 | 229 | gPg = g.T @ Pg 230 | EtPEt = Et.T @ PEt 231 | gPEt = g.T @ PEt 232 | 233 | n = Et.shape[1] + 1 234 | F = empty((n, n)) 235 | 236 | lambdas = [] 237 | for i in range(len(self._rhos)): 238 | rho = self._rhos[i] 239 | 240 | F[0, 0] = rho * gPg 241 | F[0, 1:] = sqrt(rho) * sqrt(1 - rho) * gPEt 242 | F[1:, 0] = F[0, 1:] 243 | F[1:, 1:] = (1 - rho) * EtPEt 244 | 245 | lambdas.append(eigvalsh(F) / 2) 246 | 247 | return lambdas 248 | 249 | def _score_stats_pvalue(self, Qs, lambdas): 250 | """ 251 | Computes Pr(𝑄 > q) for 𝑄 ∼ ∑ᵢ𝜆ᵢχ²(1). 252 | 253 | Pr(𝑄 > q) is the p-value for the score statistic. 
254 | 255 | Parameters 256 | ---------- 257 | Qs : array_like 258 | 𝑄ᵨ statistic. 259 | lambdas : array_like 260 | 𝜆ᵢ from the null distribution for each ρ. 261 | """ 262 | from numpy import stack 263 | 264 | return stack([_mod_liu(Q, lam) for Q, lam in zip(Qs, lambdas)], axis=0) 265 | 266 | def _qmin(self, pliumod): 267 | import scipy.stats as st 268 | from numpy import zeros 269 | 270 | # T statistic 271 | T = pliumod[:, 0].min() 272 | 273 | qmin = zeros(len(self._rhos)) 274 | percentile = 1 - T 275 | for i in range(len(self._rhos)): 276 | q = st.chi2.ppf(percentile, pliumod[i, 3]) 277 | mu_q = pliumod[i, 1] 278 | sigma_q = pliumod[i, 2] 279 | dof = pliumod[i, 3] 280 | qmin[i] = (q - dof) / (2 * dof) ** 0.5 * sigma_q + mu_q 281 | 282 | return qmin 283 | 284 | # SKAT 285 | def score_2dof_inter(self, X): 286 | """ 287 | Interaction test. 288 | 289 | Parameters 290 | ---------- 291 | X : 1d-array 292 | Genetic variant. 293 | 294 | Returns 295 | ------- 296 | float 297 | P-value. 298 | """ 299 | from numpy import empty 300 | from numpy_sugar import ddot 301 | 302 | Q_rho = self._score_stats(X.ravel(), [0]) 303 | 304 | g = X.ravel() 305 | Et = ddot(g, self._E) 306 | PEt = self._P(Et) 307 | 308 | EtPEt = Et.T @ PEt 309 | gPEt = g.T @ PEt 310 | 311 | n = Et.shape[1] + 1 312 | F = empty((n, n)) 313 | 314 | F[0, 0] = 0 315 | F[0, 1:] = gPEt 316 | F[1:, 0] = F[0, 1:] 317 | F[1:, 1:] = EtPEt 318 | F /= 2 319 | 320 | return davies_pvalue(Q_rho[0], F) 321 | 322 | # SKAT-O 323 | def score_2dof_assoc(self, X, return_rho=False): 324 | """ 325 | Association test. 326 | 327 | Parameters 328 | ---------- 329 | X : 1d-array 330 | Genetic variant. 331 | return_rho : bool (optional) 332 | ``True`` to return the optimal ρ; ``False`` otherwise (Default). 333 | 334 | Returns 335 | ------- 336 | float 337 | P-value. 338 | float 339 | Optimal ρ. Returned only if ``return_rho == True``. 
340 | """ 341 | from numpy import empty, sum, trace, where 342 | from numpy.linalg import eigvalsh 343 | 344 | Q_rho = self._score_stats(X.ravel(), self._rhos) 345 | null_lambdas = self._score_stats_null_dist(X.ravel()) 346 | pliumod = self._score_stats_pvalue(Q_rho, null_lambdas) 347 | optimal_rho = pliumod[:, 0].argmin() 348 | qmin = self._qmin(pliumod) 349 | 350 | # 3. Calculate quantites that occur in null distribution 351 | Px1 = self._P(X) 352 | m = 0.5 * (X.T @ Px1) 353 | xoE = X * self._E 354 | PxoE = self._P(xoE) 355 | ETxPxE = 0.5 * (xoE.T @ PxoE) 356 | ETxPx1 = xoE.T @ Px1 357 | ETxPx11xPxE = 0.25 / m * (ETxPx1 @ ETxPx1.T) 358 | ZTIminusMZ = ETxPxE - ETxPx11xPxE 359 | eigh = eigvalsh(ZTIminusMZ) 360 | 361 | eta = ETxPx11xPxE @ ZTIminusMZ 362 | vareta = 4 * trace(eta) 363 | 364 | OneZTZE = 0.5 * (X.T @ PxoE) 365 | tau_top = OneZTZE @ OneZTZE.T 366 | tau_rho = empty(len(self._rhos)) 367 | for i in range(len(self._rhos)): 368 | tau_rho[i] = self._rhos[i] * m + (1 - self._rhos[i]) / m * tau_top 369 | 370 | MuQ = sum(eigh) 371 | VarQ = sum(eigh ** 2) * 2 + vareta 372 | KerQ = sum(eigh ** 4) / (sum(eigh ** 2) ** 2) * 12 373 | Df = 12 / KerQ 374 | 375 | # 4. 
def _mod_liu(q, w):
    """
    Evaluate ``chiscore.liu_sf`` for a weighted sum of χ² terms.

    Every weight in ``w`` is paired with a ``1`` and a ``0`` in the third and
    fourth arguments (presumably one degree of freedom and zero
    non-centrality per term — confirm against chiscore).

    Parameters
    ----------
    q : float
        Value of the statistic.
    w : array_like
        Weights of the χ² terms.

    Returns
    -------
    tuple
        ``(pv, mu_q, sigma_q, dof_x)`` where ``pv`` and ``dof_x`` are the first
        two items returned by ``liu_sf`` and ``mu_q`` / ``sigma_q`` come from
        its ``info`` dictionary.
    """
    from chiscore import liu_sf

    n_terms = len(w)
    dofs = [1] * n_terms
    deltas = [0] * n_terms

    # NOTE(review): the trailing True is believed to be liu_sf's kurtosis
    # switch — confirm against the chiscore API.
    pv, dof_x, _unused, info = liu_sf(q, w, dofs, deltas, True)

    mu_q = info["mu_q"]
    sigma_q = info["sigma_q"]
    return (pv, mu_q, sigma_q, dof_x)
def test_structlmm_assoc():
    """Association test reproduces the pinned p-values and optimal ρ index."""
    rng = RandomState(1)
    n_samples, n_envs = 20, 4

    # Draw order matters: the expected values below are tied to this seed.
    y = rng.randn(n_samples, 1)
    E = rng.randn(n_samples, n_envs)
    M = ones((n_samples, 1))
    x = 1.0 * (rng.rand(n_samples, 1) < 0.2)

    slmm = StructLMM(y, M, E, W=E)
    slmm.fit(verbose=False)

    # Binary variant, p-value only.
    expected_pv = 0.8470039620073695
    assert_allclose([slmm.score_2dof_assoc(x)], [expected_pv], rtol=1e-5)

    # Continuous variant, p-value together with the optimal ρ index.
    x = rng.rand(n_samples, 1)
    pv, rho = slmm.score_2dof_assoc(x, True)
    assert_allclose([pv, rho], [0.15803897226190278, 7], rtol=1e-5)


def test_structlmm_inter():
    """Interaction test reproduces the pinned p-value."""
    rng = RandomState(1)
    n_samples, n_envs = 20, 4

    # Draw order matters: the expected value below is tied to this seed.
    y = rng.randn(n_samples, 1)
    E = rng.randn(n_samples, n_envs)
    x = 1.0 * (rng.rand(n_samples) < 0.2)

    # The variant itself is included as a covariate next to the intercept.
    M = stack([ones(n_samples), x], axis=1)

    slmm = StructLMM(y, M, E, W=E)
    slmm.fit(verbose=False)

    pv = slmm.score_2dof_inter(x)
    assert_allclose([pv], [0.6781070640353783], rtol=1e-5)
def get():
    """
    Read ``__version__`` from the first package found by ``find_packages``.

    Returns
    -------
    str
        The version string without its surrounding quotes, or ``"unknown"``
        when no package is found or its ``__init__.py`` holds no
        ``__version__ = '...'`` assignment.
    """
    pkgnames = find_packages()
    if not pkgnames:
        return "unknown"

    init_path = join(pkgnames[0], "__init__.py")
    # Context manager so the file handle is closed even if reading fails.
    with open(init_path, encoding="utf8") as init_file:
        content = init_file.read()

    m = re.search(r"__version__ *= *('[^']+'|\"[^\"]+\")", content)
    if m is None:
        return "unknown"

    # The captured group still carries its quotes; strip one from each end.
    return m.group(1)[1:-1]