├── .gitignore ├── .idea ├── .gitignore ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── permGWAS.iml └── vcs.xml ├── Docker ├── Dockerfile └── requirements.txt ├── LICENSE ├── README.md ├── create_h5_file.py ├── create_plot.py ├── data ├── config.yaml ├── cov_matrix.csv ├── k_matrix.csv ├── k_matrix.h5 ├── x_matrix.csv ├── x_matrix.h5 ├── x_matrix.map ├── x_matrix.ped ├── y_matrix.csv └── y_matrix.pheno ├── docs ├── DATAGUIDE.md ├── INSTALLATION.md ├── OPTIONS.md ├── PERMUTATIONS.md ├── PLOTS.md ├── QUICKSTART.md ├── manhattan.png └── qq_plot.png ├── models ├── __init__.py ├── _base_model.py └── lmm.py ├── optimize ├── __init__.py └── brent.py ├── perform_gwas.py ├── permGWAS.py ├── permGWAS_logo.png ├── postprocess ├── __init__.py └── plot_functions.py ├── preprocess ├── __init__.py └── data_loader.py ├── supplementary_data ├── simulated_phenotypes_her30.h5 └── suppl_data_John_et_al_2022 │ └── AraGWAS_thresholds.csv └── utils ├── __init__.py ├── check_functions.py └── helper_functions.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 13 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/permGWAS.iml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.5.2-base-ubuntu20.04 2 | RUN apt-get update && apt-get install -y python3 && apt-get install -y python3-pip 3 | RUN apt-get install -y vim 4 | RUN apt-get install -y git 5 | RUN mkdir /configfiles 6 | COPY requirements.txt /configfiles 7 | RUN pip3 install -r /configfiles/requirements.txt 8 | RUN pip3 install torch==1.11.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html 9 | -------------------------------------------------------------------------------- /Docker/requirements.txt: -------------------------------------------------------------------------------- 1 | h5py 2 | matplotlib 3 | numpy 4 | pandas 5 | pandas-plink 6 | scipy 7 | seaborn 8 | pyyaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Grimm Lab - Bioinformatics and Machine Learning 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice 
shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Python 3.8](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10-blue)](https://www.python.org/downloads/release/python-3100/) 2 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 3 | 4 | 5 | 6 | ## permGWAS2 7 | 8 | This is an improved version of permGWAS. The original version can be found at [permGWAS Version1](https://github.com/grimmlab/permGWAS/releases/tag/permGWAS) 9 | 10 | permGWAS2 is an open source software tool written in python to efficiently perform genome-wide association studies (GWAS) 11 | with permutation-based thresholds. It uses a batch-wise Linear Mixed Model to compute several univariate tests simultaneously. 12 | permGWAS2 provides support for multiple CPUs as well as for GPUs. 
13 | 14 | In contrast to the original version, permGWAS2 allows for two different permutation strategies: 15 | 16 | x (default): permute the fixed effects matrix including covariates and the SNP of interest (equivalent to permuting y and the covariance matrix) 17 | 18 | y: permute only the phenotype vector (same method as in the original permGWAS) 19 | 20 | Details on the architecture of permGWAS and permGWAS2, benchmarking results of the framework and on permutation-based thresholds can be found in our publications. 21 | 22 | ## How to run permGWAS2 23 | 1. [Requirements & Installation](./docs/INSTALLATION.md) 24 | 2. [Quickstart Guide](./docs/QUICKSTART.md) 25 | 3. [Data Guide](./docs/DATAGUIDE.md) 26 | 4. [permGWAS2 with permutations](./docs/PERMUTATIONS.md) 27 | 5. [Create plots](./docs/PLOTS.md) 28 | 6. [Optional settings](./docs/OPTIONS.md) 29 | 30 | 31 | ## Publications & Citation 32 | 33 | John, M., Korte, A., Todesco M., & Grimm, D. G. (2024). 34 | **Population-aware permutation-based significance thresholds for genome-wide association studies**. 35 | Bioinformatics Advances, 2024 36 | 37 | DOI: [https://doi.org/10.1093/bioadv/vbae168](https://doi.org/10.1093/bioadv/vbae168) 38 | 39 | John, M., Ankenbrand, M. J., Artmann, C., Freudenthal, J. A., Korte, A., & Grimm, D. G. (2022). 40 | **Efficient Permutation-based Genome-wide Association Studies for Normal and Skewed Phenotypic Distributions**. 41 | Bioinformatics, 2022. 
42 | 43 | DOI: [https://doi.org/10.1093/bioinformatics/btac455](https://doi.org/10.1093/bioinformatics/btac455) 44 | -------------------------------------------------------------------------------- /create_h5_file.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pathlib 3 | from preprocess import data_loader 4 | from utils import check_functions 5 | 6 | if __name__ == "__main__": 7 | # Input parameters 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('-x', '--genotype_file', type=str, 10 | help='specify the name of the genotype file, absolute and relative paths are accepted, ' 11 | 'only accept CSV, PLINK and binary PLINK files, ' 12 | 'PLINK and binary PLINK: all required files must be in the same folder with same prefix,' 13 | 'for format CSV files check documentation') 14 | parser.add_argument('-sd', '--save_dir', type=str, default=None, 15 | help='specify a directory to save newly generated H5 file. Optional, if None is specified, ' 16 | 'H5 file will be saved in same directory as original genotype file.') 17 | 18 | args = vars(parser.parse_args()) 19 | args["genotype_file"] = check_functions.check_file(args["genotype_file"]) 20 | if pathlib.Path(args["genotype_file"]).suffix in ('.h5', '.hdf5', '.h5py'): 21 | raise Exception('Genotype file is already in HDF5, H5, H5PY') 22 | if args["save_dir"] is None: 23 | args["save_dir"] = pathlib.Path(args["genotype_file"]).parent 24 | out_file = pathlib.Path(args["genotype_file"]).with_suffix('.h5').stem 25 | args["save_dir"], out_file = check_functions.check_dir_paths(out_dir=args["save_dir"], out_file=out_file, prefix='') 26 | 27 | # load data from file 28 | print('Load data from file ' + str(args["genotype_file"])) 29 | dataset = data_loader.Genotype(genotype_file=args["genotype_file"]) 30 | dataset.load_genotype_data() 31 | 32 | # save data as H5 33 | dataset.save_genotype_hdf5(filename=args["save_dir"].joinpath(out_file)) 34 | 
-------------------------------------------------------------------------------- /create_plot.py: -------------------------------------------------------------------------------- 1 | # create Manhattan and QQ-plots 2 | import pandas as pd 3 | import pathlib 4 | import argparse 5 | 6 | from utils import check_functions 7 | from postprocess import plot_functions 8 | 9 | if __name__ == "__main__": 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('-p_val', '--p_value_file', type=str, default=None, 12 | help='Specify the full path to the p_value file, absolute and relative paths are accepted, ' 13 | 'only accept .csv files. p_value files must at least contain chromosome ids (CHR), ' 14 | 'position ids (POS) and corresponding p_values (p_value).') 15 | parser.add_argument('-min_p_val', '--min_p_value_file', type=str, default=None, 16 | help='Optional, specify the full path to the file containing minimal p-values in order to ' 17 | 'compute permutation-based thresholds, absolute and relative paths are accepted, ' 18 | 'only accept .csv files.') 19 | parser.add_argument('-mplot', '--manhattan', action='store_true', 20 | help='optional, creates manhattan plot') 21 | parser.add_argument('-qqplot', action='store_true', 22 | help='optional, creates QQ-plot') 23 | parser.add_argument('-out_dir', type=str, default=None, 24 | help='Specify the name of the directory plots should be stored in,' 25 | 'absolute and relative paths are accepted. Optional, if not provided, files will be ' 26 | 'stored in same folder as p_value file.') 27 | parser.add_argument('-out_file', type=str, default=None, 28 | help='Specify NAME of plots, will be stored as manhattan_NAME.png or qq_plot_NAME.png,' 29 | 'optional, if not provided name of p_value file will be used.') 30 | parser.add_argument('-sig_level', type=int, default=5, 31 | help='Significance level (percentage values) to compute threshold for Manhattan plot. 
' 32 | 'Optional, default is 5.') 33 | args = vars(parser.parse_args()) 34 | 35 | args["p_value_file"] = check_functions.check_file(args["p_value_file"]) 36 | if args["min_p_value_file"] is not None: 37 | args["min_p_value_file"] = check_functions.check_file(args["min_p_value_file"]) 38 | if args["out_dir"] is None: 39 | args["out_dir"] = pathlib.Path(args["p_value_file"]).parent 40 | if args["out_file"] is None: 41 | args["out_file"] = pathlib.Path(args["p_value_file"]).stem 42 | 43 | df = pd.read_csv(args["p_value_file"]) 44 | if not {'CHR', 'POS', 'p_value'}.issubset(df.columns): 45 | raise Exception('Cannot create Manhattan plot; need CHR, POS and p_value in DataFrame.') 46 | 47 | if args["manhattan"]: 48 | out_dir, out_file = check_functions.check_dir_paths(out_dir=args["out_dir"], out_file=args["out_file"], 49 | prefix='manhattan_') 50 | print('Save Manhattan plot with significance level of %d.' % args["sig_level"]) 51 | if args["min_p_value_file"] is not None: 52 | df_min = pd.read_csv(args["min_p_value_file"]) 53 | if not 'min_p_val' in df_min.columns: 54 | raise Exception('Cannot compute permutation-based threshold, need min_p_val in DataFrame.') 55 | min_p_val = df_min['min_p_val'].values 56 | else: 57 | min_p_val = None 58 | plot_functions.manhattan_plot(df=df, data_dir=out_dir, filename=out_file, 59 | min_p_values=min_p_val, sig_level=args["sig_level"]) 60 | 61 | if args["qqplot"]: 62 | out_dir, out_file = check_functions.check_dir_paths(out_dir=args["out_dir"], out_file=args["out_file"], 63 | prefix='qq_plot_') 64 | print('Save QQ-plot.') 65 | plot_functions.qq_plot(p_values=df['p_value'].values, data_dir=out_dir, filename=out_file) 66 | -------------------------------------------------------------------------------- /data/config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | genotype_file: "./data/x_matrix.h5" 3 | phenotype_file: "./data/y_matrix.csv" 4 | trait: "phenotype_value" 5 | kinship_file: 6 
| covariate_file: 7 | covariate_list: 8 | perm_method: "x" 9 | maf_threshold: 0 10 | perm: 100 -------------------------------------------------------------------------------- /data/cov_matrix.csv: -------------------------------------------------------------------------------- 1 | accession_id,covariate 2 | 9381,1 3 | 9380,1 4 | 9378,1 5 | 9371,0 6 | 9367,1 7 | 9363,0 8 | 9356,0 9 | 9355,0 10 | 9354,0 11 | 9353,0 12 | 9352,1 13 | 9351,0 14 | 9344,1 15 | 9343,0 16 | 9339,1 17 | 9336,1 18 | 9332,0 19 | 9323,0 20 | 9321,1 21 | 9482,1 22 | 9481,0 23 | 9472,0 24 | 9471,1 25 | 9470,1 26 | 9469,0 27 | 9455,0 28 | 9454,1 29 | 9453,0 30 | 9451,1 31 | 9419,1 32 | 9418,0 33 | 9409,1 34 | 9402,0 35 | 9369,1 36 | 9349,1 37 | 9476,1 38 | 9433,1 39 | 9446,1 40 | 9443,1 41 | 9442,1 42 | 997,1 43 | 996,0 44 | 1068,1 45 | 1026,0 46 | 1585,1 47 | 1435,1 48 | 1169,1 49 | 1075,1 50 | 1132,1 51 | 1064,0 52 | 1063,0 53 | 1062,1 54 | 1247,1 55 | 991,1 56 | 1391,0 57 | 1374,0 58 | 1318,0 59 | 1254,1 60 | 1163,1 61 | 1153,1 62 | 1073,1 63 | 1072,1 64 | 394,0 65 | 7,0 66 | 203,1 67 | 236,0 68 | 367,0 69 | 123,1 70 | 395,0 71 | 196,1 72 | 264,0 73 | 185,1 74 | 297,0 75 | 318,0 76 | 323,0 77 | 79,0 78 | 198,0 79 | 371,0 80 | 280,0 81 | 12,1 82 | 347,1 83 | 268,1 84 | 288,1 85 | 377,1 86 | 252,0 87 | 296,1 88 | 341,1 89 | 156,0 90 | 397,1 91 | 263,0 92 | 48,1 93 | 45,1 94 | 210,0 95 | 83,0 96 | 372,1 97 | 393,0 98 | 205,1 99 | 87,0 100 | 62,1 101 | 309,1 102 | 222,1 103 | 160,1 104 | 229,1 105 | 369,0 106 | 227,1 107 | 230,0 108 | 217,0 109 | 194,1 110 | 391,1 111 | 340,1 112 | 167,0 113 | 266,1 114 | 208,1 115 | 335,1 116 | 213,1 117 | 388,1 118 | 331,0 119 | 216,1 120 | 277,1 121 | 85,0 122 | 310,1 123 | 389,1 124 | 387,0 125 | 191,0 126 | 224,0 127 | 82,1 128 | 225,1 129 | 295,1 130 | 169,1 131 | 375,0 132 | 292,1 133 | 215,1 134 | 337,1 135 | 320,1 136 | 171,0 137 | 346,1 138 | 151,1 139 | 137,1 140 | 291,0 141 | 385,0 142 | 84,1 143 | 349,0 144 | 219,0 145 | 322,1 146 | 204,0 147 | 273,1 
148 | 212,1 149 | 146,0 150 | 348,0 151 | 157,1 152 | 214,0 153 | 316,0 154 | 186,0 155 | 314,1 156 | 293,1 157 | 183,1 158 | 287,0 159 | 290,0 160 | 168,0 161 | 343,1 162 | 153,1 163 | 339,1 164 | 60,0 165 | 174,0 166 | 88,1 167 | 359,1 168 | 298,1 169 | 162,1 170 | 311,1 171 | 329,0 172 | 175,0 173 | 163,0 174 | 77,1 175 | 302,1 176 | 231,0 177 | 148,0 178 | 106,1 179 | 283,1 180 | 184,1 181 | 122,1 182 | 170,1 183 | 396,1 184 | 275,0 185 | 244,1 186 | 116,1 187 | 364,0 188 | 121,0 189 | 165,0 190 | 32,1 191 | 201,1 192 | 326,1 193 | 368,1 194 | 332,1 195 | 361,0 196 | 202,0 197 | 200,1 198 | 257,0 199 | 80,0 200 | 9,0 201 | 187,1 202 | 89,0 203 | 207,0 204 | 69,1 205 | 188,1 206 | 306,0 207 | 360,0 208 | 237,1 209 | 327,1 210 | 261,1 211 | 86,1 212 | 228,0 213 | 190,0 214 | 74,1 215 | 8,0 216 | 4,1 217 | 159,1 218 | 262,0 219 | 51,0 220 | 5,1 221 | 363,1 222 | 338,1 223 | 355,1 224 | 269,1 225 | 278,0 226 | 179,0 227 | 6,0 228 | 206,1 229 | 461,1 230 | 466,1 231 | 9490,1 232 | 9496,0 233 | 9504,1 234 | 9499,1 235 | 9308,0 236 | 9305,1 237 | 9302,1 238 | 9309,1 239 | 4980,0 240 | 5444,1 241 | 5394,1 242 | 5461,1 243 | 5494,1 244 | 5398,0 245 | 5466,1 246 | 5450,0 247 | 4675,0 248 | 4632,0 249 | 5769,1 250 | 4757,1 251 | 4827,0 252 | 4820,0 253 | 5159,1 254 | 5759,0 255 | 5739,1 256 | 5738,1 257 | 5770,1 258 | 5826,1 259 | 5745,1 260 | 5744,1 261 | 5774,1 262 | 5760,0 263 | 5746,1 264 | 5762,0 265 | 5711,1 266 | 5802,1 267 | 5740,0 268 | 5716,1 269 | 5772,1 270 | 5722,1 271 | 5751,1 272 | 5721,0 273 | 5812,0 274 | 5792,0 275 | 5735,1 276 | 5767,0 277 | 5817,1 278 | 5807,1 279 | 5777,1 280 | 5736,0 281 | 5763,1 282 | 5813,0 283 | 5741,1 284 | 5731,1 285 | 5819,1 286 | 5724,1 287 | 5789,0 288 | 5141,1 289 | 5175,1 290 | 5145,0 291 | 5469,0 292 | 5106,1 293 | 5299,0 294 | 5335,1 295 | 7121,1 296 | 7106,0 297 | 7104,0 298 | 7113,1 299 | 7116,0 300 | 7149,1 301 | 7228,1 302 | 7301,1 303 | 7109,1 304 | 6987,1 305 | 7028,1 306 | 7029,1 307 | 7030,1 308 | 7013,0 309 | 
7017,0 310 | 7032,1 311 | 7073,0 312 | 242,0 313 | 104,1 314 | 282,1 315 | 96,1 316 | 23,1 317 | 6102,0 318 | 6938,0 319 | 8304,1 320 | 8238,1 321 | 8386,0 322 | 8348,1 323 | 1416,1 324 | 6237,0 325 | 6226,1 326 | 6184,0 327 | 6174,0 328 | 6172,1 329 | 6171,1 330 | 6170,1 331 | 6151,0 332 | 6150,0 333 | 6149,1 334 | 6148,1 335 | 6147,0 336 | 6146,0 337 | 6145,0 338 | 6144,0 339 | 6142,1 340 | 6141,1 341 | 6137,0 342 | 6136,0 343 | 6131,1 344 | 6134,0 345 | 6133,1 346 | 6132,1 347 | 6129,0 348 | 6128,1 349 | 6127,1 350 | 6126,0 351 | 6125,0 352 | 6123,1 353 | 6122,1 354 | 6121,1 355 | 6119,1 356 | 6116,1 357 | 6115,0 358 | 6114,0 359 | 6111,1 360 | 6110,1 361 | 6108,0 362 | 6107,1 363 | 6106,1 364 | 6104,1 365 | 6103,0 366 | 6101,1 367 | 6100,1 368 | 6099,1 369 | 6098,0 370 | 6097,1 371 | 6095,1 372 | 6093,1 373 | 6092,1 374 | 6091,1 375 | 6090,1 376 | 6177,1 377 | 6221,1 378 | 6244,1 379 | 6241,1 380 | 6240,1 381 | 6238,1 382 | 6236,1 383 | 6235,1 384 | 6220,1 385 | 6218,1 386 | 6217,1 387 | 6216,0 388 | 6215,1 389 | 6214,1 390 | 6210,1 391 | 6209,0 392 | 6207,1 393 | 6201,0 394 | 6200,0 395 | 6199,1 396 | 6198,1 397 | 6197,1 398 | 6195,1 399 | 6194,1 400 | 6193,1 401 | 6192,1 402 | 6191,1 403 | 6189,1 404 | 6163,1 405 | 6154,1 406 | 8274,1 407 | 7192,1 408 | 7194,1 409 | 7210,0 410 | 7238,1 411 | 7245,0 412 | 7246,0 413 | 7256,0 414 | 7265,0 415 | 7268,1 416 | 366,1 417 | 5245,0 418 | 5264,0 419 | 7195,0 420 | 7262,0 421 | 7250,1 422 | 1925,1 423 | 5719,1 424 | 5798,1 425 | 5816,1 426 | 5821,0 427 | 5710,1 428 | 5715,1 429 | 5720,0 430 | 5733,1 431 | 5820,1 432 | 5737,1 433 | 5755,0 434 | 5781,1 435 | 5782,1 436 | 5784,1 437 | 5146,0 438 | 5133,0 439 | 5709,1 440 | 5712,0 441 | 5795,0 442 | 5750,0 443 | 5708,0 444 | 5749,1 445 | 5727,0 446 | 5780,0 447 | 5756,1 448 | 5723,1 449 | 5730,0 450 | 5717,0 451 | 5732,1 452 | 5804,1 453 | 5752,1 454 | 5799,1 455 | 5713,1 456 | 5728,1 457 | 5787,1 458 | 5788,1 459 | 5793,0 460 | 5803,0 461 | 5758,1 462 | 7317,0 463 | 
7034,1 464 | 615,0 465 | 627,0 466 | 607,0 467 | 631,0 468 | 623,0 469 | 719,1 470 | 640,0 471 | 827,0 472 | 895,1 473 | 946,1 474 | 936,0 475 | 7717,0 476 | 7787,1 477 | 7837,1 478 | 7847,1 479 | 7867,1 480 | 8077,1 481 | 8122,1 482 | 1743,0 483 | 1799,1 484 | 2175,1 485 | 2160,0 486 | 2171,0 487 | 2148,1 488 | 2180,0 489 | 2157,1 490 | 1948,0 491 | 1941,0 492 | 1949,0 493 | 1965,1 494 | 1981,1 495 | 1992,1 496 | 2016,1 497 | 2011,1 498 | 2019,1 499 | 2020,1 500 | 2151,1 501 | 1862,1 502 | 1872,0 503 | 1864,1 504 | 1871,1 505 | 1857,1 506 | 1865,0 507 | 1873,0 508 | 1850,0 509 | 1858,1 510 | 1874,1 511 | 1868,1 512 | 1829,1 513 | 1853,0 514 | 1926,0 515 | 1966,1 516 | 1918,1 517 | 1959,0 518 | 1936,1 519 | 1952,0 520 | 1960,1 521 | 1968,1 522 | 1938,0 523 | 1963,1 524 | 1720,0 525 | 1736,1 526 | 1744,1 527 | 1752,0 528 | 1729,1 529 | 1745,1 530 | 1753,0 531 | 1722,1 532 | 1730,0 533 | 1738,1 534 | 1724,1 535 | 1740,1 536 | 1733,1 537 | 1718,1 538 | 1726,1 539 | 1750,0 540 | 1782,1 541 | 1719,1 542 | 7566,1 543 | 1751,1 544 | 2214,0 545 | 2201,1 546 | 2204,1 547 | 2294,0 548 | 2280,1 549 | 2338,1 550 | 2292,1 551 | 2300,1 552 | 2316,0 553 | 2283,0 554 | 7584,1 555 | 7580,1 556 | 7578,0 557 | 7570,0 558 | 8608,0 559 | 8727,1 560 | 8760,1 561 | 8768,1 562 | 8616,1 563 | 8617,1 564 | 8770,1 565 | 8730,1 566 | 8619,1 567 | 8629,1 568 | 8612,1 569 | 8724,1 570 | 8631,0 571 | 8725,0 572 | 8640,1 573 | 8759,0 574 | 8774,1 575 | 8824,1 576 | 8557,0 577 | 8791,1 578 | 8777,1 579 | 8811,1 580 | 8787,0 581 | 8805,1 582 | 8534,1 583 | 8687,1 584 | 9045,1 585 | 8673,1 586 | 9041,1 587 | 8701,1 588 | 9053,0 589 | 8985,1 590 | 8957,1 591 | 8966,0 592 | 8695,1 593 | 8967,1 594 | 9004,1 595 | 8690,1 596 | 9012,1 597 | 8969,1 598 | 8961,1 599 | 8970,1 600 | 8954,0 601 | 8962,0 602 | 8965,1 603 | 8973,0 604 | 8975,1 605 | 9006,1 606 | 8976,1 607 | 9007,1 608 | 8977,0 609 | 8992,1 610 | 9008,0 611 | 9001,1 612 | 8719,1 613 | 8996,1 614 | 9011,1 615 | 1742,0 616 | 1749,1 617 | 999,1 
618 | 1061,0 619 | 1404,0 620 | 1552,1 621 | 1257,1 622 | 1158,1 623 | 1070,1 624 | 9452,1 625 | 417,1 626 | 421,0 627 | 407,0 628 | 424,1 629 | 402,1 630 | 403,1 631 | 404,1 632 | 428,0 633 | 429,1 634 | 409,1 635 | 413,0 636 | 5883,1 637 | 5848,0 638 | 6416,1 639 | 5838,1 640 | 6287,1 641 | 6417,0 642 | 5841,1 643 | 5894,1 644 | 5904,0 645 | 5913,0 646 | 5921,1 647 | 5939,1 648 | 5969,0 649 | 5895,0 650 | 5905,1 651 | 5914,0 652 | 5923,0 653 | 5932,1 654 | 5942,1 655 | 5961,1 656 | 5970,1 657 | 5884,0 658 | 5906,1 659 | 5924,0 660 | 5933,1 661 | 5943,1 662 | 5953,1 663 | 5963,0 664 | 5972,0 665 | 5888,0 666 | 5934,1 667 | 5898,1 668 | 5908,1 669 | 5919,0 670 | 5926,0 671 | 5935,1 672 | 5945,1 673 | 5955,1 674 | 5891,1 675 | 5900,0 676 | 5927,0 677 | 5946,1 678 | 5966,0 679 | 5975,0 680 | 5901,1 681 | 5911,0 682 | 5875,1 683 | 5948,1 684 | 5893,1 685 | 5902,1 686 | 5920,1 687 | 5938,1 688 | 5959,1 689 | 5968,1 690 | 5988,1 691 | 5999,1 692 | 6455,0 693 | 5979,1 694 | 6421,0 695 | 5991,1 696 | 5992,1 697 | 6004,0 698 | 6458,0 699 | 5982,1 700 | 5993,0 701 | 6425,0 702 | 6451,1 703 | 6309,1 704 | 5984,0 705 | 5994,0 706 | 6444,1 707 | 5997,0 708 | 6007,0 709 | 6445,0 710 | 6453,0 711 | 5998,1 712 | 6427,1 713 | 6435,0 714 | 6446,0 715 | 6403,1 716 | 5922,1 717 | 5915,0 718 | 5910,1 719 | 6401,0 720 | 6003,0 721 | 5899,0 722 | 6396,0 723 | 5873,0 724 | 6418,0 725 | 5874,1 726 | 5916,1 727 | 5878,1 728 | 5983,1 729 | 5990,1 730 | 5996,1 731 | 5940,1 732 | 5846,1 733 | 5871,0 734 | 6436,1 735 | 5872,1 736 | 5956,1 737 | 6402,1 738 | 4758,1 739 | 5285,0 740 | 9153,0 741 | 9137,0 742 | 9151,1 743 | 9143,0 744 | 9201,1 745 | 6173,0 746 | 6284,0 747 | 6276,1 748 | 6258,1 749 | 6252,0 750 | 6255,0 751 | 6166,0 752 | 6085,1 753 | 6025,1 754 | 6268,1 755 | 6180,1 756 | 6143,1 757 | 6041,1 758 | 5829,1 759 | 8427,1 760 | 8218,1 761 | 6023,0 762 | 5835,1 763 | 5831,1 764 | 5830,1 765 | 6039,0 766 | 6086,1 767 | 6413,1 768 | 6412,1 769 | 6411,1 770 | 6087,0 771 | 6077,0 772 | 
6076,1 773 | 6071,0 774 | 6069,0 775 | 6038,1 776 | 6036,1 777 | 6035,0 778 | 6034,1 779 | 6030,0 780 | 6024,0 781 | 6021,0 782 | 6019,1 783 | 6017,1 784 | 6013,0 785 | 6012,0 786 | 6011,1 787 | 6010,1 788 | 5870,0 789 | 5867,0 790 | 5865,1 791 | 5860,1 792 | 5836,1 793 | 6231,0 794 | 6212,1 795 | 6140,1 796 | 6138,0 797 | 6120,1 798 | 6118,0 799 | 6073,0 800 | 6022,1 801 | 6020,0 802 | 8227,1 803 | 8225,1 804 | 8230,0 805 | 5856,0 806 | 8307,1 807 | 1409,0 808 | 6959,0 809 | 7525,1 810 | 6961,1 811 | 6967,1 812 | 6973,1 813 | 6974,1 814 | 6976,1 815 | 7516,1 816 | 6979,1 817 | 6980,0 818 | 6982,1 819 | 6983,0 820 | 6985,1 821 | 6931,1 822 | 6043,0 823 | 6945,1 824 | 7519,1 825 | 7526,1 826 | 7523,1 827 | 6956,1 828 | 6960,1 829 | 7524,1 830 | 6963,1 831 | 6964,0 832 | 6965,0 833 | 6966,1 834 | 6969,1 835 | 6971,1 836 | 6975,1 837 | 7517,0 838 | 6978,1 839 | 6981,0 840 | 6984,0 841 | 6899,0 842 | 6903,0 843 | 6904,1 844 | 6905,0 845 | 6906,1 846 | 6909,1 847 | 6911,1 848 | 6916,1 849 | 8215,0 850 | 6921,1 851 | 6932,0 852 | 6046,0 853 | 6944,1 854 | 7515,1 855 | 7514,1 856 | 6962,1 857 | 6968,0 858 | 6972,1 859 | 6970,1 860 | 6977,1 861 | 8329,1 862 | 7379,0 863 | 7080,1 864 | 6744,1 865 | 7098,1 866 | 7158,0 867 | 7163,0 868 | 7165,0 869 | 7340,1 870 | 7372,0 871 | 7394,0 872 | 7397,1 873 | 281,1 874 | 8258,0 875 | 8259,1 876 | 8290,1 877 | 7461,1 878 | 7323,1 879 | 8254,0 880 | 8270,1 881 | 8233,1 882 | 8285,0 883 | 6016,1 884 | 8423,0 885 | 8237,0 886 | 6040,1 887 | 6064,0 888 | 6957,0 889 | 8369,1 890 | 8247,1 891 | 8426,0 892 | 8428,0 893 | 9058,1 894 | 8249,0 895 | 9057,1 896 | 7139,1 897 | 7307,1 898 | 7331,1 899 | 7337,1 900 | 7378,0 901 | 7405,0 902 | 66,0 903 | 149,1 904 | 328,1 905 | 334,0 906 | 2274,1 907 | 5753,1 908 | 6709,1 909 | 7000,0 910 | 6989,0 911 | 7031,1 912 | 7062,0 913 | 7460,1 914 | 7123,0 915 | 7128,1 916 | 7145,1 917 | 7147,0 918 | 7166,1 919 | 7255,0 920 | 7275,1 921 | 7258,0 922 | 7291,1 923 | 7310,0 924 | 7330,1 925 | 7333,0 926 | 
7411,0 927 | 178,0 928 | 378,1 929 | 8241,1 930 | 6988,0 931 | 8256,1 932 | 8796,0 933 | 8264,0 934 | 8265,0 935 | 8231,0 936 | 8271,0 937 | 6190,0 938 | 8275,0 939 | 8420,1 940 | 8283,1 941 | 8284,1 942 | 6008,1 943 | 8422,1 944 | 8296,1 945 | 8297,1 946 | 8300,1 947 | 8235,0 948 | 8306,0 949 | 8310,0 950 | 8236,1 951 | 8311,0 952 | 8314,0 953 | 8239,1 954 | 8240,0 955 | 8323,1 956 | 8242,0 957 | 8325,1 958 | 8326,1 959 | 8222,1 960 | 8430,1 961 | 6042,1 962 | 8335,1 963 | 8343,1 964 | 6074,0 965 | 8351,0 966 | 8353,0 967 | 8354,0 968 | 7296,1 969 | 8365,1 970 | 8374,1 971 | 8376,0 972 | 8378,0 973 | 8412,1 974 | 8387,0 975 | 8389,1 976 | 6243,1 977 | 7507,0 978 | 7343,1 979 | 6005,1 980 | 5729,1 981 | 5380,1 982 | 5381,0 983 | 5565,1 984 | 7011,1 985 | 7199,1 986 | 7224,1 987 | 7277,0 988 | 7490,1 989 | 7492,1 990 | 7300,0 991 | 7306,0 992 | 7408,1 993 | 7418,0 994 | 5887,0 995 | 5987,0 996 | 173,0 997 | 357,1 998 | 258,1 999 | 374,0 1000 | 94,1 1001 | 1859,1 1002 | 6188,1 1003 | 5207,0 1004 | 6448,1 1005 | 8312,1 1006 | 8313,0 1007 | 8334,1 1008 | 8337,1 1009 | 8357,1 1010 | 8366,0 1011 | 8411,0 1012 | 8388,1 1013 | 8395,1 1014 | 7014,1 1015 | 7035,0 1016 | 6810,1 1017 | 7498,0 1018 | 7506,0 1019 | 7390,0 1020 | 7284,1 1021 | 7081,1 1022 | 8243,0 1023 | 8245,1 1024 | 7033,0 1025 | 2150,1 1026 | 100000,1 1027 | 8266,0 1028 | 6897,1 1029 | 6898,1 1030 | 5837,0 1031 | 6907,1 1032 | 7438,1 1033 | 6910,1 1034 | 6913,1 1035 | 6914,0 1036 | 6918,1 1037 | 6919,1 1038 | 8214,1 1039 | 6924,1 1040 | 8424,1 1041 | 6926,0 1042 | 6928,1 1043 | 6933,1 1044 | 7520,1 1045 | 7521,0 1046 | 6936,0 1047 | 7522,1 1048 | 6937,0 1049 | 6939,0 1050 | 6900,1 1051 | 6901,1 1052 | 6908,1 1053 | 6009,0 1054 | 6915,0 1055 | 6917,1 1056 | 6920,0 1057 | 6922,0 1058 | 6923,0 1059 | 6927,0 1060 | 6929,1 1061 | 6930,0 1062 | 6940,0 1063 | 6942,1 1064 | 6943,1 1065 | 7518,1 1066 | 6946,1 1067 | 8213,0 1068 | 6951,1 1069 | 6958,1 1070 | 7305,0 1071 | 7376,1 1072 | 7386,1 1073 | 7404,1 1074 | 7403,0 
1075 | 7457,1 1076 | 7463,1 1077 | 7015,1 1078 | 7024,0 1079 | 7079,0 1080 | 7152,1 1081 | 7297,1 1082 | 7381,0 1083 | 7413,0 1084 | 7176,1 1085 | 7352,0 1086 | 2327,0 1087 | 7117,1 1088 | 7172,1 1089 | 7168,1 1090 | 7423,0 1091 | 7425,1 1092 | 7223,1 1093 | 7239,1 1094 | 7276,1 1095 | 7281,1 1096 | 7287,1 1097 | 7292,0 1098 | 7299,0 1099 | 7303,0 1100 | 7309,0 1101 | 7328,0 1102 | 7406,1 1103 | 2320,1 1104 | 7242,1 1105 | 7462,1 1106 | 5385,1 1107 | 5292,0 1108 | 5337,1 1109 | 5350,1 1110 | 5377,1 1111 | 5386,0 1112 | 5310,1 1113 | 5282,1 1114 | 5339,0 1115 | 5322,0 1116 | 5331,1 1117 | 5364,0 1118 | 5373,1 1119 | 4879,1 1120 | 7069,1 1121 | 7496,0 1122 | 7502,1 1123 | 7344,0 1124 | 7346,1 1125 | 7353,0 1126 | 7373,0 1127 | 7384,0 1128 | 81,1 1129 | 373,0 1130 | 383,0 1131 | 1867,0 1132 | 957,0 1133 | 998,1 1134 | 1006,1 1135 | 992,1 1136 | 1002,1 1137 | 1166,1 1138 | 9077,1 1139 | 9104,1 1140 | 9152,0 1141 | 9165,0 1142 | 9179,0 1143 | 6996,0 1144 | 7008,1 1145 | 6729,1 1146 | 7092,1 1147 | 7164,1 1148 | 7169,0 1149 | 7181,0 1150 | 7252,1 1151 | 7446,1 1152 | 7270,1 1153 | 7483,0 1154 | 7316,1 1155 | 7351,1 1156 | 7391,1 1157 | 1,1 1158 | 392,0 1159 | 379,1 1160 | 380,1 1161 | 267,1 1162 | 2057,0 1163 | 5742,0 1164 | 5056,1 1165 | 5122,1 1166 | 5158,0 1167 | 5832,0 1168 | 6994,0 1169 | 7002,1 1170 | 7026,0 1171 | 6730,0 1172 | 7075,1 1173 | 7126,1 1174 | 7227,0 1175 | 7229,0 1176 | 7449,1 1177 | 6847,0 1178 | 6953,1 1179 | 7320,1 1180 | 7354,0 1181 | 7283,0 1182 | 2,0 1183 | 386,1 1184 | 1716,1 1185 | 1967,1 1186 | 5785,1 1187 | 4802,1 1188 | 5116,0 1189 | 5202,1 1190 | 7071,1 1191 | 7064,1 1192 | 7078,1 1193 | 7094,1 1194 | 7141,1 1195 | 7143,0 1196 | 7151,0 1197 | 7150,0 1198 | 7424,0 1199 | 7178,1 1200 | 7188,1 1201 | 7201,1 1202 | 7206,1 1203 | 7205,1 1204 | 7231,0 1205 | 7244,1 1206 | 7260,1 1207 | 7263,1 1208 | 7280,0 1209 | 7282,0 1210 | 7472,0 1211 | 7382,0 1212 | 7392,1 1213 | 7477,1 1214 | 8610,0 1215 | 8692,1 1216 | 6727,1 1217 | 7105,1 1218 | 7479,1 
1219 | 7482,0 1220 | 7504,1 1221 | 7508,0 1222 | 7355,1 1223 | 5896,0 1224 | 166,1 1225 | 223,1 1226 | 126,1 1227 | 390,0 1228 | 321,1 1229 | 259,1 1230 | 362,1 1231 | 260,1 1232 | 91,0 1233 | 641,1 1234 | 5160,1 1235 | 5232,0 1236 | 5606,0 1237 | 5628,0 1238 | 7004,0 1239 | 7100,1 1240 | 7102,0 1241 | 7110,0 1242 | 7135,1 1243 | 7186,1 1244 | 7430,1 1245 | 2187,1 1246 | 6094,0 1247 | 6096,0 1248 | 6109,0 1249 | 6112,1 1250 | 6124,0 1251 | 6169,1 1252 | 6202,1 1253 | 6203,0 1254 | 6242,0 1255 | 6318,1 1256 | 6990,1 1257 | 6992,1 1258 | 6998,0 1259 | 4927,1 1260 | 4935,1 1261 | 4862,1 1262 | 5596,1 1263 | 5517,1 1264 | 5582,0 1265 | 5590,1 1266 | 5536,1 1267 | 5670,0 1268 | 5678,0 1269 | 5645,1 1270 | 2290,1 1271 | 5805,0 1272 | 4997,1 1273 | 5341,1 1274 | 6449,1 1275 | 1366,1 1276 | 1363,0 1277 | 1317,1 1278 | 1313,1 1279 | 1312,1 1280 | 1360,1 1281 | 1362,1 1282 | 1256,1 1283 | 9342,1 1284 | 9450,1 1285 | 9437,0 1286 | 9436,0 1287 | 9434,1 1288 | 9427,1 1289 | 9421,1 1290 | 9416,0 1291 | 9413,0 1292 | 9412,1 1293 | 9411,1 1294 | 9410,0 1295 | 9408,1 1296 | 9407,0 1297 | 9405,1 1298 | 9404,1 1299 | 9399,1 1300 | 9392,0 1301 | 9391,1 1302 | 9390,1 1303 | 9388,0 1304 | 9386,1 1305 | 9385,1 1306 | 9384,1 1307 | 9383,0 1308 | 9382,1 1309 | -------------------------------------------------------------------------------- /data/k_matrix.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grimmlab/permGWAS/3f7a1c2e3e4c63281f5719425ff9ac405f8d9cfc/data/k_matrix.h5 -------------------------------------------------------------------------------- /data/x_matrix.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grimmlab/permGWAS/3f7a1c2e3e4c63281f5719425ff9ac405f8d9cfc/data/x_matrix.h5 -------------------------------------------------------------------------------- /data/x_matrix.map: 
-------------------------------------------------------------------------------- 1 | 1 1_657 0 657 2 | 1 1_3102 0 3102 3 | 1 1_4648 0 4648 4 | 1 1_4880 0 4880 5 | 1 1_5975 0 5975 6 | 1 1_6063 0 6063 7 | 1 1_6449 0 6449 8 | 1 1_6514 0 6514 9 | 1 1_6603 0 6603 10 | 1 1_6768 0 6768 11 | 1 1_7601 0 7601 12 | 1 1_8193 0 8193 13 | 1 1_8617 0 8617 14 | 1 1_10219 0 10219 15 | 1 1_10449 0 10449 16 | 1 1_10969 0 10969 17 | 1 1_11493 0 11493 18 | 1 1_11696 0 11696 19 | 1 1_12584 0 12584 20 | 1 1_12659 0 12659 21 | 1 1_13045 0 13045 22 | 1 1_14385 0 14385 23 | 1 1_19819 0 19819 24 | 1 1_20892 0 20892 25 | 1 1_21043 0 21043 26 | 1 1_21128 0 21128 27 | 1 1_21829 0 21829 28 | 1 1_22522 0 22522 29 | 1 1_23838 0 23838 30 | 1 1_25315 0 25315 31 | 1 1_25365 0 25365 32 | 1 1_25773 0 25773 33 | 1 1_26288 0 26288 34 | 1 1_27265 0 27265 35 | 1 1_28948 0 28948 36 | 1 1_28978 0 28978 37 | 1 1_29291 0 29291 38 | 1 1_30529 0 30529 39 | 1 1_30683 0 30683 40 | 1 1_31515 0 31515 41 | 1 1_31926 0 31926 42 | 1 1_32210 0 32210 43 | 1 1_32807 0 32807 44 | 1 1_34125 0 34125 45 | 1 1_34599 0 34599 46 | 1 1_35856 0 35856 47 | 1 1_37072 0 37072 48 | 1 1_38946 0 38946 49 | 1 1_39751 0 39751 50 | 1 1_41178 0 41178 51 | 1 1_41427 0 41427 52 | 1 1_44567 0 44567 53 | 1 1_45075 0 45075 54 | 1 1_45580 0 45580 55 | 1 1_45683 0 45683 56 | 1 1_46373 0 46373 57 | 1 1_46499 0 46499 58 | 1 1_46912 0 46912 59 | 1 1_47577 0 47577 60 | 1 1_47692 0 47692 61 | 1 1_48118 0 48118 62 | 1 1_48181 0 48181 63 | 1 1_49080 0 49080 64 | 1 1_51392 0 51392 65 | 1 1_51706 0 51706 66 | 1 1_51878 0 51878 67 | 1 1_52202 0 52202 68 | 1 1_53183 0 53183 69 | 1 1_53729 0 53729 70 | 1 1_53901 0 53901 71 | 1 1_55684 0 55684 72 | 1 1_57136 0 57136 73 | 1 1_57686 0 57686 74 | 1 1_59637 0 59637 75 | 1 1_60083 0 60083 76 | 1 1_60772 0 60772 77 | 1 1_61122 0 61122 78 | 1 1_61266 0 61266 79 | 1 1_61405 0 61405 80 | 1 1_61661 0 61661 81 | 1 1_62259 0 62259 82 | 1 1_62935 0 62935 83 | 1 1_63084 0 63084 84 | 1 1_63645 0 63645 85 | 1 1_63759 0 63759 
86 | 1 1_63915 0 63915 87 | 1 1_64149 0 64149 88 | 1 1_64651 0 64651 89 | 1 1_68340 0 68340 90 | 1 1_68880 0 68880 91 | 1 1_69311 0 69311 92 | 1 1_70933 0 70933 93 | 1 1_71326 0 71326 94 | 1 1_71348 0 71348 95 | 1 1_71868 0 71868 96 | 1 1_72138 0 72138 97 | 1 1_72756 0 72756 98 | 1 1_72894 0 72894 99 | 1 1_72924 0 72924 100 | 1 1_73047 0 73047 101 | 1 1_73467 0 73467 102 | 1 1_73691 0 73691 103 | 1 1_73851 0 73851 104 | 1 1_73989 0 73989 105 | 1 1_74169 0 74169 106 | 1 1_74707 0 74707 107 | 1 1_75481 0 75481 108 | 1 1_75721 0 75721 109 | 1 1_75899 0 75899 110 | 1 1_76188 0 76188 111 | 1 1_76217 0 76217 112 | 1 1_76847 0 76847 113 | 1 1_76879 0 76879 114 | 1 1_76906 0 76906 115 | 1 1_77127 0 77127 116 | 1 1_77140 0 77140 117 | 1 1_77243 0 77243 118 | 1 1_77458 0 77458 119 | 1 1_78803 0 78803 120 | 1 1_78975 0 78975 121 | 1 1_79418 0 79418 122 | 1 1_80216 0 80216 123 | 1 1_80374 0 80374 124 | 1 1_80400 0 80400 125 | 1 1_81068 0 81068 126 | 1 1_81496 0 81496 127 | 1 1_81854 0 81854 128 | 1 1_81869 0 81869 129 | 1 1_82197 0 82197 130 | 1 1_82290 0 82290 131 | 1 1_83117 0 83117 132 | 1 1_83177 0 83177 133 | 1 1_83219 0 83219 134 | 1 1_84144 0 84144 135 | 1 1_84379 0 84379 136 | 1 1_84558 0 84558 137 | 1 1_85561 0 85561 138 | 1 1_85860 0 85860 139 | 1 1_86656 0 86656 140 | 1 1_87060 0 87060 141 | 1 1_87791 0 87791 142 | 1 1_87985 0 87985 143 | 1 1_88300 0 88300 144 | 1 1_88658 0 88658 145 | 1 1_89312 0 89312 146 | 1 1_90606 0 90606 147 | 1 1_92353 0 92353 148 | 1 1_92866 0 92866 149 | 1 1_93562 0 93562 150 | 1 1_93740 0 93740 151 | -------------------------------------------------------------------------------- /data/y_matrix.csv: -------------------------------------------------------------------------------- 1 | accession_id,phenotype_value 2 | 9381, 3 | 9380, 4 | 9378, 5 | 9371, 6 | 9367, 7 | 9363, 8 | 9356, 9 | 9355, 10 | 9354, 11 | 9353, 12 | 9352, 13 | 9351, 14 | 9344, 15 | 9343, 16 | 9339, 17 | 9336, 18 | 9332, 19 | 9323, 20 | 9321, 21 | 9482, 22 | 9481, 23 | 
9472, 24 | 9471, 25 | 9470, 26 | 9469, 27 | 9455, 28 | 9454, 29 | 9453, 30 | 9451, 31 | 9419, 32 | 9418, 33 | 9409, 34 | 9402, 35 | 9369, 36 | 9349, 37 | 9476, 38 | 9433, 39 | 9446, 40 | 9443, 41 | 9442, 42 | 997, 43 | 996, 44 | 1068, 45 | 1026, 46 | 1585, 47 | 1435, 48 | 1169, 49 | 1075, 50 | 1132, 51 | 1064, 52 | 1063, 53 | 1062, 54 | 1247, 55 | 991, 56 | 1391, 57 | 1374, 58 | 1318, 59 | 1254, 60 | 1163, 61 | 1153, 62 | 1073, 63 | 1072, 64 | 394, 65 | 7, 66 | 203, 67 | 236, 68 | 367, 69 | 123, 70 | 395, 71 | 196, 72 | 264, 73 | 185, 74 | 297, 75 | 318, 76 | 323, 77 | 79, 78 | 198, 79 | 371, 80 | 280, 81 | 12, 82 | 347, 83 | 268, 84 | 288, 85 | 377, 86 | 252, 87 | 296, 88 | 341, 89 | 156, 90 | 397, 91 | 263, 92 | 48, 93 | 45, 94 | 210, 95 | 83, 96 | 372, 97 | 393, 98 | 205, 99 | 87, 100 | 62, 101 | 309, 102 | 222, 103 | 160, 104 | 229, 105 | 369, 106 | 227, 107 | 230, 108 | 217, 109 | 194, 110 | 391, 111 | 340, 112 | 167, 113 | 266, 114 | 208, 115 | 335, 116 | 213, 117 | 388, 118 | 331, 119 | 216, 120 | 277, 121 | 85, 122 | 310, 123 | 389, 124 | 387, 125 | 191, 126 | 224, 127 | 82, 128 | 225, 129 | 295, 130 | 169, 131 | 375, 132 | 292, 133 | 215, 134 | 337, 135 | 320, 136 | 171, 137 | 346, 138 | 151, 139 | 137, 140 | 291, 141 | 385, 142 | 84, 143 | 349, 144 | 219, 145 | 322, 146 | 204, 147 | 273, 148 | 212, 149 | 146, 150 | 348, 151 | 157, 152 | 214, 153 | 316, 154 | 186, 155 | 314, 156 | 293, 157 | 183, 158 | 287, 159 | 290, 160 | 168, 161 | 343, 162 | 153, 163 | 339, 164 | 60, 165 | 174, 166 | 88, 167 | 359, 168 | 298, 169 | 162, 170 | 311, 171 | 329, 172 | 175, 173 | 163, 174 | 77, 175 | 302, 176 | 231, 177 | 148, 178 | 106, 179 | 283, 180 | 184, 181 | 122, 182 | 170, 183 | 396, 184 | 275, 185 | 244, 186 | 116, 187 | 364, 188 | 121, 189 | 165, 190 | 32, 191 | 201, 192 | 326, 193 | 368, 194 | 332, 195 | 361, 196 | 202, 197 | 200, 198 | 257, 199 | 80, 200 | 9, 201 | 187, 202 | 89, 203 | 207, 204 | 69, 205 | 188, 206 | 306, 207 | 360, 208 | 237, 209 | 327, 210 | 
261, 211 | 86, 212 | 228, 213 | 190, 214 | 74, 215 | 8, 216 | 4, 217 | 159, 218 | 262, 219 | 51, 220 | 5, 221 | 363, 222 | 338, 223 | 355, 224 | 269, 225 | 278, 226 | 179, 227 | 6, 228 | 206, 229 | 461, 230 | 466, 231 | 9490, 232 | 9496, 233 | 9504, 234 | 9499, 235 | 9308, 236 | 9305, 237 | 9302, 238 | 9309, 239 | 4980, 240 | 5444, 241 | 5394, 242 | 5461, 243 | 5494, 244 | 5398, 245 | 5466, 246 | 5450, 247 | 4675, 248 | 4632, 249 | 5769, 250 | 4757, 251 | 4827, 252 | 4820, 253 | 5159, 254 | 5759, 255 | 5739, 256 | 5738, 257 | 5770, 258 | 5826, 259 | 5745, 260 | 5744, 261 | 5774, 262 | 5760, 263 | 5746, 264 | 5762, 265 | 5711, 266 | 5802, 267 | 5740, 268 | 5716, 269 | 5772, 270 | 5722, 271 | 5751, 272 | 5721, 273 | 5812, 274 | 5792, 275 | 5735, 276 | 5767, 277 | 5817, 278 | 5807, 279 | 5777, 280 | 5736, 281 | 5763, 282 | 5813, 283 | 5741, 284 | 5731, 285 | 5819, 286 | 5724, 287 | 5789, 288 | 5141, 289 | 5175, 290 | 5145, 291 | 5469, 292 | 5106, 293 | 5299, 294 | 5335, 295 | 7121, 296 | 7106, 297 | 7104, 298 | 7113, 299 | 7116, 300 | 7149, 301 | 7228, 302 | 7301, 303 | 7109, 304 | 6987, 305 | 7028, 306 | 7029, 307 | 7030, 308 | 7013, 309 | 7017, 310 | 7032, 311 | 7073, 312 | 242, 313 | 104, 314 | 282, 315 | 96, 316 | 23, 317 | 6102, 318 | 6938, 319 | 8304, 320 | 8238, 321 | 8386, 322 | 8348, 323 | 1416, 324 | 6237, 325 | 6226, 326 | 6184, 327 | 6174, 328 | 6172, 329 | 6171, 330 | 6170, 331 | 6151, 332 | 6150, 333 | 6149, 334 | 6148, 335 | 6147, 336 | 6146, 337 | 6145, 338 | 6144, 339 | 6142, 340 | 6141, 341 | 6137, 342 | 6136, 343 | 6131, 344 | 6134, 345 | 6133, 346 | 6132, 347 | 6129, 348 | 6128, 349 | 6127, 350 | 6126, 351 | 6125, 352 | 6123, 353 | 6122, 354 | 6121, 355 | 6119, 356 | 6116, 357 | 6115, 358 | 6114, 359 | 6111, 360 | 6110, 361 | 6108, 362 | 6107, 363 | 6106, 364 | 6104, 365 | 6103, 366 | 6101, 367 | 6100, 368 | 6099, 369 | 6098, 370 | 6097, 371 | 6095, 372 | 6093, 373 | 6092, 374 | 6091, 375 | 6090, 376 | 6177, 377 | 6221, 378 | 6244, 379 | 6241, 380 
| 6240, 381 | 6238, 382 | 6236, 383 | 6235, 384 | 6220, 385 | 6218, 386 | 6217, 387 | 6216, 388 | 6215, 389 | 6214, 390 | 6210, 391 | 6209, 392 | 6207, 393 | 6201, 394 | 6200, 395 | 6199, 396 | 6198, 397 | 6197, 398 | 6195, 399 | 6194, 400 | 6193, 401 | 6192, 402 | 6191, 403 | 6189, 404 | 6163, 405 | 6154, 406 | 8274,74.0 407 | 7192, 408 | 7194, 409 | 7210, 410 | 7238, 411 | 7245, 412 | 7246, 413 | 7256, 414 | 7265, 415 | 7268, 416 | 366, 417 | 5245, 418 | 5264, 419 | 7195, 420 | 7262, 421 | 7250, 422 | 1925, 423 | 5719, 424 | 5798, 425 | 5816, 426 | 5821, 427 | 5710, 428 | 5715, 429 | 5720, 430 | 5733, 431 | 5820, 432 | 5737, 433 | 5755, 434 | 5781, 435 | 5782, 436 | 5784, 437 | 5146, 438 | 5133, 439 | 5709, 440 | 5712, 441 | 5795, 442 | 5750, 443 | 5708, 444 | 5749, 445 | 5727, 446 | 5780, 447 | 5756, 448 | 5723, 449 | 5730, 450 | 5717, 451 | 5732, 452 | 5804, 453 | 5752, 454 | 5799, 455 | 5713, 456 | 5728, 457 | 5787, 458 | 5788, 459 | 5793, 460 | 5803, 461 | 5758, 462 | 7317, 463 | 7034, 464 | 615, 465 | 627, 466 | 607, 467 | 631, 468 | 623, 469 | 719, 470 | 640, 471 | 827, 472 | 895, 473 | 946, 474 | 936, 475 | 7717, 476 | 7787, 477 | 7837, 478 | 7847, 479 | 7867, 480 | 8077, 481 | 8122, 482 | 1743, 483 | 1799, 484 | 2175, 485 | 2160, 486 | 2171, 487 | 2148, 488 | 2180, 489 | 2157, 490 | 1948, 491 | 1941, 492 | 1949, 493 | 1965, 494 | 1981, 495 | 1992, 496 | 2016, 497 | 2011, 498 | 2019, 499 | 2020, 500 | 2151, 501 | 1862, 502 | 1872, 503 | 1864, 504 | 1871, 505 | 1857, 506 | 1865, 507 | 1873, 508 | 1850, 509 | 1858, 510 | 1874, 511 | 1868, 512 | 1829, 513 | 1853, 514 | 1926, 515 | 1966, 516 | 1918, 517 | 1959, 518 | 1936, 519 | 1952, 520 | 1960, 521 | 1968, 522 | 1938, 523 | 1963, 524 | 1720, 525 | 1736, 526 | 1744, 527 | 1752, 528 | 1729, 529 | 1745, 530 | 1753, 531 | 1722, 532 | 1730, 533 | 1738, 534 | 1724, 535 | 1740, 536 | 1733, 537 | 1718, 538 | 1726, 539 | 1750, 540 | 1782, 541 | 1719, 542 | 7566, 543 | 1751, 544 | 2214, 545 | 2201, 546 | 2204, 547 | 
2294, 548 | 2280, 549 | 2338, 550 | 2292, 551 | 2300, 552 | 2316, 553 | 2283, 554 | 7584, 555 | 7580, 556 | 7578, 557 | 7570, 558 | 8608, 559 | 8727, 560 | 8760, 561 | 8768, 562 | 8616, 563 | 8617, 564 | 8770, 565 | 8730, 566 | 8619, 567 | 8629, 568 | 8612, 569 | 8724, 570 | 8631, 571 | 8725, 572 | 8640, 573 | 8759, 574 | 8774, 575 | 8824, 576 | 8557, 577 | 8791, 578 | 8777, 579 | 8811, 580 | 8787, 581 | 8805, 582 | 8534, 583 | 8687, 584 | 9045, 585 | 8673, 586 | 9041, 587 | 8701, 588 | 9053, 589 | 8985, 590 | 8957, 591 | 8966, 592 | 8695, 593 | 8967, 594 | 9004, 595 | 8690, 596 | 9012, 597 | 8969, 598 | 8961, 599 | 8970, 600 | 8954, 601 | 8962, 602 | 8965, 603 | 8973, 604 | 8975, 605 | 9006, 606 | 8976, 607 | 9007, 608 | 8977, 609 | 8992, 610 | 9008, 611 | 9001, 612 | 8719, 613 | 8996, 614 | 9011, 615 | 1742, 616 | 1749, 617 | 999, 618 | 1061, 619 | 1404, 620 | 1552, 621 | 1257, 622 | 1158, 623 | 1070, 624 | 9452, 625 | 417, 626 | 421, 627 | 407, 628 | 424, 629 | 402, 630 | 403, 631 | 404, 632 | 428, 633 | 429, 634 | 409, 635 | 413, 636 | 5883, 637 | 5848, 638 | 6416, 639 | 5838, 640 | 6287, 641 | 6417, 642 | 5841, 643 | 5894, 644 | 5904, 645 | 5913, 646 | 5921, 647 | 5939, 648 | 5969, 649 | 5895, 650 | 5905, 651 | 5914, 652 | 5923, 653 | 5932, 654 | 5942, 655 | 5961, 656 | 5970, 657 | 5884, 658 | 5906, 659 | 5924, 660 | 5933, 661 | 5943, 662 | 5953, 663 | 5963, 664 | 5972, 665 | 5888, 666 | 5934, 667 | 5898, 668 | 5908, 669 | 5919, 670 | 5926, 671 | 5935, 672 | 5945, 673 | 5955, 674 | 5891, 675 | 5900, 676 | 5927, 677 | 5946, 678 | 5966, 679 | 5975, 680 | 5901, 681 | 5911, 682 | 5875, 683 | 5948, 684 | 5893, 685 | 5902, 686 | 5920, 687 | 5938, 688 | 5959, 689 | 5968, 690 | 5988, 691 | 5999, 692 | 6455, 693 | 5979, 694 | 6421, 695 | 5991, 696 | 5992, 697 | 6004, 698 | 6458, 699 | 5982, 700 | 5993, 701 | 6425, 702 | 6451, 703 | 6309, 704 | 5984, 705 | 5994, 706 | 6444, 707 | 5997, 708 | 6007, 709 | 6445, 710 | 6453, 711 | 5998, 712 | 6427, 713 | 6435, 714 | 6446, 
715 | 6403, 716 | 5922, 717 | 5915, 718 | 5910, 719 | 6401, 720 | 6003, 721 | 5899, 722 | 6396, 723 | 5873, 724 | 6418, 725 | 5874, 726 | 5916, 727 | 5878, 728 | 5983, 729 | 5990, 730 | 5996, 731 | 5940, 732 | 5846, 733 | 5871, 734 | 6436, 735 | 5872, 736 | 5956, 737 | 6402, 738 | 4758, 739 | 5285, 740 | 9153, 741 | 9137, 742 | 9151, 743 | 9143, 744 | 9201, 745 | 6173, 746 | 6284, 747 | 6276, 748 | 6258, 749 | 6252, 750 | 6255, 751 | 6166, 752 | 6085, 753 | 6025, 754 | 6268, 755 | 6180, 756 | 6143, 757 | 6041, 758 | 5829, 759 | 8427, 760 | 8218, 761 | 6023, 762 | 5835, 763 | 5831, 764 | 5830, 765 | 6039, 766 | 6086, 767 | 6413, 768 | 6412, 769 | 6411, 770 | 6087, 771 | 6077, 772 | 6076, 773 | 6071, 774 | 6069, 775 | 6038, 776 | 6036, 777 | 6035, 778 | 6034, 779 | 6030, 780 | 6024, 781 | 6021, 782 | 6019, 783 | 6017, 784 | 6013, 785 | 6012, 786 | 6011, 787 | 6010, 788 | 5870, 789 | 5867, 790 | 5865, 791 | 5860, 792 | 5836, 793 | 6231, 794 | 6212, 795 | 6140, 796 | 6138, 797 | 6120, 798 | 6118, 799 | 6073, 800 | 6022, 801 | 6020, 802 | 8227, 803 | 8225, 804 | 8230,97.0 805 | 5856, 806 | 8307, 807 | 1409, 808 | 6959,51.0 809 | 7525,46.0 810 | 6961,46.0 811 | 6967,44.0 812 | 6973,53.0 813 | 6974,103.0 814 | 6976,56.0 815 | 7516,100.0 816 | 6979,44.0 817 | 6980,51.0 818 | 6982,49.0 819 | 6983,71.0 820 | 6985,56.0 821 | 6931,46.0 822 | 6043,90.0 823 | 6945,55.0 824 | 7519,76.0 825 | 7526,53.0 826 | 7523,57.0 827 | 6956,69.0 828 | 6960,47.0 829 | 7524,51.0 830 | 6963,60.0 831 | 6964,93.0 832 | 6965,102.0 833 | 6966,53.0 834 | 6969,70.0 835 | 6971,51.0 836 | 6975,51.0 837 | 7517,107.0 838 | 6978,49.0 839 | 6981,44.0 840 | 6984,53.0 841 | 6899,54.0 842 | 6903,57.0 843 | 6904,66.0 844 | 6905,65.0 845 | 6906,43.0 846 | 6909,51.0 847 | 6911,46.0 848 | 6916,63.0 849 | 8215,51.0 850 | 6921,64.0 851 | 6932,51.0 852 | 6046,93.0 853 | 6944,49.0 854 | 7515,49.0 855 | 7514,58.0 856 | 6962,52.0 857 | 6968,71.0 858 | 6972,63.0 859 | 6970,48.0 860 | 6977,49.0 861 | 8329,46.0 862 | 7379, 
863 | 7080, 864 | 6744, 865 | 7098, 866 | 7158, 867 | 7163,57.0 868 | 7165, 869 | 7340, 870 | 7372, 871 | 7394, 872 | 7397, 873 | 281, 874 | 8258,73.0 875 | 8259,73.0 876 | 8290,50.0 877 | 7461,61.0 878 | 7323,56.0 879 | 8254,52.0 880 | 8270,49.0 881 | 8233,59.0 882 | 8285,70.0 883 | 6016,75.0 884 | 8423,70.0 885 | 8237,97.0 886 | 6040,71.0 887 | 6064,96.0 888 | 6957,84.0 889 | 8369,76.0 890 | 8247,87.0 891 | 8426,49.0 892 | 8428, 893 | 9058,101.0 894 | 8249,81.0 895 | 9057,76.0 896 | 7139, 897 | 7307, 898 | 7331, 899 | 7337, 900 | 7378, 901 | 7405, 902 | 66, 903 | 149, 904 | 328, 905 | 334, 906 | 2274, 907 | 5753, 908 | 6709,52.0 909 | 7000,65.0 910 | 6989, 911 | 7031, 912 | 7062,46.0 913 | 7460,49.0 914 | 7123,59.5 915 | 7128, 916 | 7145, 917 | 7147,71.0 918 | 7166, 919 | 7255,46.0 920 | 7275,46.0 921 | 7258, 922 | 7291, 923 | 7310, 924 | 7330, 925 | 7333, 926 | 7411, 927 | 178, 928 | 378, 929 | 8241,73.0 930 | 6988,48.0 931 | 8256,61.0 932 | 8796, 933 | 8264,46.0 934 | 8265,44.0 935 | 8231,91.0 936 | 8271,49.0 937 | 6190, 938 | 8275,68.0 939 | 8420,56.0 940 | 8283,71.0 941 | 8284,61.0 942 | 6008,60.0 943 | 8422,106.0 944 | 8296,45.0 945 | 8297,73.0 946 | 8300,61.0 947 | 8235,60.0 948 | 8306,96.0 949 | 8310,49.0 950 | 8236,91.0 951 | 8311,49.0 952 | 8314,64.0 953 | 8239,52.0 954 | 8240,93.0 955 | 8323,51.0 956 | 8242,120.0 957 | 8325,49.0 958 | 8326,67.0 959 | 8222,90.0 960 | 8430, 961 | 6042,56.0 962 | 8335,104.0 963 | 8343,62.0 964 | 6074,91.0 965 | 8351,78.0 966 | 8353,41.0 967 | 8354,70.0 968 | 7296,70.0 969 | 8365,51.0 970 | 8374,59.0 971 | 8376,84.0 972 | 8378,56.0 973 | 8412, 974 | 8387,52.0 975 | 8389,63.0 976 | 6243,56.0 977 | 7507, 978 | 7343, 979 | 6005, 980 | 5729, 981 | 5380, 982 | 5381, 983 | 5565, 984 | 7011, 985 | 7199, 986 | 7224, 987 | 7277, 988 | 7490, 989 | 7492, 990 | 7300, 991 | 7306,60.0 992 | 7408, 993 | 7418,63.0 994 | 5887, 995 | 5987, 996 | 173, 997 | 357, 998 | 258, 999 | 374, 1000 | 94, 1001 | 1859, 1002 | 6188, 1003 | 5207, 1004 | 
6448, 1005 | 8312,66.0 1006 | 8313,49.0 1007 | 8334,64.0 1008 | 8337,70.0 1009 | 8357, 1010 | 8366, 1011 | 8411, 1012 | 8388,60.0 1013 | 8395,69.0 1014 | 7014,92.0 1015 | 7035, 1016 | 6810, 1017 | 7498, 1018 | 7506, 1019 | 7390, 1020 | 7284, 1021 | 7081,46.0 1022 | 8243,66.0 1023 | 8245,46.0 1024 | 7033,76.0 1025 | 2150, 1026 | 100000,58.0 1027 | 8266,99.0 1028 | 6897,62.0 1029 | 6898,41.0 1030 | 5837,57.0 1031 | 6907,58.0 1032 | 7438,75.0 1033 | 6910,49.0 1034 | 6913,99.0 1035 | 6914,73.0 1036 | 6918,108.0 1037 | 6919,71.0 1038 | 8214,51.0 1039 | 6924,49.0 1040 | 8424,46.0 1041 | 6926,49.0 1042 | 6928,55.0 1043 | 6933,56.0 1044 | 7520,60.0 1045 | 7521,60.0 1046 | 6936,67.0 1047 | 7522,83.0 1048 | 6937,65.0 1049 | 6939,49.0 1050 | 6900,90.0 1051 | 6901,86.0 1052 | 6908,49.0 1053 | 6009,98.0 1054 | 6915,53.0 1055 | 6917,121.0 1056 | 6920,71.0 1057 | 6922,48.0 1058 | 6923,44.0 1059 | 6927,51.0 1060 | 6929,71.0 1061 | 6930,49.0 1062 | 6940,49.0 1063 | 6942,46.0 1064 | 6943,49.0 1065 | 7518,103.0 1066 | 6946,62.0 1067 | 8213,44.0 1068 | 6951,68.0 1069 | 6958,49.0 1070 | 7305, 1071 | 7376, 1072 | 7386, 1073 | 7404, 1074 | 7403, 1075 | 7457, 1076 | 7463, 1077 | 7015, 1078 | 7024, 1079 | 7079, 1080 | 7152, 1081 | 7297, 1082 | 7381, 1083 | 7413, 1084 | 7176, 1085 | 7352, 1086 | 2327, 1087 | 7117, 1088 | 7172, 1089 | 7168, 1090 | 7423, 1091 | 7425, 1092 | 7223, 1093 | 7239, 1094 | 7276, 1095 | 7281, 1096 | 7287, 1097 | 7292, 1098 | 7299, 1099 | 7303, 1100 | 7309, 1101 | 7328, 1102 | 7406, 1103 | 2320, 1104 | 7242, 1105 | 7462, 1106 | 5385, 1107 | 5292, 1108 | 5337, 1109 | 5350, 1110 | 5377, 1111 | 5386, 1112 | 5310, 1113 | 5282, 1114 | 5339, 1115 | 5322, 1116 | 5331, 1117 | 5364, 1118 | 5373, 1119 | 4879, 1120 | 7069, 1121 | 7496, 1122 | 7502, 1123 | 7344, 1124 | 7346,64.0 1125 | 7353, 1126 | 7373, 1127 | 7384, 1128 | 81, 1129 | 373, 1130 | 383, 1131 | 1867, 1132 | 957, 1133 | 998, 1134 | 1006, 1135 | 992, 1136 | 1002, 1137 | 1166, 1138 | 9077, 1139 | 9104, 1140 | 9152, 
1141 | 9165, 1142 | 9179, 1143 | 6996, 1144 | 7008, 1145 | 6729, 1146 | 7092, 1147 | 7164, 1148 | 7169, 1149 | 7181, 1150 | 7252, 1151 | 7446, 1152 | 7270, 1153 | 7483, 1154 | 7316, 1155 | 7351, 1156 | 7391, 1157 | 1, 1158 | 392, 1159 | 379, 1160 | 380, 1161 | 267, 1162 | 2057, 1163 | 5742, 1164 | 5056, 1165 | 5122, 1166 | 5158, 1167 | 5832, 1168 | 6994, 1169 | 7002, 1170 | 7026, 1171 | 6730, 1172 | 7075, 1173 | 7126, 1174 | 7227, 1175 | 7229, 1176 | 7449, 1177 | 6847, 1178 | 6953, 1179 | 7320, 1180 | 7354, 1181 | 7283, 1182 | 2, 1183 | 386, 1184 | 1716, 1185 | 1967, 1186 | 5785, 1187 | 4802, 1188 | 5116, 1189 | 5202, 1190 | 7071, 1191 | 7064,79.0 1192 | 7078, 1193 | 7094,58.5 1194 | 7141, 1195 | 7143, 1196 | 7151, 1197 | 7150, 1198 | 7424,43.0 1199 | 7178, 1200 | 7188, 1201 | 7201, 1202 | 7206, 1203 | 7205, 1204 | 7231,46.0 1205 | 7244, 1206 | 7260, 1207 | 7263, 1208 | 7280, 1209 | 7282,51.0 1210 | 7472, 1211 | 7382, 1212 | 7392, 1213 | 7477,59.0 1214 | 8610, 1215 | 8692, 1216 | 6727, 1217 | 7105, 1218 | 7479, 1219 | 7482, 1220 | 7504, 1221 | 7508, 1222 | 7355, 1223 | 5896, 1224 | 166, 1225 | 223, 1226 | 126, 1227 | 390, 1228 | 321, 1229 | 259, 1230 | 362, 1231 | 260, 1232 | 91, 1233 | 641, 1234 | 5160, 1235 | 5232, 1236 | 5606, 1237 | 5628, 1238 | 7004, 1239 | 7100, 1240 | 7102, 1241 | 7110, 1242 | 7135, 1243 | 7186, 1244 | 7430, 1245 | 2187, 1246 | 6094, 1247 | 6096, 1248 | 6109, 1249 | 6112, 1250 | 6124, 1251 | 6169, 1252 | 6202, 1253 | 6203, 1254 | 6242, 1255 | 6318, 1256 | 6990, 1257 | 6992, 1258 | 6998, 1259 | 4927, 1260 | 4935, 1261 | 4862, 1262 | 5596, 1263 | 5517, 1264 | 5582, 1265 | 5590, 1266 | 5536, 1267 | 5670, 1268 | 5678, 1269 | 5645, 1270 | 2290, 1271 | 5805, 1272 | 4997, 1273 | 5341, 1274 | 6449, 1275 | 1366, 1276 | 1363, 1277 | 1317, 1278 | 1313, 1279 | 1312, 1280 | 1360, 1281 | 1362, 1282 | 1256, 1283 | 9342, 1284 | 9450, 1285 | 9437, 1286 | 9436, 1287 | 9434, 1288 | 9427, 1289 | 9421, 1290 | 9416, 1291 | 9413, 1292 | 9412, 1293 | 9411, 1294 | 
9410, 1295 | 9408, 1296 | 9407, 1297 | 9405, 1298 | 9404, 1299 | 9399, 1300 | 9392, 1301 | 9391, 1302 | 9390, 1303 | 9388, 1304 | 9386, 1305 | 9385, 1306 | 9384, 1307 | 9383, 1308 | 9382, 1309 | -------------------------------------------------------------------------------- /data/y_matrix.pheno: -------------------------------------------------------------------------------- 1 | FID IID phenotype_value 2 | 5837 5837 57.0 3 | 6008 6008 60.0 4 | 6009 6009 98.0 5 | 6016 6016 75.0 6 | 6040 6040 71.0 7 | 6042 6042 56.0 8 | 6043 6043 90.0 9 | 6046 6046 93.0 10 | 6064 6064 96.0 11 | 6074 6074 91.0 12 | 6243 6243 56.0 13 | 6709 6709 52.0 14 | 6897 6897 62.0 15 | 6898 6898 41.0 16 | 6899 6899 54.0 17 | 6900 6900 90.0 18 | 6901 6901 86.0 19 | 6903 6903 57.0 20 | 6904 6904 66.0 21 | 6905 6905 65.0 22 | 6906 6906 43.0 23 | 6907 6907 58.0 24 | 6908 6908 49.0 25 | 6909 6909 51.0 26 | 6910 6910 49.0 27 | 6911 6911 46.0 28 | 6913 6913 99.0 29 | 6914 6914 73.0 30 | 6915 6915 53.0 31 | 6916 6916 63.0 32 | 6917 6917 121.0 33 | 6918 6918 108.0 34 | 6919 6919 71.0 35 | 6920 6920 71.0 36 | 6921 6921 64.0 37 | 6922 6922 48.0 38 | 6923 6923 44.0 39 | 6924 6924 49.0 40 | 6926 6926 49.0 41 | 6927 6927 51.0 42 | 6928 6928 55.0 43 | 6929 6929 71.0 44 | 6930 6930 49.0 45 | 6931 6931 46.0 46 | 6932 6932 51.0 47 | 6933 6933 56.0 48 | 6936 6936 67.0 49 | 6937 6937 65.0 50 | 6939 6939 49.0 51 | 6940 6940 49.0 52 | 6942 6942 46.0 53 | 6943 6943 49.0 54 | 6944 6944 49.0 55 | 6945 6945 55.0 56 | 6946 6946 62.0 57 | 6951 6951 68.0 58 | 6956 6956 69.0 59 | 6957 6957 84.0 60 | 6958 6958 49.0 61 | 6959 6959 51.0 62 | 6960 6960 47.0 63 | 6961 6961 46.0 64 | 6962 6962 52.0 65 | 6963 6963 60.0 66 | 6964 6964 93.0 67 | 6965 6965 102.0 68 | 6966 6966 53.0 69 | 6967 6967 44.0 70 | 6968 6968 71.0 71 | 6969 6969 70.0 72 | 6970 6970 48.0 73 | 6971 6971 51.0 74 | 6972 6972 63.0 75 | 6973 6973 53.0 76 | 6974 6974 103.0 77 | 6975 6975 51.0 78 | 6976 6976 56.0 79 | 6977 6977 49.0 80 | 6978 6978 49.0 81 | 6979 6979 
44.0 82 | 6980 6980 51.0 83 | 6981 6981 44.0 84 | 6982 6982 49.0 85 | 6983 6983 71.0 86 | 6984 6984 53.0 87 | 6985 6985 56.0 88 | 6988 6988 48.0 89 | 7000 7000 65.0 90 | 7014 7014 92.0 91 | 7033 7033 76.0 92 | 7062 7062 46.0 93 | 7064 7064 79.0 94 | 7081 7081 46.0 95 | 7094 7094 58.5 96 | 7123 7123 59.5 97 | 7147 7147 71.0 98 | 7163 7163 57.0 99 | 7231 7231 46.0 100 | 7255 7255 46.0 101 | 7275 7275 46.0 102 | 7282 7282 51.0 103 | 7296 7296 70.0 104 | 7306 7306 60.0 105 | 7323 7323 56.0 106 | 7346 7346 64.0 107 | 7418 7418 63.0 108 | 7424 7424 43.0 109 | 7438 7438 75.0 110 | 7460 7460 49.0 111 | 7461 7461 61.0 112 | 7477 7477 59.0 113 | 7514 7514 58.0 114 | 7515 7515 49.0 115 | 7516 7516 100.0 116 | 7517 7517 107.0 117 | 7518 7518 103.0 118 | 7519 7519 76.0 119 | 7520 7520 60.0 120 | 7521 7521 60.0 121 | 7522 7522 83.0 122 | 7523 7523 57.0 123 | 7524 7524 51.0 124 | 7525 7525 46.0 125 | 7526 7526 53.0 126 | 8213 8213 44.0 127 | 8214 8214 51.0 128 | 8215 8215 51.0 129 | 8222 8222 90.0 130 | 8230 8230 97.0 131 | 8231 8231 91.0 132 | 8233 8233 59.0 133 | 8235 8235 60.0 134 | 8236 8236 91.0 135 | 8237 8237 97.0 136 | 8239 8239 52.0 137 | 8240 8240 93.0 138 | 8241 8241 73.0 139 | 8242 8242 120.0 140 | 8243 8243 66.0 141 | 8245 8245 46.0 142 | 8247 8247 87.0 143 | 8249 8249 81.0 144 | 8254 8254 52.0 145 | 8256 8256 61.0 146 | 8258 8258 73.0 147 | 8259 8259 73.0 148 | 8264 8264 46.0 149 | 8265 8265 44.0 150 | 8266 8266 99.0 151 | 8270 8270 49.0 152 | 8271 8271 49.0 153 | 8274 8274 74.0 154 | 8275 8275 68.0 155 | 8283 8283 71.0 156 | 8284 8284 61.0 157 | 8285 8285 70.0 158 | 8290 8290 50.0 159 | 8296 8296 45.0 160 | 8297 8297 73.0 161 | 8300 8300 61.0 162 | 8306 8306 96.0 163 | 8310 8310 49.0 164 | 8311 8311 49.0 165 | 8312 8312 66.0 166 | 8313 8313 49.0 167 | 8314 8314 64.0 168 | 8323 8323 51.0 169 | 8325 8325 49.0 170 | 8326 8326 67.0 171 | 8329 8329 46.0 172 | 8334 8334 64.0 173 | 8335 8335 104.0 174 | 8337 8337 70.0 175 | 8343 8343 62.0 176 | 8351 8351 78.0 177 | 8353 
8353 41.0 178 | 8354 8354 70.0 179 | 8365 8365 51.0 180 | 8369 8369 76.0 181 | 8374 8374 59.0 182 | 8376 8376 84.0 183 | 8378 8378 56.0 184 | 8387 8387 52.0 185 | 8388 8388 60.0 186 | 8389 8389 63.0 187 | 8395 8395 69.0 188 | 8420 8420 56.0 189 | 8422 8422 106.0 190 | 8423 8423 70.0 191 | 8424 8424 46.0 192 | 8426 8426 49.0 193 | 9057 9057 76.0 194 | 9058 9058 101.0 195 | 100000 100000 58.0 196 | -------------------------------------------------------------------------------- /docs/DATAGUIDE.md: -------------------------------------------------------------------------------- 1 | # Data Guide 2 | 3 | The minimal requirement is to provide a genotype and a phenotype file. We provide test data in the folder `data`. 4 | permGWAS2 is designed to work with several genotype file formats: 5 | 6 | ## Genotype file 7 | permGWAS needs **fully imputed** genotypes. We support our custom HDF5/H5/H5PY file, CSV PLINK and binary PLINK files. 8 | We recommend to use permGWAS2 with HDF5/H5/H5PY files. For this we provide a function to create an H5 file which satisfies 9 | our requirements and takes CSV, PLINK and binary PLINK genotype files as an input. For more info on how to use this function, 10 | see the section **Create H5 file** below. 11 | 12 | ### HDF5/H5/H5PY 13 | The file has to contain the following keys: 14 | 15 | - snps: genotype matrix, additively encoded (012) 16 | - sample_ids: vector containing corresponding sample ids 17 | - position_index: vector containing the positions of all SNPs 18 | - chr_index: vector containing the corresponding chromosome number 19 | 20 | ```shell 21 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv 22 | ``` 23 | 24 | ### CSV 25 | The **first column** should be the **sample ids**. The **column names** should be the **SNP identifiers** in the form 26 | "CHR_POSITION" (e.g. Chr1_657). The values should be the genotype matrix in **additive encoding**. 
27 | 28 | ```shell 29 | python3 permGWAS.py -x ./data/x_matrix.csv -y ./data/y_matrix.csv 30 | ``` 31 | 32 | ### PLINK 33 | To use PLINK data, a .map and .ped file with the same prefix need to be in the same folder. 34 | To run permGWAS2 with PLINK files, you can use PREFIX.map or PREFIX.ped as option for the genotype file. 35 | 36 | ```shell 37 | python3 permGWAS.py -x ./data/x_matrix.map -y ./data/y_matrix.pheno 38 | ``` 39 | 40 | ### binary PLINK 41 | To use binary PLINK data, a .bed, .bim and .fam file with the same prefix need to be in the same folder. 42 | To run permGWAS2 with binary PLINK files, you can use PREFIX.bed, PREFIX.bim or PREFIX.fam as option for the genotype file. 43 | 44 | 45 | ## Phenotype file 46 | permGWAS2 currently only accepts CSV, PHENO and TXT files for the phenotype. Here the **first column** should contain 47 | the **sample ids**. The remaining columns should contain the phenotype values with the phenotype name as column name. 48 | For TXT and PHENO files it is assumed that the values are separated by a **single space**. The samples need not be in 49 | the same order as in the genotype file. permGWAS2 automatically matched genotype and phenotype and discards all samples 50 | where only one of both is available. 51 | It is possible to run permGWAS with several traits one after another as long as they are stored in the same 52 | phenotype file. 53 | 54 | ```shell 55 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -trait phenotype_value phenotype_2 56 | ``` 57 | You can also run permGWAS2 for all available phenotypes in your phenotype file: 58 | 59 | ```shell 60 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -trait all 61 | ``` 62 | 63 | ## Kinship file 64 | Per default permGWAS2 computes the realized relationship kernel as kinship matrix. 65 | It is also possible to provide a kinship matrix. Currently, permGWAS only accepts CSV, H5, HDF5, H5PY files as 66 | kinship file. 
For CSV files the first column should contain the sample ids. For H5, HDF5, H5PY files the kinship 67 | matrix should have the key 'kinship' and the corresponding sample ids the key 'sample_ids' 68 | The sample ids need to match those of the genotype matrix. 69 | 70 | ```shell 71 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -k ./data/k_matrix.csv 72 | ``` 73 | 74 | ## Covariates file 75 | It is possible to run permGWAS2 with covariates. If no covariates file is provided, only the intercept will be used as 76 | fixed effect. Currently, permGWAS2 only accepts CSV files for covariates. Here the first column should contain the 77 | sample ids. The sample ids must match those of the phenotype file. 78 | 79 | ```shell 80 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -cov ./data/cov_matrix.csv 81 | ``` 82 | 83 | ## create H5 file 84 | We provide a function to create an H5 file which satisfies our requirements. It is possible to create the H5 based on a 85 | CSV, PLINK or binary PLINK files which have to fulfil the same requirements as above. The function takes the genotype 86 | file path via the option `-x` and additionally one can specify a new directory to save the H5 file via `-sd` if the save 87 | directory is not specified, the new file will be stored in the same directory as the input file. 88 | 89 | ```shell 90 | python3 create_h5_file.py -x ./data/x_matrix.map -sd ./data/test 91 | ``` -------------------------------------------------------------------------------- /docs/INSTALLATION.md: -------------------------------------------------------------------------------- 1 | # Requirements 2 | 3 | To ensure a stable working environment, we recommend using [Docker](https://www.docker.com). To follow this recommendation, 4 | docker needs to be installed and running on your machine. We provide a Dockerfile based on CUDA 11.5 and Ubuntu 20.4. 
5 | 6 | If you want to use permGWAS2 without Docker, you need to install all packages mentioned in the 7 | [requirements file](../Docker/requirements.txt). 8 | 9 | # Installation Guide 10 | 11 | 1. Clone the repository into the directory where you want to set up the project 12 | 13 | ```shell 14 | git clone https://github.com/grimmlab/permGWAS.git 15 | ``` 16 | 17 | 2. To use permGWAS2 within a Docker environment, navigate to `Docker` and build a Docker image using the provided Dockerfile. 18 | 19 | ```shell 20 | cd permGWAS/Docker 21 | docker build -t IMAGENAME . 22 | ``` 23 | 24 | 3. Run an interactive Docker container based on the created image.\ 25 | You have to mount the directory where the repository is located on your machine in the Docker container. 26 | If you want to work on GPU, specify the GPUs to mount. 27 | 28 | ```shell 29 | docker run -it -v PATH_TO_REPO_FOLDER:/NAME_OF_DIRECTORY_IN_CONTAINER --gpus device=DEVICE_NUMBER --name CONTAINERNAME IMAGENAME 30 | ``` 31 | 32 | ### Example 33 | 34 | 1. Assume our repository is located in a folder called `/myhome` and we want to name our image `permGWAS_image` 35 | 36 | ```shell 37 | cd /myhome/permGWAS/Docker 38 | docker build -t permGWAS_image . 39 | ``` 40 | 41 | 2. Further, assume that we want to call our container `permGWAS_container`, our data is located in (subfolders of) 42 | `/myhome` (i.e. we only need to mount one directory) and we want to use GPU 1. Then we have to run the following command: 43 | 44 | ```shell 45 | docker run -it -v /myhome/:/myhome_in_container/ --gpus device=1 --name permGWAS_container permGWAS_image 46 | ``` 47 | 48 | 3. If we need to mount a second directory (e.g. we want to save our results in a different folder called `/results`), 49 | we can run the following: 50 | 51 | ```shell 52 | docker run -it -v /myhome/:/myhome_in_container/ -v /results/:/results/ --gpus device=1 --name permGWAS_container permGWAS_image 53 | ``` 54 | 55 | With this the setup is finished. 
For details on how to run permGWAS, see our [Quickstart Guide](./QUICKSTART.md). -------------------------------------------------------------------------------- /docs/OPTIONS.md: -------------------------------------------------------------------------------- 1 | # Optional settings 2 | ## Minor allele frequency (MAF) 3 | It is possible to filter the markers for minor allele frequency. For this use the flag `-maf` and specify an integer 4 | value between 0 and 30. For example to remove all SNPs with MAF<10%: 5 | ```shell 6 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -maf 10 7 | ``` 8 | Per default permGWAS2 does not filter for MAF. 9 | 10 | ## GPU usage 11 | For faster computations, permGWAS2 supports GPU usage. If one or several GPUs are available permGWAS2 will per default use 12 | the GPU device 0 for its computations. If no GPUs are available, permGWAS will perform all computations on CPUs only. 13 | To change the GPU you can use the flag `-device` and specify the number of the GPU to use. If you do NOT want to use 14 | GPUs, although they are available, you can use the flag `disable_gpu`: 15 | ```shell 16 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -device 1 17 | 18 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -disable_gpu 19 | ``` 20 | 21 | ## Batch size 22 | It is possible to adjust the batch size for the simultaneous computation of univariate tests via `-batch`. Here the 23 | default is set to 50000. If you run into memory errors while using permGWAS2 we suggest reducing the batch size. 24 | ```shell 25 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -batch 10000 26 | ``` 27 | When using permGWAS2 with permutations, several univariate tests will be computed for all permutations at once. 28 | To prevent running into memory errors, one can adjust the batch size for permutations separately via `-batch_perm`. 29 | Here the default value is set to 1000. 
We suggest adjusting this parameter depending on the number of samples and number 30 | of permutations. For more information about permutations see [permGWAS2 with permutations](./PERMUTATIONS.md) 31 | ```shell 32 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -perm 100 -batch_perm 500 33 | ``` 34 | 35 | ## Batch-wise loading of genotype 36 | As memory is a limiting factor, permGWAS2 is also capable of loading the genotype matrix batch-wise from file under certain 37 | conditions. For this you have to provide a precomputed kinship matrix (see [DataGuide](./DATAGUIDE.md)) and the genotype matrix 38 | must be provided via an HDF5 file (see [DataGuide](./DATAGUIDE.md) for a function to create an HDF5 file). 39 | 40 | However, if memory is not an issue, we recommend loading the genotype file completely to improve the speed of permGWAS2. 41 | When no precomputed kinship is provided, the genotype matrix will be loaded completely per default. It is also possible 42 | to force permGWAS2 to load the genotype matrix completely even if a kinship is provided via the flag `-load_genotype`. 43 | ```shell 44 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -load_genotype 45 | ``` 46 | 47 | ## Model (coming soon) 48 | permGWAS computes test statistics and p-values based on a Linear Mixed Model (LMM). In the future there will be other 49 | models available. The model can be chosen via `-model`. Currently, only `lmm` is available. 50 | 51 | ## Non-additive encoding 52 | permGWAS assumes that the genotypes are in additive encoding (i.e. number of minor alleles) and produces an error if the genotypes 53 | are encoded differently. If your data is **not additively encoded**, you can use the flag `-not_add`. For example if you 54 | are working with other data than SNP data. However, our framework was developed for SNP data, and we give no guarantee that it 55 | works for other purposes. 
56 | 57 | 58 | See [Quickstart](./QUICKSTART.md), [permGWAS2 with permutations](./PERMUTATIONS.md) and [Create plots](./PLOTS.md) for 59 | detailed explanations of other flags and options. 60 | 61 | ## Overview of all flags and options 62 | |**flag**|**description**| 63 | |---|---| 64 | |-x (--genotype_file) | absolute or relative path to genotype file | 65 | |-y (--phenotype_file) | absolute or relative path to phenotype file | 66 | |-trait (--y_name)| name of phenotype (column) to be used in phenotype file, optional, default is "phenotype_value"| 67 | |-k (-kinship_file) | absolute or relative path to kinship file, optional| 68 | |-cov (--covariate_file) | absolute or relative path to covariates file, optional| 69 | |-cov_list (--covariate_list) | names of covariates to use from covariate_file, optional | 70 | |-maf (--maf_threshold) | minor allele frequency threshold as percentage value, optional, default is 0| 71 | |-load_genotype | choose whether to load full genotype from file or batch-wise during computations, optional, default is False| 72 | |-config (--config_file) | full path to yaml config file| 73 | |-model | specify model name, only relevant if you define your own models, currently only lmm is available| 74 | |-out_dir | name of the directory result-files should be stored in, optional, if not provided, files will be stored in folder "results" in current directory| 75 | |-out_file | NAME of result files, will be stored as NAME_p_values and NAME_min_p_values, optional, if not provided name of phenotype will be used| 76 | |-disable_gpu | use if you want to perform computations on CPU only though GPU would be available| 77 | |-device | GPU device to be used, optional, default is 0| 78 | |-perm | number of permutations to be performed, optional, default is 0| 79 | |-perm_method | method to use for permutations: y - permute only y, x - permute y and kinship matrix, default is x| 80 | |-adj_p_value | additionally compute permutation-based adjusted p-values 
and store them in the p-value file, optional default is False| 81 | |-batch (--batch_size) | number of SNPs to work on simultaneously, optional, default is 50000| 82 | |-batch_perm (--perm_batch_size) | number of SNPs to work on simultaneously while using permutations, optional, default is 1000| 83 | |-mplot (--plot, --manhattan)| creates Manhattan plot, optional| 84 | |-qqplot | creates QQ-plot, optional| 85 | |-not_add | use when genotype is not in additive encoding| 86 | -------------------------------------------------------------------------------- /docs/PERMUTATIONS.md: -------------------------------------------------------------------------------- 1 | # permGWAS2 with permutations 2 | 3 | The main purpose of permGWAS2 is to perform GWAS with permutation-based thresholds. To use permGWAS2 with permutations, 4 | you have to specify the number of permutations *q* via the flag `-perm`: 5 | ```shell 6 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -perm 100 7 | ``` 8 | This creates an additional result file `min_p_values_NAME.csv` containing for each permutation the seed and the minimal 9 | p-value. Additionally, the `summary_statistics_NAME.txt` output file now contains permutation-based significance 10 | thresholds for common significance levels $\alpha$. 11 | 12 | ### General workflow of permGWAS2 with permutations 13 | 1. Compute p-values for all available SNPs during normal GWAS run 14 | 2. Create *q* permutations 15 | 3. Compute the test statistic for each permutation and SNP in batches 16 | 4. For each permutation find the maximal test statistic over all SNPs and compute the corresponding minimal p-value 17 | 5. The permutation-based threshold is given as the ($1-\alpha$)th percentile for a significance level $\alpha$ 18 | (*maxT/minP method*) 19 | 20 | ### Additional settings 21 | - permGWAS2 supports two different permutation strategies which can be selected via the flag `-perm_method`: 22 | 1. 
`x`(default): permutes the fixed effects matrix including SNP of interest and covariates (equivalent to permuting 23 | the phenotype and covariance matrix). This method considers the population structure while permuting. 24 | 2. `y`: only permute the phenotype vector. This method is faster but breaks the population structure between the 25 | samples 26 | ```shell 27 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -perm 100 -perm_method x 28 | 29 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -perm 100 -perm_method y 30 | ``` 31 | - permGWAS2 supports computations on GPUs. If GPUs are available, it will automatically use the 0th GPU. If no GPUs are 32 | available, permGWAS will perform all computations on CPUs only. To change the GPU you can use the flag `-device` and 33 | specify the number of the GPU to use. If you do NOT want to use GPUs, although they are available, you can use the flag 34 | `-disable_gpu`: 35 | ```shell 36 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -perm 100 -device 1 37 | 38 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -perm 100 -disable_gpu 39 | ``` 40 | - Since permGWAS2 computes the test statistics for different SNPs and permutations simultaneously in batches, the 41 | available VRAM poses a limitation. To avoid running into memory errors (when using GPUs), you can manually adjust the 42 | batch-size, i.e. the number of SNPs to be processed simultaneously for all permutations, via the flag `-batch_perm` 43 | (The default is 1000 SNPs): 44 | ```shell 45 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -perm 100 -batch_perm 500 46 | ``` 47 | - permGWAS is also able to compute permutation-based adjusted p-values and save them in the p_value output file via the 48 | flag `-adj_p_value`. However, it should be noted that in order to get meaningful adjusted p-values, millions of 49 | permutations are needed. 
50 | ```shell 51 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv -perm 100 -adj_p_value 52 | ``` 53 | -------------------------------------------------------------------------------- /docs/PLOTS.md: -------------------------------------------------------------------------------- 1 | # Create plots 2 | 3 | permGWAS is able to create Manhattan and QQ-plots during a GWAS run and from existing p-value files via the 4 | `create_plot.py` script. 5 | 6 | ## Manhattan plot 7 | 8 | 9 | - While running permGWAS, you can use the flag `-mplot` to generate and save a Manhattan plot with Bonferroni 10 | significance threshold for significance level $\alpha=0.05$. If you use permGWAS2 with permutations, additionally the 11 | permutation-based threshold will be plotted. 12 | - If you already have result files generated by permGWAS, you can also create a Manhattan plot afterward. You only need 13 | to specify the p-value file (relative or absolute path) and use the flag `-mplot`: 14 | ```shell 15 | python3 create_plot.py -p_val PATH_TO_RESULT_P_VALUE_FILE -mplot 16 | ``` 17 | - By default, it uses a significance level of 5%. You can change it via the flag `-sig_level`, which expects an integer 18 | value, e.g. 19 | ```shell 20 | python3 create_plot.py -p_val PATH_TO_RESULT_P_VALUE_FILE -mplot -sig_level 1 21 | ``` 22 | - If you have a corresponding minimal p-value file available, you can additionally plot the permutation-based significance 23 | threshold by giving the path to the file via the flag `-min_p_val`: 24 | ```shell 25 | python3 create_plot.py -p_val PATH_TO_RESULT_P_VALUE_FILE -min_p_val PATH_TO_MIN_P_VALUE_FILE -mplot 26 | ``` 27 | - The resulting Manhattan plot will be saved in the same folder where the p-value file is stored, unless you specify a 28 | different directory via `-out_dir`. If no other name is specified via `-out_file`, the plot will be named 29 | `manhattan_PHENOTYPE_NAME.png`. 
30 | 31 | 32 | ### QQ-plot 33 | 34 | 35 | - While running permGWAS, you can use the flag `-qqplot` to generate and save a simple QQ-plot including the inflation 36 | factor lambda. 37 | - To generate a QQ-plot afterward based on existing p-value result files, you only need to specify the p-value file 38 | (relative or absolute path) and use the flag `-qqplot`: 39 | ```shell 40 | python3 create_plot.py -p_val PATH_TO_RESULT_P_VALUE_FILE -qqplot 41 | ``` 42 | - The resulting QQ-plot will be saved in the same folder where the p-value file is stored, unless you specify a 43 | different directory via `-out_dir`. If no other name is specified via `-out_file`, the plot will be named 44 | `qq_plot_PHENOTYPE_NAME.png`. -------------------------------------------------------------------------------- /docs/QUICKSTART.md: -------------------------------------------------------------------------------- 1 | # Quickstart Guide 2 | 3 | ## Simple workflow using Docker 4 | 5 | 1. Create a new Docker container using our [Installation Guide](./INSTALLATION.md) or start an existing container with: 6 | 7 | ```shell 8 | docker start -i CONTAINERNAME 9 | ``` 10 | 11 | 2. Navigate to the directory where the permGWAS2 repository is located: 12 | 13 | ```shell 14 | cd /REPO_DIRECTORY/permGWAS 15 | ``` 16 | 17 | 3. Run the script with the test data provided in the `./data` folder: 18 | 19 | ```shell 20 | python3 permGWAS.py -x ./data/x_matrix.h5 -y ./data/y_matrix.csv 21 | ``` 22 | 23 | To use permGWAS2 without Docker, simply omit the first step. 24 | 25 | 26 | ## Basic settings 27 | ### 1. Input Data 28 | Details on the supported data types can be found in the [Data Guide](./DATAGUIDE.md). 29 | ###### Genotype & Phenotype 30 | - The minimal requirement is to provide a genotype and a phenotype file (as relative or absolute paths) via the 31 | flags `-x` and `-y`, respectively. 32 | - By default, permGWAS assumes that the phenotype in the phenotype file is called `phenotype_value`. 
You can specify a 33 | different name via the flag `-trait`: 34 | ```shell 35 | python3 permGWAS.py -x PATH_TO_GENOTYPE -y PATH_TO_PHENOTYPE -trait PHENO_NAME 36 | ``` 37 | - It is possible to run permGWAS2 for several phenotypes located in the same phenotype file one after another. You can 38 | either specify a list of phenotypes or run permGWAS2 for all available phenotypes in the file by using the key word `all`: 39 | ```shell 40 | python3 permGWAS.py -x PATH_TO_GENOTYPE -y PATH_TO_PHENOTYPE -trait PHENO_1 PHENO_2 PHENO_3 41 | 42 | python3 permGWAS.py -x PATH_TO_GENOTYPE -y PATH_TO_PHENOTYPE -trait all 43 | ``` 44 | 45 | ###### Kinship 46 | By default, permGWAS2 computes the realized relationship kernel as kinship matrix. You can use a pre-computed genomic 47 | relationship matrix via the flag `-k`: 48 | ```shell 49 | python3 permGWAS.py -x PATH_TO_GENOTYPE -y PATH_TO_PHENOTYPE -k PATH_TO_KINSHIP 50 | ``` 51 | 52 | ###### Covariates 53 | It is possible to run permGWAS2 with additional covariates. To specify the covariate file, use the flag `cov`. 54 | By default, this uses all available covariates in the file. If you only want to use certain columns/covariates, you 55 | have to use the flag `-cov_list` and specify the covariate names as a list: 56 | ```shell 57 | python3 permGWAS.py -x PATH_TO_GENOTYPE -y PATH_TO_PHENOTYPE -cov PATH_TO_COVARIATE_FILE 58 | 59 | python3 permGWAS.py -x PATH_TO_GENOTYPE -y PATH_TO_PHENOTYPE -cov PATH_TO_COVARIATE_FILE -cov_list COV_1 COV_2 COV_3 60 | ``` 61 | 62 | ### 2. 
Config file 63 | permGWAS2 accepts yaml config files where you can specify all flags and options instead of passing them all separately: 64 | ```shell 65 | python3 permGWAS.py -config ./data/config.yaml 66 | ``` 67 | The config file should have the following structure: 68 | ```YAML 69 | --- 70 | genotype_file: "PATH_TO_GENOTYPE" 71 | phenotype_file: "PATH_TO_PHENOTYPE" 72 | trait: "PHENO_NAME" 73 | kinship_file: "PATH_TO_KINSHIP" 74 | covariate_file: "PATH_TO_COVARIATE_FILE" 75 | covariate_list: 76 | - "COV_1" 77 | - "COV_2" 78 | - "COV_3" 79 | ``` 80 | 81 | ### 3. Output files 82 | Per default permGWAS2 creates a CSV output file and saves it in a directory called `results`. You can also specify a 83 | different directory for the output files via the flag `-out_dir`. The output file will be saved under the name 84 | `p_values_NAME.csv`, where NAME will be the phenotype name by default, but can also be changed via `-out_file`. 85 | ```shell 86 | python3 permGWAS.py -x PATH_TO_GENOTYPE -y PATH_TO_PHENOTYPE -out_dir RESULT_FILE_DIR -out_file RESULT_FILE_NAME 87 | ``` 88 | The result file contains for each analyzed SNP: 89 | - CHR: chromosome number 90 | - POS: position within chromosome 91 | - p_value: computed p-value 92 | - test_stat: computed test statistic 93 | - maf: minor allele frequency of SNP 94 | - SE: standard error 95 | - effect_size: coefficient beta 96 | 97 | Additionally, a TXT file with summary statistics will be saved. 98 | This file contains the estimates of the variance components of the null model, 99 | the narrow-sense heritability, the Bonferroni threshold and, 100 | if activated, the permutation-based threshold. 101 | 102 | 103 | ## Further options 104 | The table below shows all available flags. For detailed explanations of further flags and options go to 105 | [permGWAS2 with permutations](./PERMUTATIONS.md), [Create plots](./PLOTS.md) and [Optional settings](./OPTIONS.md). 
106 | 107 | |**flag**|**description**| 108 | |---|---| 109 | |-x (--genotype_file) | absolute or relative path to genotype file | 110 | |-y (--phenotype_file) | absolute or relative path to phenotype file | 111 | |-trait (--y_name)| name of phenotype (column) to be used in phenotype file, optional, default is "phenotype_value"| 112 | |-k (-kinship_file) | absolute or relative path to kinship file, optional| 113 | |-cov (--covariate_file) | absolute or relative path to covariates file, optional| 114 | |-cov_list (--covariate_list) | names of covariates to use from covariate_file, optional | 115 | |-maf (--maf_threshold) | minor allele frequency threshold as percentage value, optional, default is 0| 116 | |-load_genotype | choose whether to load full genotype from file or batch-wise during computations, optional, default is False| 117 | |-config (--config_file) | full path to yaml config file| 118 | |-model | specify model name, only relevant if you define your own models, currently only lmm is available| 119 | |-out_dir | name of the directory result-files should be stored in, optional, if not provided, files will be stored in folder "results" in current directory| 120 | |-out_file | NAME of result files, will be stored as NAME_p_values and NAME_min_p_values, optional, if not provided name of phenotype will be used| 121 | |-disable_gpu | use if you want to perform computations on CPU only though GPU would be available| 122 | |-device | GPU device to be used, optional, default is 0| 123 | |-perm | number of permutations to be performed, optional, default is 0| 124 | |-perm_method | method to use for permutations: y - permute only y, x - permute y and kinship matrix, default is x| 125 | |-adj_p_value | additionally compute permutation-based adjusted p-values and store them in the p-value file, optional default is False| 126 | |-batch (--batch_size) | number of SNPs to work on simultaneously, optional, default is 50000| 127 | |-batch_perm (--perm_batch_size) | number 
of SNPs to work on simultaneously while using permutations, optional, default is 1000| 128 | |-mplot (--plot, --manhattan)| creates Manhattan plot, optional| 129 | |-qqplot | creates QQ-plot, optional| 130 | |-not_add | use when genotype is not in additive encoding| -------------------------------------------------------------------------------- /docs/manhattan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grimmlab/permGWAS/3f7a1c2e3e4c63281f5719425ff9ac405f8d9cfc/docs/manhattan.png -------------------------------------------------------------------------------- /docs/qq_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grimmlab/permGWAS/3f7a1c2e3e4c63281f5719425ff9ac405f8d9cfc/docs/qq_plot.png -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["_base_model", "lmm"] -------------------------------------------------------------------------------- /models/_base_model.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import torch 3 | import pathlib 4 | import pandas as pd 5 | import numpy as np 6 | 7 | from preprocess import data_loader 8 | from postprocess import plot_functions 9 | 10 | 11 | class BaseModel(abc.ABC): 12 | 13 | def __init__(self, dataset: data_loader.Dataset, batch_size: int, device: torch.device, perm: int = None, 14 | perm_batch_size: int = None): 15 | self.dataset = dataset 16 | self.batch_size = batch_size 17 | self.device = device 18 | self.perm_batch_size = perm_batch_size 19 | self.perm = perm 20 | self.v_g = None # genetic variance component for LMM 21 | self.v_e = None # residual variance component for LMM 22 | self.delta = None # v_e/v_g 23 | self.effect_size = None # effect sizes for all SNPs 24 | 
self.SE = None  # standard errors for all SNPs 25 |         self.test_stat = None  # test statistics for all SNPs 26 |         self.p_value = None  # p_values for all SNPs 27 |         self.seeds = None  # seeds for permutation with numpy generator 28 |         self.perm_p_val = None  # permutation-based p-values 29 |         self.min_p_value = None  # minimal p-values for all permutations 30 | 31 |     @abc.abstractmethod 32 |     def gwas(self): 33 |         """ 34 |         Function to perform batch-wise computation of univariate test 35 | 36 |         """ 37 | 38 |     @abc.abstractmethod 39 |     def perm_gwas(self, **kwargs): 40 |         """ 41 |         Function to perform batch-wise computation of permutation-based test 42 | 43 |         """ 44 | 45 |     # general methods 46 |     def perm_seeds(self) -> np.array: 47 |         """ 48 |         get seeds for permutations (self.perm distinct seeds drawn without replacement) 49 | 50 |         :return: array with seeds 51 |         """ 52 |         rng = np.random.default_rng() 53 |         return rng.choice(1000000, self.perm, replace=False) 54 | 55 |     def permute(self, data: torch.tensor) -> torch.tensor: 56 |         """ 57 |         Create tensor with permutations of input data, one permutation per stored seed 58 | 59 |         :param data: input data to permute of shape (n,c) or (n) 60 | 61 |         :return: tensor with permuted data of shape (p,n,c) or (n,p) 62 |         """ 63 |         data = data.to(torch.device("cpu")) 64 |         x_perm = [] 65 |         for seed in self.seeds: 66 |             tmp = np.random.default_rng(seed=seed) 67 |             x_perm.append(tmp.permutation(data, axis=0)) 68 |         if data.ndim == 1: 69 |             return torch.t(torch.tensor(np.array(x_perm), dtype=torch.float64, device=self.device)) 70 |         else: 71 |             return torch.tensor(np.array(x_perm), dtype=torch.float64, device=self.device) 72 | 73 |     def save_results(self, data_dir: pathlib.Path, filename: str): 74 |         """ 75 |         Save p-values results to csv file as p_values_filename. If permutations were computed, also save 76 |         minimal p-values as min_p_values_filename. 
77 | 78 | :param data_dir: full path to results directory 79 | :param filename: name of results file 80 | """ 81 | df = pd.DataFrame({'CHR': self.dataset.chromosomes, 82 | 'POS': self.dataset.positions, 83 | 'p_value': self.p_value, 84 | 'test_stat': self.test_stat, 85 | 'maf': self.dataset.maf, 86 | 'SE': self.SE, 87 | 'effect_size': self.effect_size}) 88 | if self.perm_p_val is not None: 89 | df['adjusted_p_val'] = self.perm_p_val 90 | df.to_csv(data_dir.joinpath('p_values_' + filename), index=False) 91 | if self.min_p_value is not None: 92 | df_min = pd.DataFrame({'seed': self.seeds, 93 | 'min_p_val': self.min_p_value}) 94 | df_min.to_csv(data_dir.joinpath('min_p_values_' + filename), index=False) 95 | 96 | def manhattan_plot(self, data_dir: pathlib.Path, filename: str, sig_level: int = 5): 97 | """ 98 | Save Manhattan plot as manhattan_FILENAME.png to data_dir 99 | 100 | :param data_dir: full path to save directory 101 | :param filename: name of file 102 | :param sig_level: significance level for Bonferroni and perm thresholds, default is 5 103 | """ 104 | df = pd.DataFrame({'CHR': self.dataset.chromosomes, 105 | 'POS': self.dataset.positions, 106 | 'p_value': self.p_value}) 107 | 108 | plot_functions.manhattan_plot(df=df, data_dir=data_dir, filename=filename, min_p_values=self.min_p_value, 109 | sig_level=sig_level) 110 | 111 | def qq_plot(self, data_dir: pathlib.Path, filename: str): 112 | """ 113 | Save QQ-plot as qq_plot_FILENAME.png to data_dir 114 | 115 | :param data_dir: full path to save directory 116 | :param filename: name of file 117 | """ 118 | plot_functions.qq_plot(p_values=self.p_value, data_dir=data_dir, filename=filename) 119 | -------------------------------------------------------------------------------- /models/lmm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import time 4 | import scipy.stats as stats 5 | 6 | from . 
import _base_model 7 | from preprocess import data_loader 8 | from optimize import brent 9 | 10 | 11 | class LMM(_base_model.BaseModel): 12 | 13 | def __init__(self, dataset: data_loader.Dataset, batch_size: int, device: torch.device, perm: int = None, 14 | perm_batch_size: int = None): 15 | super().__init__(dataset=dataset, batch_size=batch_size, device=device, perm=perm, 16 | perm_batch_size=perm_batch_size) 17 | self.D = None # eigenvalues of K 18 | self.U = None # unitary matrix of eigenvectors of K 19 | self.freedom_deg = None # adjusted degrees of freedom = n_samples - degrees of freedom = int 20 | self.Uy = None # y after linear transformation with eigenvectors 21 | self.UZ = None # fixed effects after linear transformation with eigenvectors 22 | 23 | def gwas(self): 24 | """ 25 | Perform batch-wise computation of univariate test with linear mixed model (EMMAX): 26 | (1) compute spectral decomposition of K=UDU' 27 | (2) transform data: U'y, U'Z 28 | (3) estimate delta and compute variance components 29 | (4) calculate residual sum of squares of null model 30 | (5) batch-wise: 31 | (a) linearly transform marker 32 | (b) calculate effect size, residual sum of squares and standard error 33 | (c) calculate test statistic 34 | (6) calculate p-values 35 | Dataset: 36 | X: genotype matrix of shape (n,m) or (n,b) if batch-wise 37 | y: phenotype vector of shape (n) 38 | K: kinship matrix of shape (n,n) 39 | fixed: vector/matrix of fixed effects of shape (n) or (n,c) 40 | """ 41 | start = time.time() 42 | self.freedom_deg = self.dataset.n_samples - self.dataset.fixed.shape[1] 43 | # get spectral decomposition 44 | self.D, self.U = torch.linalg.eigh(self.dataset.K) 45 | # linearly transform data, i.e. 
compute U'y and U'Z for fixed effects Z 46 | self.Uy = self.transform_input(X=self.dataset.y, U=self.U) # shape (n) 47 | self.UZ = self.transform_input(X=self.dataset.fixed, U=self.U) # shape (n) or (n,c) 48 | # estimate delta and compute variance components 49 | self.delta = self.estimate_delta(gridlen=100, logdelta_min=-10, logdelta_max=10, reml=True) 50 | D = self.D + self.delta 51 | ZD = self._zd(UZ=self.UZ, D=D) 52 | ZDZ = self._zdz(UZ=self.UZ, ZD=ZD) 53 | self.v_g, self.v_e = self.compute_var_components(D=D, UZ=self.UZ, ZD=ZD, ZDZ=ZDZ, reml=True) 54 | # calculate residual sum of squares of null model 55 | RSS_0 = self.get_rss_h0() # shape: (1) 56 | self.freedom_deg -= 1 57 | # in batches: 58 | SE = [] 59 | effect_size = [] 60 | test_stat = [] 61 | for batch in range(int(np.ceil(self.dataset.n_snps / self.batch_size))): 62 | # set bounds for SNP batch 63 | lower_bound, upper_bound = self._bounds(batch_size=self.batch_size, batch=batch) 64 | # load and transform batch of SNPs 65 | US = self._s_matrix(lower_bound=lower_bound, upper_bound=upper_bound) # shape: (n,b) 66 | # transform data 67 | US = self.transform_input(X=US, U=self.U) 68 | # calculate effect size, residual sum of squares and standard error 69 | RSS_1, stds, betas = self.get_rss_and_se(D=D, S=US, ZD=ZD, ZDZ=ZDZ) 70 | SE.append(stds.to(torch.device("cpu"))) 71 | effect_size.append(betas.to(torch.device("cpu"))) 72 | # calculate test statistic 73 | test_stat.append(self.get_f_score(rss0=RSS_0, rss1=RSS_1).to(torch.device("cpu"))) 74 | # free GPU space 75 | if self.device.type != "cpu": 76 | with torch.cuda.device(self.device): 77 | del RSS_1 78 | del US 79 | del stds 80 | del betas 81 | torch.cuda.empty_cache() 82 | self.SE = torch.cat(SE, dim=0) # shape: (m) 83 | self.effect_size = torch.cat(effect_size, dim=0) # shape: (m) 84 | self.test_stat = torch.cat(test_stat, dim=0) # shape: (m) 85 | time_test_stats = time.time() 86 | print("Have test statistics of %d SNPs. 
Elapsed time: %f" % (self.test_stat.shape[0], time_test_stats - start)) 87 | print("Calculate P-values now") 88 | # compute p-values 89 | self.p_value = torch.tensor(list(map(self.get_p_value, self.test_stat))) 90 | print("Have P-values. Elapsed time: ", time.time() - time_test_stats) 91 | if self.device.type != "cpu": 92 | with torch.cuda.device(self.device): 93 | del D 94 | del ZD 95 | del ZDZ 96 | del self.dataset.K 97 | torch.cuda.empty_cache() 98 | 99 | def perm_gwas(self, perm_method: str = 'x', adj_p_value: bool = False): 100 | """ 101 | Perform batch-wise computation of permutation-based test with linear mixed model (EMMAX): 102 | reuse spectral decomposition of K=UDU' 103 | perm method y: 104 | (1) permute phenotype p times 105 | (2) transform data: U'y 106 | (3) estimate delta and compute variance components for each permutation 107 | (4) calculate residual sum of squares of null model 108 | (5) batch-wise: 109 | (a) linearly transform marker 110 | (b) calculate residual sum of squares 111 | (c) calculate test statistic 112 | perm method x: 113 | (1) permute fixed effects p times 114 | (2) transform data: U'Z 115 | (3) estimate delta and compute variance components for each permutation 116 | (4) calculate residual sum of squares of null model 117 | (5) batch-wise: 118 | (a) permute marker p times 119 | (b) linearly transform marker 120 | (c) calculate residual sum of squares 121 | (d) calculate test statistic 122 | (6) calculate minimal p-values for Westfall-Young permutation-based threshold 123 | optional: (7) calculate permutation-based p-values 124 | Dataset: 125 | X: genotype matrix of shape (n,m) or (n,b) if batch-wise 126 | y: phenotype vector of shape (n) 127 | K: kinship matrix of shape (n,n) 128 | fixed: vector/matrix of fixed effects of shape (n) or (n,c) 129 | 130 | :param perm_method: y to permute phenotype or x to permute fixed effects + marker 131 | :param adj_p_value: if True compute adjusted p-values, default is false 132 | """ 133 | start 
= time.time() 134 | if self.test_stat is None: 135 | raise Exception('Need to first calculate true test statistics using LMM.gwas().') 136 | self.freedom_deg = self.dataset.n_samples - self.dataset.fixed.shape[1] 137 | self.seeds = self.perm_seeds() 138 | if perm_method == 'y': 139 | # compute permutations of y 140 | self.Uy = self.permute(data=self.dataset.y) # shape: (n,p) 141 | self.Uy = torch.unsqueeze(torch.t(self.transform_input(X=self.Uy, U=self.U)), 2) # shape: (p,n,1) 142 | # estimate variance components for each permutation 143 | self.delta = self.estimate_delta_perm(gridlen=100, logdelta_min=-10, logdelta_max=10, reml=True) 144 | self.D = self._d_delta(delta=self.delta, batch_size=self.perm) # shape: (p,1,n) 145 | self.UZ = self.get_3d_copy(v=self.UZ, batch_size=self.perm) # shape: (p,n,c) 146 | ZD = self._zd(UZ=self.UZ, D=self.D) # shape: (p,c,n) 147 | ZDZ = self._zdz(UZ=self.UZ, ZD=ZD) # shape: (p,c,c) 148 | v_g, _ = self.compute_var_components(D=self.D, UZ=self.UZ, ZD=ZD, ZDZ=ZDZ, reml=True) # shape: (p) 149 | elif perm_method == 'x': 150 | self.Uy = self.get_3d_copy(v=self.Uy, batch_size=self.perm) # shape: (p,n,1) 151 | if self.dataset.fixed.shape[1] > 1: 152 | # permute and transform fixed effects 153 | self.UZ = self.permute(data=self.dataset.fixed) # shape: (p,n,c) 154 | self.UZ = self.transform_input(X=self.UZ, U=self.U) # shape: (p,n,c) 155 | # estimate variance components 156 | self.delta = self.estimate_delta_perm(gridlen=100, logdelta_min=-10, logdelta_max=10, reml=True) 157 | self.D = self._d_delta(delta=self.delta, batch_size=self.perm) # shape: (p,1,n) 158 | ZD = self._zd(UZ=self.UZ, D=self.D) # shape: (p,c,n) 159 | ZDZ = self._zdz(UZ=self.UZ, ZD=ZD) # shape: (p,c,c) 160 | v_g, _ = self.compute_var_components(D=self.D, UZ=self.UZ, ZD=ZD, ZDZ=ZDZ, reml=True) # shape: (p) 161 | else: 162 | # reuse UZ, delta, sigma and get 3D copies 163 | self.D = self._d_delta(delta=self.delta, batch_size=self.perm) # shape: (p,1,n) 164 | self.UZ = 
self.get_3d_copy(v=self.UZ, batch_size=self.perm) # shape: (p,n,c) 165 | ZD = self._zd(UZ=self.UZ, D=self.D) # shape: (p,c,n) 166 | ZDZ = self._zdz(UZ=self.UZ, ZD=ZD) # shape: (p,c,c) 167 | v_g = self.v_g.repeat(self.perm) # shape: (p) 168 | else: 169 | raise Exception('Choose either permutation method x or y.') 170 | # calculate rss for null model 171 | RSS_0 = self.get_rss_h0().repeat(self.perm) # shape: (p) 172 | self.freedom_deg -= 1 173 | if self.device.type != "cpu": 174 | with torch.cuda.device(self.device): 175 | del self.delta 176 | del self.dataset.y 177 | del self.dataset.fixed 178 | torch.cuda.empty_cache() 179 | var_comp_time = time.time() 180 | print("Have variance components. Elapsed time: ", var_comp_time - start) 181 | test_stat = [] 182 | for batch in range(int(np.ceil(self.dataset.n_snps / self.perm_batch_size))): 183 | # set bounds for SNP batch 184 | lower_bound, upper_bound = self._bounds(batch_size=self.perm_batch_size, batch=batch) 185 | # load and transform batch of SNPs 186 | print("\rCalculate perm test statistics for SNPs %d to %d" % (lower_bound, upper_bound), end='') 187 | if perm_method == 'y': 188 | US = self._s_matrix(lower_bound=lower_bound, upper_bound=upper_bound, save_meta=False) # shape: (n,b) 189 | # transform data 190 | US = self.transform_input(X=US, U=self.U) 191 | # get 3D copy of S for permutations 192 | US = self.get_3d_copy(v=US, batch_size=self.perm) # shape: (p,n,b) 193 | else: 194 | US = self._s_matrix(lower_bound=lower_bound, upper_bound=upper_bound, device=torch.device("cpu"), 195 | save_meta=False) # shape: (n,b) 196 | US = self.permute(data=US) # shape: (p,n,b) 197 | # transform data 198 | US = self.transform_input(X=US, U=self.U) # shape: (p,n,b) 199 | # calculate residual sum of squares 200 | RSS = self.get_rss_perm(S=US, ZD=ZD, ZDZ=ZDZ, v_g=v_g) # shape: (p,b) 201 | # calculate test statistics 202 | test_stat.append(self.get_f_score(rss0=torch.t(RSS_0.repeat(RSS.shape[1], 1)), 203 | 
def estimate_delta(self, gridlen: int = 100, logdelta_min: int = -10, logdelta_max: int = 10,
                   reml: bool = True) -> torch.tensor:
    """
    Estimate ratio of variance components delta of LMM.
    Get grid of evenly divided delta values on logarithmic scale, compute the negative loglikelihood for each
    grid point and refine the best candidate with Brent search.

    :param gridlen: length of grid, default=100
    :param logdelta_min: lower bound for delta (log value), default=-10
    :param logdelta_max: upper bound for delta (log value), default=10
    :param reml: if True use REML estimate, if False use ML, default=True

    :return: optimal delta
    """
    deltas = torch.exp(torch.linspace(start=logdelta_min, end=logdelta_max, steps=gridlen + 1, device=self.device))
    neglogs = self.negloglikelihood(delta=deltas, Uy=self.Uy, UZ=self.UZ, reml=reml)
    # bugfix: Tensor.to() is not in-place, its result must be re-assigned (the return value was discarded before)
    neglogs = neglogs.to(self.device)
    delta_opt = self._minimize(Uy=self.Uy, UZ=self.UZ, deltas=deltas, neglogs=neglogs, gridlen=gridlen, reml=reml)
    return delta_opt

def _minimize(self, Uy: torch.tensor, UZ: torch.tensor, deltas: torch.tensor, neglogs: torch.tensor,
              gridlen: int = 100, reml: bool = True) -> torch.tensor:
    """
    Minimize negative loglikelihood function with Brent search.

    :param Uy: transformed phenotype vector U'y
    :param UZ: transformed vector of fixed effects U'Z
    :param deltas: tensor with possible delta values in ascending order
    :param neglogs: tensor with negative loglikelihood value for each delta
    :param gridlen: length of delta grid, default=100
    :param reml: if True use REML estimate, if False use ML, default=True

    :return: optimal delta
    """
    tmp = torch.argmin(neglogs)
    delta_opt = deltas[tmp]
    neglog_opt = neglogs[tmp]
    # use brent search for each local minimum of the grid (triple whose middle value is smallest)
    for i in range(gridlen - 1):
        if (neglogs[i + 1] < neglogs[i]) and (neglogs[i + 1] < neglogs[i + 2]):
            delta_tmp, neglog_tmp, niters = brent.brent_search(f=self.negloglikelihood, a=deltas[i],
                                                               b=deltas[i + 2], x=deltas[i + 1], fx=neglogs[i + 1],
                                                               Uy=Uy, UZ=UZ, reml=reml)
            if neglog_tmp < neglog_opt:
                delta_opt = delta_tmp
                neglog_opt = neglog_tmp
    return delta_opt

def negloglikelihood(self, delta: torch.tensor, UZ: torch.tensor, Uy: torch.tensor, reml: bool = True) \
        -> torch.tensor:
    """
    Compute negative loglikelihood for one delta value or several values in parallel.

    :param delta: ratio of variance components
    :param UZ: transformed fixed effects U'Z
    :param Uy: transformed phenotype U'y
    :param reml: if True use REML estimate, if False use ML, default=True

    :return: negative loglikelihood
    """
    if delta.ndim == 0:
        # scalar delta: shift all eigenvalues directly
        D = self.D + delta
    else:
        D = self._d_delta(delta=delta, batch_size=len(delta))  # shape: (b,1,n)
    ZD = self._zd(UZ=UZ, D=D)
    ZDZ = self._zdz(UZ=UZ, ZD=ZD)
    beta = self._beta(ZDZ=ZDZ, ZDy=torch.matmul(ZD, Uy))
    sigma = self._sigma(D=D, Uy=Uy, UZ=UZ, beta=beta, reml=reml)
    if D.ndim == 1:
        logdetD = torch.sum(torch.log(D))
    else:
        # batched D: sum log-eigenvalues per batch entry
        logdetD = torch.sum(torch.squeeze(torch.log(D)), 1)
    if not reml:
        return (self.dataset.n_samples*torch.log(2*torch.pi*sigma) + logdetD + self.dataset.n_samples) / 2
    else:
        # REML adds the log-determinant correction terms for the fixed effects
        if UZ.ndim == 2:
            logdetZ = torch.logdet(torch.matmul(torch.t(UZ), UZ))
        elif UZ.ndim == 3:
            logdetZ = torch.logdet(torch.matmul(torch.transpose(UZ, dim0=1, dim1=2), UZ))
        else:
            logdetZ = torch.logdet(torch.matmul(torch.transpose(UZ, dim0=2, dim1=3), UZ))
        logdetZDZ = torch.logdet(ZDZ)
        return (self.freedom_deg*torch.log(2*torch.pi*sigma) + logdetD + self.freedom_deg - logdetZ + logdetZDZ) / 2

def compute_var_components(self, D: torch.tensor, UZ: torch.tensor, ZD: torch.tensor, ZDZ: torch.tensor,
                           reml: bool = True) -> tuple:
    """
    Compute variance components v_g^2 and v_e^2 with Var(y) = v_g^2*K + v_e^2*I.

    :param D: vector with eigenvalues of K
    :param UZ: transformed fixed effects U'Z
    :param ZD: precomputed matrix product of (U'Z)'D^-1
    :param ZDZ: precomputed matrix product of (U'Z)'D^-1(U'Z)
    :param reml: if True use REML estimate, if False use ML, default=True

    :return: v_g^2 and v_e^2
    """
    beta = self._beta(ZDZ=ZDZ, ZDy=torch.matmul(ZD, self.Uy))
    v_g = self._sigma(D=D, Uy=self.Uy, UZ=UZ, beta=beta, reml=reml)
    # v_e follows from the estimated ratio delta = v_e / v_g
    v_e = self.delta * v_g
    return v_g, v_e

def get_rss_h0(self, sigma_opt: bool = True, reml: bool = True) -> torch.tensor:
    """
    Compute residual sum of squares of H0 (marker has no effect on phenotype),
    i.e. for fixed effects Z, covariance matrix V and phenotype y compute:
        b = (Z'V^{-1}Z)^{-1}Z'V^{-1}y
        rss = (y-Zb)'V^{-1}(y-Zb)
    Note that for optimal sigma_g the rss collapses to n-c (REML) or n (ML),
    so no explicit computation is needed in that case.

    :param sigma_opt: if True return degrees of freedom, default is True
    :param reml: if True use REML estimate, if False use ML, default=True

    :return: residual sum of squares
    """
    if sigma_opt:
        if reml:
            return torch.tensor(self.dataset.n_samples - self.dataset.fixed.shape[1], device=self.device)
        else:
            return torch.tensor(self.dataset.n_samples, device=self.device)
    else:
        raise NotImplementedError
def get_rss_and_se(self, D: torch.tensor, S: torch.tensor, ZD: torch.tensor, ZDZ: torch.tensor) -> tuple:
    """
    Compute residual sum of squares of alternative hypothesis (marker has effect on phenotype),
    i.e. for a 3D tensor with batches of fixed effects X and 3D tensor with copies of phenotype y:
        beta = (X'D^{-1}X)^{-1}X'D^{-1}y
        rss = (y-Xbeta)'D^{-1}(y-Xbeta)
    Use block-wise computation for beta, i.e., for computation of beta use the fact that X=[Z,s] for fixed
    effects Z and SNP s.

    :param D: vector with eigenvalues of K + ratio of variance components delta; shape: (n)
    :param S: matrix containing several markers in batches; shape: (n,b)
    :param ZD: precomputed matrix product of (U'Z)'D^-1; shape: (c,n)
    :param ZDZ: precomputed matrix product of (U'Z)'D^-1(U'Z); shape: (c,c)

    :return: residual sum of squares, standard error and effect size in batches
    """
    batch_size = S.shape[1]
    # get (X'D^{-1}X)^{-1}
    SD, XDX = self._xdx(D=D, S=S, ZD=ZD, ZDZ=ZDZ)
    XDX = torch.linalg.pinv(XDX, hermitian=True)
    # compute Z'Dy
    ZDy = self.get_3d_copy(v=torch.matmul(ZD, self.Uy), batch_size=batch_size)  # shape: (b,c,1)
    # compute X'Dy
    SD = torch.matmul(SD, self.Uy).reshape(batch_size, 1, 1)  # shape: (b,1,1)
    # put together 3D tensor
    SD = torch.cat((ZDy, SD), dim=1)  # shape: (b,c+1,1)
    # compute beta
    beta = torch.matmul(XDX, SD)  # shape: (b,c+1,1)
    # compute rss (S is re-used as a buffer to keep peak memory low)
    S = self._x_batch(X=S, fixed=self.UZ)  # shape (b,n,c+1)
    S = torch.matmul(S, beta)  # shape (b,n,1)
    S = self.get_3d_copy(v=self.Uy, batch_size=batch_size) - S  # shape (b,n,1)
    resD = torch.div(S, torch.unsqueeze(D, 1))
    S = torch.squeeze(torch.matmul(torch.transpose(resD, dim0=1, dim1=2), S)) / self.v_g
    # get standard error from the last diagonal entry of (X'D^{-1}X)^{-1} (the SNP column)
    diag = torch.diagonal(XDX, dim1=1, dim2=2)[:, -1]
    se = torch.sqrt(self.v_g * diag)
    return S, se, torch.squeeze(beta[:, -1])

def get_f_score(self, rss0: torch.tensor, rss1: torch.tensor) -> torch.tensor:
    """
    Compute tensor of F-test statistics (1 numerator degree of freedom).

    :param rss0: residual sum of squares of H0: marker has no effect on phenotype
    :param rss1: residual sum of squares of H1: marker has effect on phenotype

    :return: F-test statistic
    """
    return self.freedom_deg * (rss0 - rss1) / rss1

def get_p_value(self, f_score: float) -> float:
    """
    Compute p-value using survival function of the F distribution.

    :param f_score: F-test statistic

    :return: p-value
    """
    return stats.f.sf(f_score, 1, self.freedom_deg)

# functions for permutations
def estimate_delta_perm(self, gridlen: int = 100, logdelta_min: int = -10, logdelta_max: int = 10,
                        reml: bool = True) -> torch.tensor:
    """
    Estimate ratio of variance components delta of LMM for permutations.
    Get grid of evenly divided delta values on logarithmic scale and compute neg loglikelihood for each.

    :param gridlen: length of grid, default=100
    :param logdelta_min: lower bound for delta (log value), default=-10
    :param logdelta_max: upper bound for delta (log value), default=10
    :param reml: if True use REML estimate, if False use ML, default=True

    :return: tensor with optimal delta for each permutation
    """
    deltas = torch.exp(torch.linspace(start=logdelta_min, end=logdelta_max, steps=gridlen + 1, device=self.device))
    if self.UZ.ndim == 2:
        # for perm method y: same U'Z for each permutation
        neglogs = self.negloglikelihood(delta=deltas, Uy=self.get_4d_copy(v=self.Uy, batch_size=len(deltas)),
                                        UZ=self.UZ, reml=reml)
    else:
        # for perm method x: have different U'Z for each permutation
        neglogs = self.negloglikelihood(delta=deltas, Uy=self.get_4d_copy(v=self.Uy, batch_size=len(deltas)),
                                        UZ=self.get_4d_copy(v=self.UZ, batch_size=len(deltas)), reml=reml)
    # bugfix: Tensor.to() is not in-place, its result must be re-assigned (the return value was discarded before)
    neglogs = neglogs.to(self.device)
    delta_opt = []
    if self.UZ.ndim == 2:
        # for perm method y: same U'Z for each permutation
        for i in range(self.perm):
            # bugfix: forward the user-supplied gridlen/reml instead of hard-coding 100/True
            delta_opt.append(self._minimize(Uy=self.Uy[i, :, 0], UZ=self.UZ, deltas=deltas, neglogs=neglogs[i, :],
                                            gridlen=gridlen, reml=reml))
    else:
        # for perm method x: have different U'Z for each permutation
        for i in range(self.perm):
            delta_opt.append(self._minimize(Uy=self.Uy[i, :, 0], UZ=self.UZ[i, :, :], deltas=deltas,
                                            neglogs=neglogs[i, :], gridlen=gridlen, reml=reml))
    return torch.tensor(delta_opt, device=self.device)
def get_rss_perm(self, S: torch.tensor, ZD: torch.tensor, ZDZ: torch.tensor, v_g: torch.tensor) -> torch.tensor:
    """
    Compute residual sum of squares of alternative hypothesis (marker has effect on phenotype) with permutations,
    i.e. for a 4D tensor with copies of batches of fixed effects Z and markers S and 4D tensor with copies of
    permutations of phenotype y:
        b = (X'X)^{-1}X'y
        rss = (y-Xb)'(y-Xb)
    Use block-wise computation for beta, i.e., for computation of beta use the fact that X=[Z,s] for fixed
    effects Z and SNP s.

    :param S: matrix containing batch of markers, shape: (p,n,b)
    :param ZD: 3D tensor containing matrix product (U'Z)'D^{-1} for each permutation, shape: (p,c,n)
    :param ZDZ: 3D tensor containing matrix product (U'Z)'D^{-1}(U'Z) for each permutation, shape: (p,c,c)
    :param v_g: tensor containing genetic variance component for each permutation, shape: (p)

    :return: residual sum of squares in batches
    """
    batch_size = S.shape[2]
    y_batch = self.get_4d_copy(v=self.Uy, batch_size=batch_size)  # shape: (p,b,n,1)
    beta = self._beta_perm(S=S, ZD=ZD, ZDZ=ZDZ, y_batch=y_batch, batch_size=batch_size)  # shape: (p,b,c+1,1)
    # compute residuals; S is re-used as a buffer to keep peak memory low
    S = self._x_batch(X=S, fixed=self.UZ)  # shape: (p,b,n,c+1)
    S = y_batch - torch.matmul(S, beta)  # shape: (p,b,n,1)
    # compute residual sum of squares, weighted by D^{-1}
    rss = torch.div(torch.transpose(S, dim0=2, dim1=3), self.get_4d_copy(v=self.D, batch_size=batch_size))
    rss = torch.squeeze(torch.matmul(rss, S))  # shape: (p,b)
    # scale each permutation's rss by its genetic variance component
    return torch.t(torch.div(torch.t(rss), torch.unsqueeze(v_g, dim=0)))

def get_perm_p_value(self, perm_test_stats: torch.tensor) -> torch.tensor:
    """
    Compute permutation-based p-values via
        p = R/(qm)
    with R being the number of permuted test statistics bigger than the observed test statistic,
    q the number of permutations and m the number of SNPs (so n = q*m below).

    :param perm_test_stats: matrix containing test-statistics for all permutations and SNPs, dim (p,m)

    :return: adjusted p-values
    """
    sorted_test_stats, ind = torch.sort(perm_test_stats.flatten())
    n = sorted_test_stats.shape[0]
    # count how many permuted statistics exceed each observed statistic via binary search
    test_stats_ind = torch.searchsorted(sorted_test_stats.contiguous(), self.test_stat.contiguous(), right=True)
    adj_p_value = ((n - test_stats_ind) / n).type(torch.float64)
    # a p-value of exactly 0 is replaced by the smallest resolvable value 1/n
    return torch.where(adj_p_value == 0., 1 / n, adj_p_value)

def get_min_p_value(self, test_stat: torch.tensor) -> torch.tensor:
    """
    Compute minimal p-values for each permutation:
    First search the maximal test statistic for each permutation; since the survival function is decreasing, this
    gives the minimal p-value.

    :param test_stat: matrix containing test-statistics for all permutations and SNPs, dim (p,m)

    :return: vector containing the minimal p-value for each permutation
    """
    max_test_stats, _ = torch.max(test_stat, dim=1)
    min_p_val = []
    for test in max_test_stats:
        min_p_val.append(self.get_p_value(f_score=test))
    return torch.tensor(min_p_val)

# functions to compute intermediate results
@staticmethod
def _zd(UZ: torch.tensor, D: torch.tensor) -> torch.tensor:
    """
    Compute (U'Z)'D^{-1} for fixed effects Z of shape (n,c) or (p,n,c).

    :param UZ: transformed fixed effects U'Z
    :param D: vector with eigenvalues of K + ratio of variance components delta

    :return: Z'D^{-1}
    """
    # D is diagonal, so D^{-1} multiplication is an element-wise division
    if UZ.ndim == 2:
        return torch.div(torch.t(UZ), D)
    elif UZ.ndim == 3:
        return torch.div(torch.transpose(UZ, dim0=1, dim1=2), D)
    elif UZ.ndim == 4:
        return torch.div(torch.transpose(UZ, dim0=2, dim1=3), D)

@staticmethod
def _zdz(UZ: torch.tensor, ZD: torch.tensor) -> torch.tensor:
    """
    Compute (U'Z)'D^{-1}(U'Z) for fixed effects Z, giving shape (c,c) or (p,c,c).

    :param UZ: transformed fixed effects U'Z
    :param ZD: precomputed (U'Z)'D^{-1}

    :return: (U'Z)'D^{-1}(U'Z)
    """
    return torch.matmul(ZD, UZ)
torch.tensor: 518 | """ 519 | Compute (U'Z)'D^{-1}(U'Z) for fixed effects Z of shape (c,c) or (p,c,c) 520 | 521 | :param UZ: transformed fixed effects U'Z 522 | :param ZD: precomputed (U'Z)'D^{-1} 523 | 524 | :return: (U'Z)'D^{-1}(U'Z) 525 | """ 526 | return torch.matmul(ZD, UZ) 527 | 528 | @staticmethod 529 | def _beta(ZDZ: torch.tensor, ZDy: torch.tensor) -> torch.tensor: 530 | """ 531 | compute effect size beta = ((U'Z)'D^-1(U'Z))^-1(U'Z)'D^-1(U'y) 532 | 533 | :param ZDZ: precomputed matrix product of (U'Z)'D^-1(U'Z) 534 | :param ZDy: precomputed matrix product of (U'Z)'D^-1(U'y) 535 | 536 | :return: beta 537 | """ 538 | return torch.linalg.solve(ZDZ, ZDy) 539 | 540 | def _sigma(self, D: torch.tensor, Uy: torch.tensor, UZ: torch.tensor, beta: torch.tensor, reml: bool = True) \ 541 | -> torch.tensor: 542 | """ 543 | compute variance component v_g^2 = ((U'y)-(U'Z)beta)'D^-1((U'y)-(U'Z)beta)/(n-c) 544 | 545 | :param D: vector with eigenvalues of K + ratio of variance components delta 546 | :param Uy: transformed phenotype U'y, shape (n) 547 | :param UZ: transformed fixed effects U'Z, shape (n,c) 548 | :param beta: effect size, shape (c) 549 | :param reml: if True use REML estimate, if False use ML, default=True 550 | 551 | :return: v_g^2 552 | """ 553 | if D.ndim == 3: 554 | if Uy.ndim == 1: 555 | Uy = self.get_3d_copy(v=Uy, batch_size=D.shape[0]) 556 | if beta.ndim == 2: 557 | beta = torch.unsqueeze(beta, 2) 558 | res = Uy - torch.matmul(UZ, beta) 559 | res = torch.multiply(res, res) 560 | if D.ndim == 1: 561 | res = torch.sum(torch.div(res, D)) 562 | elif res.ndim == 3: 563 | res = torch.div(torch.transpose(res, dim0=1, dim1=2), D) 564 | res = torch.sum(torch.squeeze(res), 1) 565 | elif res.ndim == 4: 566 | res = torch.div(torch.transpose(res, dim0=2, dim1=3), D) 567 | res = torch.sum(torch.squeeze(res), 2) 568 | if not reml: 569 | return res / self.dataset.n_samples 570 | else: 571 | return res / self.freedom_deg 572 | 573 | def _xdx(self, D: torch.tensor, S: 
torch.tensor, ZD: torch.tensor, ZDZ: torch.tensor) -> tuple: 574 | """ 575 | Compute (X'D^{-1}X)^{-1} for X=([Z,s_i],...,[Z,s_{i+b-1}]) of shape (b,n,c+1) for fixed effects Z of shape (n,c) 576 | and SNPs s_j 577 | For permutations compute 4D version 578 | 579 | :param D: vector with eigenvalues of K + ratio of variance components delta; shape: (n) or (p,1,n) 580 | :param S: matrix with batch of b SNPs (n,b) or (p,n,b) 581 | :param ZD: Z'D^{-1} for fixed effects Z and matrix of eigenvalues+delta D (c,n) or (p,c,n) for perm 582 | :param ZDZ: Z'D^{-1}Z for fixed effects Z and matrix of eigenvalues+delta D (c,c) or (p,c,c) for perm 583 | 584 | :return: S'D^{-1} and (X'D^{-1}X)^{-1} 585 | """ 586 | if ZD.ndim == 2: 587 | batch_size = S.shape[1] 588 | # compute Z'Ds_i for each SNP s_i in batches 589 | ZDS = torch.unsqueeze(torch.t(torch.matmul(ZD, S)), dim=2) # shape: (b,c,1) 590 | # compute s_iDs_i for all SNPs in batch 591 | SD = torch.unsqueeze(torch.div(torch.t(S), D), dim=1) # shape: (b,1,n) 592 | XDX = torch.bmm(SD, torch.unsqueeze(torch.t(S), dim=2)) # shape: (b,1,1) 593 | # put together 3D tensor for XDX 594 | XDX = torch.cat((torch.cat((self.get_3d_copy(v=ZDZ, batch_size=batch_size), ZDS), dim=2), 595 | torch.cat((torch.transpose(ZDS, dim0=1, dim1=2), XDX), dim=2)), dim=1) # shape: (b,c+1,c+1) 596 | elif ZD.ndim == 3: 597 | batch_size = S.shape[2] 598 | # get 4D copy of ZDZ for batch 599 | ZDZ_4d = self.get_4d_copy(v=ZDZ, batch_size=batch_size) # shape: (p,b,c,c) 600 | # compute Z'D^{-1}S 601 | ZDS = torch.unsqueeze(torch.transpose(torch.matmul(ZD, S), dim0=1, dim1=2), 3) # shape: (p,b,c,1) 602 | # compute S'D^{-1}S 603 | St = torch.transpose(S, dim0=1, dim1=2) # shape: (p,b,n) 604 | SD = torch.unsqueeze(torch.divide(St, self.D), dim=2) # shape: (p,b,1,n) 605 | # compute S'D^{-1}S 606 | XDX = torch.matmul(SD, torch.unsqueeze(St, dim=3)) 607 | # put together X'D^{-1}X 608 | XDX = torch.concat((torch.transpose(ZDS, dim0=2, dim1=3), XDX), dim=3) 609 | XDX = 
torch.concat((torch.concat((ZDZ_4d, ZDS), dim=3), XDX), dim=2) # shape: (p,b,c+1,c+1) 610 | else: 611 | raise Exception('Can only compute XDX for 2D or 3D version of ZD.') 612 | return SD, XDX 613 | 614 | def _beta_perm(self, S: torch.tensor, ZD: torch.tensor, ZDZ: torch.tensor, y_batch: torch.tensor, batch_size: int) \ 615 | -> torch.tensor: 616 | """ 617 | Compute betas for permutations in 4D tensor using block-wise computations 618 | 619 | :param S: matrix containing batch of markers, shape: (p,n,b) 620 | :param ZD: 3D tensor containing matrix product (U'Z)'D^{-1} for each permutation, shape: (p,c,n) 621 | :param ZDZ: 3D tensor containing matrix product (U'Z)'D^{-1}(U'Z) for each permutation, shape: (p,c,c) 622 | :param y_batch: 4D copy of permutations of phenotype vector, shape: (p,b,n,1) 623 | :param batch_size: number of markers 624 | 625 | :return: 4D tensor with beta values for all markers nad permutations, shape: (p,b,c+1,1) 626 | """ 627 | # get S'D^{-1}S and X'D^{-1}X 628 | SD, XDX = self._xdx(D=self.D, S=S, ZD=ZD, ZDZ=ZDZ) # shape: (p,b,1,n), (p,b,c+1,c+1) 629 | # get X'D^{-1}y 630 | XDy = self.get_4d_copy(v=torch.matmul(ZD, self.Uy), batch_size=batch_size) # shape: (p,b,c,1) 631 | SD = torch.matmul(SD, y_batch) # shape: (p,b,1,1) 632 | XDy = torch.concat((XDy, SD), dim=2) # shape: (p,b,c+1,1) 633 | # get beta of shape: (p,b,c+1,1) 634 | return self._beta(ZDZ=XDX, ZDy=XDy) 635 | 636 | # functions for data transformation 637 | @staticmethod 638 | def transform_input(X: torch.tensor, U: torch.tensor) -> torch.tensor: 639 | """ 640 | compute U'X 641 | 642 | :param X: input vector/matrix 643 | :param U: input matrix 644 | 645 | :return: product with transpose 646 | """ 647 | return torch.matmul(torch.t(U), X) 648 | 649 | def _d_delta(self, delta: torch.tensor, batch_size: int): 650 | """ 651 | get 3D tensor with D + delta*I as batches for diagonal matrix with eigenvalues D and different variance 652 | component ratios delta. 
If delta is one value, return tensor with b copies of D+delta. 653 | 654 | :param delta: variance component ratio shape: (b) or (1) 655 | :param batch_size: number of needed copies of D 656 | 657 | :return: D + delta of shape (b,1,n) 658 | """ 659 | if delta.ndim == 1: 660 | return torch.unsqueeze(self.D.repeat(batch_size, 1) + torch.unsqueeze(delta, 1), 1) 661 | else: 662 | return torch.unsqueeze((self.D + delta).repeat(batch_size, 1), 1) 663 | 664 | def _s_matrix(self, lower_bound: int, upper_bound: int, device=None, save_meta: bool = True) -> torch.tensor: 665 | """ 666 | load batch of markers to specified device 667 | 668 | :param lower_bound: lower bound of marker batch 669 | :param upper_bound: upper bound of marker batch 670 | :param device: either cpu or cuda device 671 | :param save_meta: if genotype is loaded batch-wise, set to False for permutations to prevent saving of meta info 672 | 673 | :return: matrix with markers of shape (n,upper_bound-lower_bound) 674 | """ 675 | if device is None: 676 | device = self.device 677 | if self.dataset.X is None: 678 | # load X batch-wise 679 | self.dataset.load_genotype_batch_wise(device=device, save_meta=save_meta, snp_lower_index=lower_bound, 680 | snp_upper_index=upper_bound) # shape: (n,b) 681 | S = self.dataset.X # shape: (n,b) 682 | self.dataset.reset_genotype() 683 | else: 684 | # get X_batch if X was completely loaded before 685 | S = self.dataset.X[:, lower_bound:upper_bound].to(device) # shape: (n,b) 686 | return S 687 | 688 | def _x_batch(self, X: torch.tensor, fixed: torch.tensor) -> torch.tensor: 689 | """ 690 | Create 3D or 4D tensor where each matrix in the 3D tensor contains the same fixed effects and a different SNP, 691 | and the 4D tensor contains copies of the 3D tensors 692 | 693 | :param X: genotype matrix/tensor of shape (n,b) or (p,n,b) 694 | :param fixed: matrix/tensor of fixed effects of shape (n,c) or (p,n,c) 695 | 696 | :return: tensor of shape (b,n,c+1) or (p,b,n,c+1) 697 | """ 698 | if 
X.ndim == 2: 699 | b = self.get_3d_copy(v=fixed, batch_size=X.shape[1]) 700 | return torch.cat((b, torch.transpose(torch.unsqueeze(X, 0), 0, 2)), dim=2) 701 | elif X.ndim == 3: 702 | b = self.get_4d_copy(v=fixed, batch_size=X.shape[2]) 703 | return torch.cat((b, torch.unsqueeze(torch.transpose(X, dim0=1, dim1=2), 3)), dim=3) 704 | 705 | @staticmethod 706 | def get_3d_copy(v: torch.tensor, batch_size: int) -> torch.tensor: 707 | """ 708 | Create 3D tensor with copies of input tensor 709 | 710 | :param v: vector/matrix of shape (n) or (n,c) 711 | :param batch_size: batch size of new 3D tensor 712 | 713 | :return: tensor of copies of v with shape (batch_size,n,1) or (batch_size,n,c) 714 | """ 715 | if v.ndim == 1: 716 | return torch.unsqueeze(v.expand(batch_size, v.shape[0]), 2) 717 | if v.ndim == 2: 718 | return v.expand(batch_size, v.shape[0], v.shape[1]) 719 | 720 | @staticmethod 721 | def get_4d_copy(v: torch.tensor, batch_size: int) -> torch.tensor: 722 | """ 723 | Create 4D tensor with copies of input tensor 724 | 725 | :param v: tensor of shape (p,n,c) 726 | :param batch_size: batch size of new 4D tensor 727 | 728 | :return: tensor of copies of v with shape (p,b,n,c) 729 | """ 730 | return torch.transpose(v.expand(batch_size, v.shape[0], v.shape[1], v.shape[2]), dim0=0, dim1=1) 731 | 732 | # helper functions 733 | def _bounds(self, batch_size: int, batch: int) -> tuple: 734 | """ 735 | compute upper and lower bound for natch-wise computations 736 | 737 | :param batch_size: number of markers within batch 738 | :param batch: number of batch 739 | 740 | :return: lower and upper bound 741 | """ 742 | lower_bound = batch * batch_size 743 | upper_bound = (batch + 1) * batch_size 744 | if upper_bound > self.dataset.n_snps: 745 | upper_bound = self.dataset.n_snps 746 | return lower_bound, upper_bound 747 | -------------------------------------------------------------------------------- /optimize/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/grimmlab/permGWAS/3f7a1c2e3e4c63281f5719425ff9ac405f8d9cfc/optimize/__init__.py -------------------------------------------------------------------------------- /optimize/brent.py: -------------------------------------------------------------------------------- 1 | # Brent's method 2 | 3 | def brent_search(f, a: float, b: float, x: float = None, fx: float = None, rel_tol: float = 1.48e-08, 4 | abs_tol: float = 1.48e-08, max_iter: int = 500, **kwargs) -> tuple: 5 | """ 6 | Find minimum of a function using Brent's method (see Numerical Recipes 3rd Edition: The Art of Scientific Computing) 7 | Given a function f with minimum in interval [a,b], find local minimum. 8 | 9 | :param f: function to be minimized 10 | :param a: lower bound of interval 11 | :param b: upper bound of interval 12 | :param x: starting point (initial guess of minimum) 13 | :param fx: function value of f 14 | :param rel_tol: relative tolerance, default=1.48e-08 15 | :param abs_tol: absolute tolerance, default=1.48e-08 16 | :param max_iter: maximal number of iterations, default=500 17 | :param kwargs: additional arguments of f 18 | 19 | :return: minimum x, function value of minimum f(x) and number of iterations 20 | """ 21 | 22 | golden = 0.381966011250105097 23 | if a > b: 24 | raise ValueError('Interval boundaries do not fit. 
a must be smaller or equal to b.') 25 | if x is None: 26 | x = a + golden * (b-a) 27 | if fx is None: 28 | fx = f(x, **kwargs) 29 | if not (a <= x <= b): 30 | raise ValueError('Starting value x needs to be within interval boundaries.') 31 | 32 | # initialize values 33 | x_sec, fx_sec = x, fx # second best value and function value 34 | x_trd, fx_trd = x, fx # third best value and function value 35 | d, e = 0.0, 0.0 # step size and direction of last two iterations 36 | i = -1 37 | 38 | for i in range(max_iter): 39 | mid = 0.5 * (a + b) 40 | tol1 = rel_tol * abs(x) + abs_tol 41 | tol2 = 2.0 * tol1 42 | 43 | # check stopping crit 44 | if abs(x - mid) <= tol2 - 0.5 * (b - a): 45 | break 46 | 47 | # compute Lagrange polynomial through (x, f(x)), (x_sec, f(x_sec)) and (x_trd, f(x_trd)) 48 | if abs(e) > tol1: 49 | tmp1 = (x - x_sec) * (fx - fx_trd) 50 | denominator = (x - x_trd) * (fx - fx_sec) 51 | numerator = (x - x_trd) * denominator - (x - x_sec) * tmp1 52 | denominator = 2.0 * (denominator - tmp1) 53 | if denominator > 0.0: 54 | numerator = -numerator 55 | denominator = abs(denominator) 56 | tmp1 = e 57 | e = d 58 | 59 | if (abs(numerator) >= abs(0.5 * denominator * tmp1)) or (numerator <= denominator * (a-x)) or \ 60 | (numerator >= denominator * (b-x)): 61 | # golden section step 62 | e = b-x if x < mid else a-x 63 | d = golden * e 64 | else: 65 | # polynomial interpolation step 66 | d = numerator / denominator 67 | x_new = x + d 68 | if (x_new - a < tol2) or (b - x_new < tol2): 69 | d = tol1 if x < mid else -tol1 70 | else: 71 | # golden section step 72 | e = b - x if x < mid else a - x 73 | d = golden * e 74 | 75 | # function must not be evaluated too close to x 76 | if tol1 <= abs(d): 77 | x_new = x + d 78 | elif 0.0 < d: 79 | x_new = x + tol1 80 | else: 81 | x_new = x - tol1 82 | fx_new = f(x_new, **kwargs) 83 | 84 | # check if x_new is better than previous x 85 | if fx_new <= fx: 86 | # decrease interval size 87 | if x_new >= x: 88 | a = x 89 | else: 90 | b = x 
91 | # replace previous best 3 with current best 3 92 | x_trd, fx_trd = x_sec, fx_sec 93 | x_sec, fx_sec = x, fx 94 | x, fx = x_new, fx_new 95 | else: 96 | # decrease interval size 97 | if x_new < x: 98 | a = x_new 99 | else: 100 | b = x_new 101 | # check if x_new better than second or third and replace accordingly 102 | if fx_new <= fx_sec or x_sec == x: 103 | x_trd, fx_trd = x_sec, fx_sec 104 | x_sec, fx_sec = x_new, fx_new 105 | elif fx_new <= fx_trd or x_trd == x or x_trd == x_sec: 106 | x_trd, fx_trd = x_new, fx_new 107 | 108 | return x, fx, i+1 109 | -------------------------------------------------------------------------------- /perform_gwas.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import time 3 | import torch 4 | 5 | from preprocess import data_loader 6 | from utils import helper_functions 7 | 8 | 9 | def run(genotype_file: pathlib.Path, phenotype_file: pathlib.Path, model: str, trait: str = 'phenotype_value', 10 | kinship_file: pathlib.Path = None, covariate_file: pathlib.Path = None, covariate_list: list = None, 11 | maf_threshold: int = 0, load_genotype: bool = False, 12 | out_dir: pathlib.Path = pathlib.Path.cwd().joinpath('results'), out_file: str = None, 13 | device: torch.device = torch.device('cpu'), perm: int = 0, perm_method: str = 'x', 14 | adj_p_value: bool = False, batch_size: int = 50000, perm_batch_size: int = 1000, manhattan: bool = False, 15 | qqplot: bool = False, not_add: bool = False): 16 | # check user specified arguments 17 | start = time.time() 18 | print('Start loading data now') 19 | 20 | # load data 21 | dataset = data_loader.Dataset(genotype_file=genotype_file, phenotype_file=phenotype_file, trait=trait, 22 | maf_threshold=maf_threshold, load_genotype=load_genotype, kinship_file=kinship_file, 23 | covariate_file=covariate_file, covariate_list=covariate_list, not_add=not_add) 24 | dataset.to_device(device=device) 25 | have_data = time.time() 26 | print('Loaded 
data, elapsed time: %f s.' % (have_data - start)) 27 | print('Start performing GWAS on phenotype %s for %d samples and %d SNPs.' 28 | % (trait, dataset.n_samples, dataset.n_snps)) 29 | 30 | # perform GWAS 31 | gwas_model = helper_functions.get_model_class_name(model_name=model)(dataset=dataset, batch_size=batch_size, 32 | device=device, perm=perm, 33 | perm_batch_size=perm_batch_size) 34 | gwas_model.gwas() 35 | done_gwas = time.time() 36 | print('Done performing GWAS on phenotype %s for %d samples and %d SNPs.\n' 37 | 'Elapsed time: %f s' % (trait, dataset.n_samples, len(dataset.positions), done_gwas - have_data)) 38 | 39 | # perform GWAS with permutations 40 | if perm > 0: 41 | print('Start performing GWAS with %d permutations.' % perm) 42 | gwas_model.perm_gwas(perm_method=perm_method, adj_p_value=adj_p_value) 43 | done_perm = time.time() 44 | print('Done performing GWAS with %d permutations.\n' 45 | 'Elapsed time: %f s' % (perm, done_perm - done_gwas)) 46 | 47 | # save results 48 | print('Save results.') 49 | gwas_model.save_results(data_dir=out_dir, filename=out_file) 50 | total_time = time.time() - start 51 | print('Total time: ', total_time) 52 | 53 | # plots 54 | if manhattan: 55 | print('Save Manhattan plot with significance level of 5%.') 56 | gwas_model.manhattan_plot(data_dir=out_dir, filename=out_file, sig_level=5) 57 | total_time = time.time() - start 58 | if qqplot: 59 | print('Save QQ-plot.') 60 | gwas_model.qq_plot(data_dir=out_dir, filename=out_file) 61 | total_time = time.time() - start 62 | 63 | # summary statistics 64 | if not load_genotype: 65 | # reset number of SNPs in case of batch-wise loading 66 | dataset.n_snps = len(dataset.positions) 67 | helper_functions.get_summary_stats(out_dir=out_dir, out_file=out_file, genotype_file=genotype_file, 68 | phenotype_file=phenotype_file, trait=trait, samples=dataset.n_samples, 69 | snps=dataset.n_snps, model=model, maf_threshold=maf_threshold, perm=perm, 70 | v_g=gwas_model.v_g.item(), 
v_e=gwas_model.v_e.item(), 71 | min_p_val=gwas_model.min_p_value, time=total_time, kinship_file=kinship_file, 72 | covariate_file=covariate_file, covariate_list=covariate_list, 73 | perm_method=perm_method) 74 | -------------------------------------------------------------------------------- /permGWAS.py: -------------------------------------------------------------------------------- 1 | # run the script here 2 | import argparse 3 | import pathlib 4 | 5 | from utils import check_functions 6 | import perform_gwas 7 | 8 | 9 | if __name__ == "__main__": 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('-x', '--genotype_file', type=str, default=None, 12 | help='Specify the full path to the genotype file, absolute and relative paths are accepted, ' 13 | 'only accept .h5, .hdf5, .h5py, .csv, PLINK and binary PLINK files, ' 14 | 'PLINK and binary PLINK: all required files must be in the same folder with same prefix. ' 15 | 'See documentation for correct format.') 16 | parser.add_argument('-y', '--phenotype_file', type=str, default=None, 17 | help='Specify the full path to the phenotype file, absolute and relative paths are ' 18 | 'accepted, only accept .csv, .txt and .pheno files. See documentation for correct format.') 19 | parser.add_argument('-trait', '--trait', '--y_name', nargs='+', type=str, default=['phenotype_value'], 20 | help='Specify the name of phenotype (column) to be used in phenotype file,' 21 | 'default is "phenotype_value". You can run permGWAS on several phenotypes one after ' 22 | 'another if they are in the same phenotype_file. Juste name the phenotypes, ' 23 | 'e.g. --trait pheno1 pheno2 if you want to use all available traits use --trait all') 24 | parser.add_argument('-k', '--kinship_file', '--k', '--kinship', type=str, default=None, 25 | help='Specify the the full path to the kinship file, absolute and relative paths are accepted,' 26 | 'only accept .csv and .h5/.h5py/.hdf5 files. See documentation for correct format. 
' 27 | 'Optional, if not provided realized relationship kernel will be calculated') 28 | parser.add_argument('-cov', '--covariate_file', '--cov', '--cov_file', type=str, default=None, 29 | help='Specify the full path to the covariates file, absolute and relative paths are accepted,' 30 | 'currently only accept .csv files. Optional, if not provided only intercept will be used ' 31 | 'as fixed effect.') 32 | parser.add_argument('-cov_list', '--covariate_list', nargs='+', type=str, default=None, 33 | help='Specify the covariates (column headers) to use from the covariates file. Optional, if ' 34 | 'not provided, will use all available columns as covariates.') 35 | parser.add_argument('-maf', '--maf_threshold', '--maf', type=int, choices=range(0, 31), default=0, 36 | help='Specify minor allele frequency threshold as percentage value. ' 37 | 'Optional, if not provided no maf filtering will be performed.') 38 | parser.add_argument('-load_genotype', action='store_true', 39 | help='If used, genotype matrix will be completely loaded from file during preprocessing. ' 40 | 'Otherwise load genotype batch-wise during computations of test statistics. ' 41 | 'Batch-wise loading is only possible, if kinship file is provided. Default is False') 42 | parser.add_argument('-config', '--config_file', type=str, default=None, 43 | help='Specify the full path to the yaml config file. Specify all required arguments to use in ' 44 | 'this config file and just give the config file instead of all required parameters. ' 45 | 'For more info regarding the required format see the documentation.') 46 | parser.add_argument('-model', type=str, default='lmm', 47 | help='Specify the model to use for GWAS. Currently only lmm (linear mixed model) is ' 48 | 'implemented.') 49 | parser.add_argument('-out_dir', '--out_dir', type=str, default=pathlib.Path.cwd().joinpath('results'), 50 | help='Specify the name of the directory result-files should be stored in,' 51 | 'absolute and relative paths are accepted. 
Optional, if not provided, files will be ' 52 | 'stored in folder "results" in current directory,') 53 | parser.add_argument('-out_file', '--out_file', type=str, default=None, 54 | help='Specify NAME of result files, will be stored as p_values_NAME and min_p_values_NAME,' 55 | 'optional, if not provided name of phenotype will be used. If you run permGWAS with ' 56 | 'several phenotypes, will always use name of phenotype.') 57 | parser.add_argument('-disable_gpu', action='store_true', 58 | help='If used, GPUs will be disabled and only CPUs will be used for computations.') 59 | parser.add_argument('-device', '--device', type=int, default=0, 60 | help='Specify GPU device to be used, default is 0.') 61 | parser.add_argument('-perm', '--perm', type=int, default=0, 62 | help='Specify the number of permutations (integer value) to be performed, optional, if not ' 63 | 'provided no permutations will be performed') 64 | parser.add_argument('-perm_method', type=str, default='x', 65 | help='Specify the method to use for permutations: x or y,' 66 | 'for x permute fixed effects matrix including SNP of interest, which is equivalent to ' 67 | 'permuting the phenotype and the covariance matrix; for y permute only the phenotype ' 68 | 'vector as in permGWAS Version1. 
Default is x.') 69 | parser.add_argument('-adj_p_value', action='store_true', 70 | help='If used, will additionally compute adjusted permutation-based p-values for each SNP.') 71 | parser.add_argument('-batch', '--batch_size', '--batch', type=int, default=50000, 72 | help='Specify number of SNPs to work on simultaneously, default is 50000') 73 | parser.add_argument('-batch_perm', '--perm_batch_size', '--batch_perm', type=int, default=1000, 74 | help='Specify number of SNPs to work on simultaneously, default is 1000') 75 | parser.add_argument('-mplot', '--manhattan', '--plot', action='store_true', 76 | help='optional, creates manhattan plot') 77 | parser.add_argument('-qqplot', '--qqplot', action='store_true', 78 | help='optional, creates QQ-plot') 79 | parser.add_argument('-not_add', '--not_add', action='store_true', 80 | help='optional, use if genotype has different encoding.') 81 | args = vars(parser.parse_args()) 82 | # check config file 83 | args = check_functions.check_all_arguments(args=args) 84 | phenotypes = args["trait"] 85 | 86 | # run pipeline 87 | for trait in phenotypes: 88 | print('Working on phenotype ', trait) 89 | args["trait"] = trait 90 | args = check_functions.check_output_files(args=args) 91 | print('Checked if all specified files exist.') 92 | try: 93 | perform_gwas.run(**args) 94 | args["out_file"] = None 95 | except Exception as exc: 96 | print("Failure when running permGWAS2.0") 97 | print(exc) 98 | continue 99 | -------------------------------------------------------------------------------- /permGWAS_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grimmlab/permGWAS/3f7a1c2e3e4c63281f5719425ff9ac405f8d9cfc/permGWAS_logo.png -------------------------------------------------------------------------------- /postprocess/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/grimmlab/permGWAS/3f7a1c2e3e4c63281f5719425ff9ac405f8d9cfc/postprocess/__init__.py -------------------------------------------------------------------------------- /postprocess/plot_functions.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | import scipy.stats as stats 7 | plt.rc('axes', axisbelow=True) 8 | plt.rcParams['axes.labelsize'] = 16 9 | plt.rcParams['xtick.labelsize'] = 14 10 | plt.rcParams['ytick.labelsize'] = 14 11 | plt.rcParams['legend.fontsize'] = 16 12 | plt.rcParams['axes.titlesize'] = 20 13 | 14 | from utils import helper_functions 15 | 16 | 17 | def manhattan_plot(df: pd.DataFrame, data_dir: pathlib.Path, filename: str, min_p_values: np.array = None, 18 | sig_level: int = 5): 19 | """ 20 | Save Manhattan plot as manhattan_FILENAME.png to data_dir 21 | 22 | :param df: DataFrame containing chromosome (CHR) and position (POS) identifiers, and corresponding p_values 23 | :param data_dir: full path to save directory 24 | :param filename: name of file 25 | :param min_p_values: array containing minimal p_values to compute permutation-based threshold 26 | :param sig_level: significance level for Bonferroni and perm thresholds, default is 5 27 | """ 28 | if not {'CHR', 'POS', 'p_value'}.issubset(df.columns): 29 | raise Exception('Cannot create Manhattan plot; need CHR, POS and p_value in DataFrame.') 30 | n_snps = len(df) 31 | df = df[df['p_value'] <= 0.01].copy() 32 | if isinstance(df['CHR'].values[0], str): 33 | try: 34 | df['CHR'] = [int(x.replace('Chr', '')) for x in df['CHR']] 35 | except Exception as exc: 36 | print("Chromosome identifier might be wrong. 
Use the chromosome number.") 37 | print(exc) 38 | running_pos = 0 39 | cumul_pos = [] 40 | for chrom, group_df in df.groupby('CHR'): 41 | cumul_pos.append(group_df['POS'] + running_pos) 42 | running_pos += group_df['POS'].max() 43 | df['cumul_pos'] = pd.concat(cumul_pos) 44 | 45 | fig, ax = plt.subplots(1, 1, figsize=(20, 5), constrained_layout=True) 46 | sns.scatterplot(ax=ax, data=df, x='cumul_pos', y='p_value', hue='CHR', palette='colorblind', linewidth=0, s=20, 47 | legend=None) 48 | ax.spines['top'].set_visible(False) 49 | ax.spines['right'].set_visible(False) 50 | ax.set_yscale("log") 51 | ax.invert_yaxis() 52 | ax.minorticks_off() 53 | ax.set_xlabel('Chromosome') 54 | ax.set_ylabel(r'$-log_{10}$(p-value)') 55 | ax.set_xticks(df.groupby('CHR')['cumul_pos'].median()) 56 | ax.set_xticklabels(np.unique(df['CHR'])) 57 | 58 | if min_p_values is not None: 59 | ax.axhline(helper_functions.compute_perm_threshold(min_p_values, sig_level), linewidth=1.5, color='blue', 60 | label='permGWAS2') 61 | ax.axhline(helper_functions.compute_bonf_threshold(n_snps, sig_level), linewidth=1.5, color='red', 62 | label='Bonferroni') 63 | ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.13), fancybox=True, ncol=2, frameon=True) 64 | fig.savefig(data_dir.joinpath('manhattan_' + pathlib.Path(filename).with_suffix('.png').as_posix())) 65 | fig.clf() 66 | 67 | 68 | def qq_plot(p_values: np.array, data_dir: pathlib.Path, filename: str): 69 | """ 70 | Save QQ-plot as qq_plot_FILENAME.png to data_dir 71 | 72 | :param p_values: array containing p_values 73 | :param data_dir: full path to save directory 74 | :param filename: name of file 75 | """ 76 | n_snps = len(p_values) 77 | observed_p = -np.log10(np.sort(p_values)) 78 | expected_p = -np.log10(np.arange(1.0 / float(n_snps), 1, 1.0 / float(n_snps + 1))) 79 | inflation_factor = np.median(stats.chi2.isf(p_values, 1)) / 0.456 80 | 81 | plt.figure(figsize=(6, 6)) 82 | plt.plot(expected_p, observed_p, '.', markersize=4, markeredgewidth=0, 
alpha=0.8) 83 | plt.plot(expected_p, expected_p, 'k--', linewidth=0.75) 84 | plt.text(3.5, 0.5, "$\lambda=%.2f$" % inflation_factor) 85 | plt.xlabel('Expected $-log10(p-value)$') 86 | plt.ylabel('Observed $-log10(p-value)$') 87 | plt.savefig(data_dir.joinpath('qq_plot_' + pathlib.Path(filename).with_suffix('.png').as_posix())) 88 | plt.clf() 89 | -------------------------------------------------------------------------------- /preprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grimmlab/permGWAS/3f7a1c2e3e4c63281f5719425ff9ac405f8d9cfc/preprocess/__init__.py -------------------------------------------------------------------------------- /preprocess/data_loader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import pandas as pd 4 | import h5py 5 | import pathlib 6 | from pandas_plink import read_plink1_bin 7 | 8 | 9 | class Genotype: 10 | """ 11 | Class for loading of genotype data. 
12 | 13 | **Attributes** 14 | 15 | - genotype_file (*pathlib.Path*): full path to genotype file for data loading 16 | - X (*torch.tensor*): matrix containing genotype values 17 | - sample_ids (*numpy.array*): ids of genotype samples 18 | - chromosomes (*numpy.array*): chromosome identifier of SNPs 19 | - positions (*numpy.array*): position identifier of SNPs 20 | - maf (*torch.tensor*): vector containing minor allele frequencies 21 | - sample_index (*numpy.array*): indices of the samples to load from the genotype matrix 22 | - n_samples (*int*): number of samples 23 | - n_snps (*int*): number of SNPs 24 | - maf_threshold (*int*): threshold for minor allele frequency filtering 25 | 26 | **Functions** 27 | 28 | - load_genotype_ids(load_genotype): load sample_ids from .h5/.hdf5/.h5py file 29 | - load_genotype_data(): load and encode genotype data from file, calls the following functions: 30 | - load_genotype_hdf5_file(sample_index, snp_lower_index, snp_upper_index): load genotype data from 31 | .h5/.hdf5/.h5py files 32 | - load_genotype_csv_file(): load genotype data from .csv files 33 | - load_genotype_binary_plink_file(): load genotype data from binary PLINK files 34 | - load_genotype_plink_file(): load genotype data from PLINK files 35 | - encode_genotype(): check encoding of genotype, change to additive if necessary, create torch.tensor, 36 | calls the following functions: 37 | - check_encoding() 38 | - get_additive_encoding() 39 | - load_genotype_batch_wise(maf_threshold, snp_lower_index, snp_upper_index): batch-wise loading and filtering 40 | of genotype data 41 | - filter_monomorphic_snps(): remove monomorphic SNPs 42 | - get_minor_allele_freq(): compute minor allele frequencies 43 | - use_maf_filter(maf_threshold): filter for minor allele frequency 44 | - save_genotype_hdf5(filename): save genotype data as .h5 file 45 | - reset_genotype(): delete X for batch-wise loading 46 | - get_matched_data(data, row_index): filter samples of data 47 | 48 | :param 
genotype_file: full path to genotype file 49 | :param maf_threshold: threshold for minor allele frequency filtering 50 | :param not_add: use if genotype has different / not additive encoding 51 | """ 52 | 53 | def __init__(self, genotype_file: pathlib.Path, maf_threshold: int = 0, not_add: bool = False): 54 | self.genotype_file = genotype_file 55 | self.maf_threshold = maf_threshold 56 | self.not_add = not_add 57 | self.sample_ids = None 58 | self.chromosomes = None 59 | self.positions = None 60 | self.X = None 61 | self.maf = None 62 | self.sample_index = None 63 | self.n_samples = None 64 | self.n_snps = None 65 | 66 | def load_genotype_ids(self, load_genotype: bool = False) -> np.array: 67 | """ 68 | Load sample_ids from .h5/.hdf5/.h5py genotype file. 69 | """ 70 | if self.genotype_file.suffix not in ('.h5', '.hdf5', '.h5py'): 71 | raise Exception('Can only load genotype IDs from .h5/.hdf5/.h5py files.') 72 | with h5py.File(self.genotype_file, "r") as gt: 73 | self.sample_ids = gt['sample_ids'][:].astype(str) 74 | if not load_genotype: 75 | self.n_snps = len(gt['position_index'][:]) 76 | 77 | def load_genotype_data(self): 78 | """ 79 | Load and encode genotype data. Accepts PLINK files, binary PLINK files, .csv and .h5, .hdf5, .h5py files. 80 | For .h5/.hdf5/.h5py files only load needed samples defined in self.sample_index. 81 | After loading check encoding of genotype and change to additive if necessary. 82 | Return genotype matrix as torch.tensor, chromosomes, positions and sample_ids as np.arrays. 
83 | """ 84 | suffix = self.genotype_file.suffix 85 | if suffix in ('.h5', '.hdf5', '.h5py'): 86 | self.X, self.chromosomes, self.positions = self.load_genotype_hdf5_file() 87 | elif suffix == '.csv': 88 | self.X, self.sample_ids, self.chromosomes, self.positions = self.load_genotype_csv_file() 89 | elif suffix in ('.bed', '.bim', '.fam'): 90 | self.X, self.sample_ids, self.chromosomes, self.positions = self.load_genotype_binary_plink_file() 91 | elif suffix in ('.map', '.ped'): 92 | self.X, self.sample_ids, self.chromosomes, self.positions = self.load_genotype_plink_file() 93 | # check if genotype is in additive encoding, change encoding if not 94 | # change X from np.array to torch.tensor 95 | self.encode_genotype() 96 | self.n_samples = len(self.sample_ids) 97 | self.n_snps = len(self.positions) 98 | 99 | def load_genotype_batch_wise(self, device: torch.device = torch.device("cpu"), save_meta: bool = True, 100 | snp_lower_index: int = None, snp_upper_index: int = None): 101 | """ 102 | Load and encode genotype data batch-wise. After loading filter for monomorphic snps and minor allele frequency. 103 | Only accept .h5(.hdf5/.h5py files. 
104 | 105 | :param device: device (cpu/gpu) for computations 106 | :param save_meta: save chromosome and position identifiers if True 107 | :param snp_lower_index: lower bound of batch 108 | :param snp_upper_index: upper bound of batch 109 | """ 110 | self.X, chromosomes, positions = self.load_genotype_hdf5_file(snp_lower_index=snp_lower_index, 111 | snp_upper_index=snp_upper_index) 112 | self.encode_genotype() 113 | chromosomes, positions = self.filter_monomorphic_snps(chromosomes=chromosomes, positions=positions) 114 | maf = self.get_minor_allele_freq() 115 | if self.maf_threshold != 0: 116 | maf, chromosomes, positions = self.use_maf_filter(maf=maf, chromosomes=chromosomes, positions=positions) 117 | self.X = self.X.to(device) 118 | 119 | if save_meta: 120 | if self.chromosomes is None: 121 | self.chromosomes = chromosomes 122 | self.positions = positions 123 | self.maf = maf 124 | else: 125 | self.chromosomes = np.concatenate((self.chromosomes, chromosomes)) 126 | self.positions = np.concatenate((self.positions, positions)) 127 | self.maf = torch.cat((self.maf, maf)) 128 | 129 | def load_genotype_hdf5_file(self, snp_lower_index: int = None, snp_upper_index: int = None) -> tuple: 130 | """ 131 | Load genotype matrix from .h5/.hdf5/.h5py file. 132 | Only load needed samples and SNPs batch wise: 133 | will only load specified samples given in sample_index 134 | if snp_upper_bound/snp_lower_bound is given, will load SNPs batch-wise, else will load all SNPs 135 | H5, HDF5, H5PY files need to have the following structure: 136 | snps: genotype matrix either in additive encoding or in raw nucleotide encoding (biallelic 137 | notation (i.e. 'AA', 'AT', ...) or iupac notation (i.e. 
'A', 'W', ...)) with samples as 138 | rows and markers as columns 139 | sample_ids: sample identifier in the same order as the rows of the genotype matrix 140 | chr_index: chromosome identifier in the same order as the columns of the genotype matrix 141 | position_index: position number (integer) in the same order as the columns of the genotype matrix 142 | 143 | :param snp_lower_index: lower bound of batch 144 | :param snp_upper_index: upper bound of batch 145 | 146 | :return: Genotype values, chromosomes and positions and sample_ids if no sample_index is specified 147 | """ 148 | with h5py.File(self.genotype_file, "r") as gt: 149 | chromosomes = gt['chr_index'][snp_lower_index:snp_upper_index].astype(str) 150 | positions = gt['position_index'][snp_lower_index:snp_upper_index].astype(int) 151 | if isinstance(self.sample_index, (np.ndarray, list)): 152 | # using sample indices directly does not work for h5py --> use workaround 153 | indices, inverse = np.unique(self.sample_index, return_inverse=True) 154 | X = gt['snps'][indices, snp_lower_index:snp_upper_index] 155 | X = X[inverse, :] 156 | return X, chromosomes, positions 157 | else: 158 | raise Exception('sample_index needs to be a list in order to load certain genotype samples only.') 159 | 160 | def load_genotype_csv_file(self) -> (np.array, np.array, np.array, np.array): 161 | """ 162 | Load .csv genotype file. File must have the following structure: 163 | First column must contain the sample ids, the column names should be the SNP ids as CHROMOSOME_POSITION. 164 | The values should be the genotype matrix either in additive encoding or in raw nucleotide encoding (biallelic 165 | notation (i.e. 'AA', 'AT', ...) or iupac notation (i.e. 'A', 'W', ...)). 
166 | 167 | :return: Genotype values, sample_ids, chromosomes and positions 168 | """ 169 | gt = pd.read_csv(self.genotype_file, index_col=0) 170 | snp_ids = np.array(list(map(lambda a: a.split("_"), gt.columns.values))) 171 | chromosomes = snp_ids[:, 0] 172 | positions = snp_ids[:, 1].astype(int) 173 | sample_ids = np.asarray(gt.index, dtype=str) 174 | X = np.asarray(gt.values) 175 | return X, sample_ids, chromosomes, positions 176 | 177 | def load_genotype_binary_plink_file(self) -> (np.array, np.array, np.array, np.array): 178 | """ 179 | Load binary PLINK file, .bim, .fam, .bed files with same prefix need to be in same folder. 180 | 181 | :return: Genotype values, sample_ids, chromosomes and positions 182 | """ 183 | prefix = self.genotype_file.with_suffix('').as_posix() 184 | gt = read_plink1_bin(prefix + '.bed', prefix + '.bim', prefix + '.fam', ref="a0", verbose=False) 185 | sample_ids = np.array(gt['fid'], dtype=str).flatten() 186 | positions = np.array(gt['pos']).flatten() 187 | chromosomes = np.array(gt['chrom']).flatten() 188 | X = np.asarray(gt.values) 189 | return X, sample_ids, chromosomes, positions 190 | 191 | def load_genotype_plink_file(self) -> (np.array, np.array, np.array, np.array): 192 | """ 193 | Load PLINK files, .map and .ped file with same prefix need to be in same folder. 
194 | Accepts GENOTYPENAME.ped and GENOTYPENAME.map as input 195 | 196 | :return: Genotype values, sample_ids, chromosomes and positions 197 | """ 198 | prefix = self.genotype_file.with_suffix('').as_posix() 199 | with open(prefix + '.map', 'r') as f: 200 | chromosomes = [] 201 | positions = [] 202 | for line in f: 203 | tmp = line.strip().split(" ") 204 | chromosomes.append(tmp[0].strip()) 205 | positions.append(int(float(tmp[-1].strip()))) 206 | chromosomes = np.array(chromosomes) 207 | positions = np.array(positions) 208 | iupac_map = {"AA": "A", "GG": "G", "TT": "T", "CC": "C", "AG": "R", "GA": "R", "RR": "R", "CT": "Y", "TC": "Y", 209 | "YY": "Y", "GC": "S", "CG": "S", "SS": "S", "AT": "W", "TA": "W", "WW": "W", "GT": "K", "TG": "K", 210 | "KK": "K", "AC": "M", "CA": "M", "MM": "M"} 211 | with open(prefix + '.ped', 'r') as f: 212 | sample_ids = [] 213 | X = [] 214 | for line in f: 215 | tmp = line.strip().split(" ") 216 | sample_ids.append(tmp[1].strip()) 217 | snps = [] 218 | j = 6 219 | while j < len(tmp) - 1: 220 | snps.append(iupac_map[tmp[j] + tmp[j + 1]]) 221 | j += 2 222 | X.append(snps) 223 | sample_ids = np.array(sample_ids, dtype=str) 224 | X = np.array(X) 225 | return X, sample_ids, chromosomes, positions 226 | 227 | def encode_genotype(self): 228 | """ 229 | first check encoding of genotype, then change to additive if necessary, finally change X from np.array 230 | to torch.tensor 231 | """ 232 | if self.not_add: 233 | print('Genotype might not be in additive encoding. 
Will not check encoding of genotype.') 234 | self.X = torch.tensor(self.X, dtype=torch.float64) 235 | else: 236 | enc_of_X = self.check_encoding() 237 | # if genotype in biallelic notation, will change to iupac notation and then encode additively 238 | if enc_of_X == 'biallelic': 239 | iupac_map = {"AA": "A", "GG": "G", "TT": "T", "CC": "C", "AG": "R", "GA": "R", "CT": "Y", "TC": "Y", 240 | "GC": "S", "CG": "S", "AT": "W", "TA": "W", "GT": "K", "TG": "K", "AC": "M", "CA": "M"} 241 | self.X = np.vectorize(iupac_map.__getitem__)(self.X.astype(str)) 242 | enc_of_X = 'iupac' 243 | if enc_of_X == 'iupac': 244 | self.X = torch.tensor(self.get_additive_encoding(), dtype=torch.float64) 245 | elif enc_of_X == 'additive': 246 | self.X = torch.tensor(self.X, dtype=torch.float64) 247 | else: 248 | raise Exception('Genotype in wrong encoding. Can only deal with additive, iupac and biallelic ' 249 | 'encoding. If you want to use different encoding use flag -not_add.') 250 | 251 | def check_encoding(self): 252 | """ 253 | Check the encoding of the genotype matrix 254 | 255 | :return: encoding of the genotype matrix 256 | """ 257 | if self.X[0, 0].astype(str) in ['A', 'C', 'G', 'T', 'M', 'R', 'W', 'S', 'Y', 'K']: 258 | return 'iupac' 259 | elif self.X[0, 0] in [0, 1, 2]: 260 | return 'additive' 261 | elif self.X[0, 0] in ["AA", "GG", "TT", "CC", "AG", "GA", "CT", "TC", "GC", "CG", "AT", "TA", "GT", "TG", 262 | "AC", "CA"]: 263 | return 'biallelic' 264 | else: 265 | raise Exception('Genotype in wrong encoding. Can only deal with additive, iupac and biallelic encoding. 
' 266 | 'Please check again.') 267 | 268 | def get_additive_encoding(self): 269 | """ 270 | Function to compute additive encoding of genotype matrix with 271 | 0: homozygous major allele 272 | 1: heterozygous 273 | 2: homozygous minor allele 274 | 275 | :return: gnotype in additive encoding 276 | """ 277 | alleles = [] 278 | index_arr = [] 279 | pairs = [['A', 'C'], ['A', 'G'], ['A', 'T'], ['C', 'G'], ['C', 'T'], ['G', 'T']] 280 | heterozygous_nuc = ['M', 'R', 'W', 'S', 'Y', 'K'] 281 | for i, col in enumerate(np.transpose(self.X)): 282 | unique, inv, counts = np.unique(col, return_counts=True, return_inverse=True) 283 | unique = unique.astype(str) 284 | boolean = (unique == 'A') | (unique == 'T') | (unique == 'C') | (unique == 'G') 285 | tmp = np.zeros(3) 286 | if len(unique) > 3: 287 | raise Exception('More than two alleles encountered at snp ' + str(i)) 288 | elif len(unique) == 3: 289 | hetero = unique[~boolean][0] 290 | homozygous = unique[boolean] 291 | for j, pair in enumerate(pairs): 292 | if all(h in pair for h in homozygous) and hetero != heterozygous_nuc[j]: 293 | raise Exception('More than two alleles encountered at snp ' + str(i)) 294 | tmp[~boolean] = 1.0 295 | tmp[np.argmin(counts[boolean])] = 2.0 296 | elif len(unique) == 2: 297 | if list(unique) in pairs: 298 | tmp[np.argmin(counts)] = 2.0 299 | else: 300 | tmp[(~boolean).nonzero()] = 1.0 301 | else: 302 | if unique[0] in heterozygous_nuc: 303 | tmp[0] = 1.0 304 | alleles.append(tmp) 305 | index_arr.append(inv) 306 | alleles = np.transpose(np.array(alleles)) 307 | index_arr = np.transpose(np.array(index_arr)) 308 | cols = np.arange(alleles.shape[1]) 309 | return alleles[index_arr, cols] 310 | 311 | def filter_monomorphic_snps(self, chromosomes: np.array = None, positions: np.array = None) -> (np.array, np.array): 312 | """ 313 | Remove monomorphic SNPs, i.e., SNPs that are constant 314 | 315 | :param chromosomes: vector with chromosome identifiers 316 | :param positions: vector with position 
identifiers 317 | 318 | :return filtered chromosomes and positions 319 | """ 320 | tmp = self.X == self.X[0, :] 321 | self.X = self.X[:, ~tmp.all(0)] 322 | if chromosomes is None: 323 | self.chromosomes = self.chromosomes[~tmp.all(0)] 324 | self.positions = self.positions[~tmp.all(0)] 325 | else: 326 | return chromosomes[~tmp.all(0)], positions[~tmp.all(0)] 327 | 328 | def get_minor_allele_freq(self): 329 | """ 330 | Function to calculate minor allele frequencies of each SNP 331 | 332 | :return: vector containing frequencies 333 | """ 334 | 335 | return (torch.sum(self.X, 0)) / (2 * self.X.shape[0]) 336 | 337 | def use_maf_filter(self, maf: torch.tensor = None, chromosomes: np.array = None, positions: np.array = None) \ 338 | -> (torch.tensor, np.array, np.array): 339 | """ 340 | filter genotype by minor allele frequency 341 | 342 | :param maf: vector containing minor allele frequencies 343 | :param chromosomes: vector with chromosome identifiers 344 | :param positions: vector with position identifiers 345 | 346 | :return: tensor with filtered maf frequencies, chromosomes and positions 347 | """ 348 | if maf is None: 349 | tmp = self.maf > (self.maf_threshold / 100) 350 | self.X = self.X[:, tmp] 351 | self.chromosomes = self.chromosomes[tmp] 352 | self.positions = self.positions[tmp] 353 | self.maf = self.maf[tmp] 354 | else: 355 | # for batch-wise loading 356 | tmp = maf > (self.maf_threshold / 100) 357 | self.X = self.X[:, tmp] 358 | return maf[tmp], chromosomes[tmp], positions[tmp] 359 | 360 | def save_genotype_hdf5(self, filename: pathlib.Path): 361 | """ 362 | Save genotype data to .h5 file 363 | 364 | :param filename: Full path to new genotype file 365 | """ 366 | if any(elem is None for elem in [self.X, self.sample_ids, self.chromosomes, self.positions]): 367 | raise Exception('Cannot save genotype file. 
Some values are None, please check again.') 368 | print('Save genotype data as ' + filename.as_posix() + '.\nThis might take some time.') 369 | with h5py.File(filename.with_suffix('.h5'), 'w') as f: 370 | f.create_dataset('sample_ids', data=self.sample_ids.astype(bytes), chunks=True, compression="gzip") 371 | f.create_dataset('chr_index', data=self.chromosomes.astype(bytes), chunks=True, compression="gzip") 372 | f.create_dataset('position_index', data=self.positions.astype(int), chunks=True, compression="gzip") 373 | f.create_dataset('snps', data=self.X, chunks=True, compression="gzip", compression_opts=7) 374 | print('Done saving H5 file.') 375 | 376 | def reset_genotype(self): 377 | """ 378 | Delete X for batchwise loading 379 | """ 380 | self.X = None 381 | 382 | @staticmethod 383 | def get_matched_data(data, row_index: np.array): 384 | """ 385 | Get rows of data specified in index array 386 | 387 | :param data: data to match, either np.array or torch.tensor 388 | :param row_index: row-index array for filtering / matching 389 | """ 390 | if data.ndim == 2: 391 | return data[row_index, :] 392 | if data.ndim == 1: 393 | return data[row_index] 394 | else: 395 | raise Exception('Cannot match data, dimensions are wrong. Expected dimension 1 or 2 but got ' 396 | + str(data.ndim) + ' instead. Please check again.') 397 | 398 | 399 | class Dataset(Genotype): 400 | """ 401 | Class for loading and preparation of genotype, phenotype, kinship and covariates. 
402 | 403 | **Attributes** 404 | 405 | - genotype_file (*pathlib.Path*): full path to genotype file for data loading 406 | - X (*torch.tensor*): matrix containing genotype values 407 | - sample_ids (*numpy.array*): ids of genotype samples 408 | - chromosomes (*numpy.array*): chromosome identifier of SNPs 409 | - positions (*numpy.array*): position identifier of SNPs 410 | - y (*torch.tensor*): tensor containing phenotypic values 411 | - K (*torch.tensor*): kinship matrix 412 | - fixed (*torch.tensor*): matrix containing fixed effects, i.e. vector of ones and covariates if available 413 | - maf (*torch.tensor*): vector containing minor allele frequencies 414 | - sample_index (*np.array*): vector containing sample indices for batch-wise loading of X 415 | - n_samples (*int*): number of samples 416 | - n_snps (*int*): number of SNPs 417 | - maf_threshold (*int*): threshold for minor allele frequency filtering 418 | 419 | **Functions** 420 | 421 | - load_and_prepare_data(): load load and match data, calls the following functions: 422 | - see class Genotype for all genotype specific functions 423 | - load_phenotype(phenotype_file, trait): load phenotype fom file 424 | - load_kinship(kinship_file): load kinship matrix from file 425 | - compute_rrk_kinship(): compute realized relationship kernel 426 | - normalize_kinship(): normalize kinship matrix using a Gower's centered matrix 427 | - load_covariates(covariates_file, column_list): load covariates from file 428 | - get_fixed_effects(): create fixed effects vector/matrix 429 | - match_data(data_ids1, data_ids2): match ids of two datasets 430 | - to_device(device): move tensors to device 431 | 432 | :param genotype_file: full path to genotype file 433 | :param phenotype_file: full path to phenotype file 434 | :param trait: name of phenotypic trait to use 435 | :param maf_threshold: minor allele frequency threshold to use for SNP filtering, default is 0 (no filtering) 436 | :param load_genotype: bool, if False load 
def __init__(self, genotype_file: pathlib.Path, phenotype_file: pathlib.Path, trait: str, maf_threshold: int = 0,
             load_genotype: bool = False, kinship_file: pathlib.Path = None, covariate_file: pathlib.Path = None,
             covariate_list: list = None, not_add: bool = False):
    # Genotype-specific state (X, sample_ids, positions, ...) is initialized by the base class.
    super().__init__(genotype_file=genotype_file, maf_threshold=maf_threshold, not_add=not_add)

    self.y = None      # phenotype tensor, set in load_and_prepare_data
    self.K = None      # kinship matrix, set in load_and_prepare_data
    self.fixed = None  # fixed-effects matrix, set in load_and_prepare_data
    self.load_and_prepare_data(phenotype_file=phenotype_file, trait=trait, load_genotype=load_genotype,
                               kinship_file=kinship_file, covariate_file=covariate_file,
                               covariate_list=covariate_list)

def load_and_prepare_data(self, phenotype_file: pathlib.Path, trait: str, load_genotype: bool = False,
                          kinship_file: pathlib.Path = None, covariate_file: pathlib.Path = None,
                          covariate_list: list = None):
    """
    Load and match genotype, phenotype, kinship and covariates.
    1. Load phenotype from file.
    2. Load genotype and match with pheno:
        If load_genotype is False, only load geno sample_ids from file and match data.
        Otherwise load geno sample_ids, match with pheno and load geno data only for needed samples.
    3. Filter genotype for monomorphic SNPs and minor allele frequency (only when genotype is in memory).
    4. Load kinship from file and match with geno, or compute kinship from geno data.
    5. If available load covariates from file.

    :param phenotype_file: full path to phenotype file
    :param trait: name of phenotypic trait to use
    :param load_genotype: bool, if False load genotype batch-wise during computations, default is False
    :param kinship_file: full path to kinship file, optional, if missing, compute rrk kinship
    :param covariate_file: full path to covariate file, optional
    :param covariate_list: list of covariates to use, optional
    """
    # load phenotype
    y, y_ids = self.load_phenotype(phenotype_file=phenotype_file, trait=trait)
    # load genotype
    if not load_genotype:
        # only load and match sample ids of genotype, values will be loaded batch-wise during computations
        self.load_genotype_ids(load_genotype=False)
        pheno_index, self.sample_index = self.match_data(data_ids1=y_ids, data_ids2=self.sample_ids)
        if len(pheno_index) == 0:
            raise Exception("Samples of genotype and phenotype do not match.")
    else:
        if self.genotype_file.suffix in ('.h5', '.hdf5', '.h5py'):
            # load genotype sample ids, match data and only load genotype values for needed samples
            self.load_genotype_ids()
            pheno_index, self.sample_index = self.match_data(data_ids1=y_ids, data_ids2=self.sample_ids)
            if len(pheno_index) == 0:
                raise Exception("Samples of genotype and phenotype do not match.")
            self.load_genotype_data()
        else:
            # non-HDF5 formats: must load full matrix first, then subset the matched rows
            self.load_genotype_data()
            pheno_index, self.sample_index = self.match_data(data_ids1=y_ids, data_ids2=self.sample_ids)
            if len(pheno_index) == 0:
                raise Exception("Samples of genotype and phenotype do not match.")
            self.X = self.get_matched_data(data=self.X, row_index=self.sample_index)
        # SNP filtering is only possible when the genotype matrix is in memory
        self.filter_monomorphic_snps()
        self.maf = self.get_minor_allele_freq()
        if self.maf_threshold != 0:
            self.use_maf_filter()
    self.n_snps = len(self.positions)
    self.y = self.get_matched_data(data=y, row_index=pheno_index)
    self.sample_ids = self.get_matched_data(data=self.sample_ids, row_index=self.sample_index)
    self.n_samples = len(self.y)
    # kinship
    if kinship_file is None:
        # compute kinship matrix
        self.K = self.compute_rrk_kinship()
    else:
        # load kinship from file and reorder rows AND columns to the matched samples
        self.K, K_ids = self.load_kinship(kinship_file=kinship_file)
        _, K_index = self.match_data(data_ids1=self.sample_ids, data_ids2=K_ids)
        if len(K_index) == len(self.sample_ids):
            self.K = self.K[K_index, :][:, K_index]
        else:
            raise Exception("Sample ids of genotype and kinship matrix do not match. Please check again")
    self.normalize_kinship()
    # fixed effects
    if covariate_file is not None:
        # load covariates from file
        cov = self.load_covariates(covariate_file=covariate_file, covariate_list=covariate_list)
        cov_ids = np.asarray(cov.index, dtype=y_ids.dtype).flatten()
        _, cov_index = self.match_data(data_ids1=self.sample_ids, data_ids2=cov_ids)
        if len(cov_index) == len(self.sample_ids):
            # BUGFIX: select matched ROWS before any reshaping. The previous
            # .flatten()[cov_index] collapsed an (n, c) covariate matrix to a 1-D
            # vector first, so with more than one covariate the row indices picked
            # arbitrary interleaved values. A (n, 1) result feeds the 2-D branch of
            # get_fixed_effects and yields the same fixed-effects matrix as before
            # for the single-covariate case.
            self.fixed = torch.tensor(cov.values, dtype=torch.float64)[cov_index]
        else:
            raise Exception('Sample ids of covariates and phenotype do not match.')
    self.get_fixed_effects()

def load_phenotype(self, phenotype_file: pathlib.Path, trait: str) -> (torch.Tensor, np.array):
    """
    Load phenotype from file. Accept .csv and single white space separated .txt and .pheno files.
    Phenotype data needs to contain sample identifiers as first column and phenotypic traits as remaining columns.
    The trait name should be the respective column name. Can contain more than one phenotype column.
    Will drop NAN values during preparation and compute mean over replicates.

    :param phenotype_file: full path to phenotype file
    :param trait: name of phenotypic trait / column to use

    :return: tensor containing phenotypic traits and array containing respective sample_ids
    """
    suffix = phenotype_file.suffix
    if suffix == ".csv":
        y = pd.read_csv(phenotype_file)
    elif suffix in (".txt", ".pheno"):
        # both formats are single-space separated; .pheno may additionally carry PLINK FID/IID columns
        y = pd.read_csv(phenotype_file, sep=" ")
        if suffix == ".pheno" and {'FID', 'IID'}.issubset(set(y.columns)):
            # keep IID as the sample identifier column, drop the redundant family id
            y.drop(columns='FID', inplace=True)
    else:
        raise NotImplementedError('Only accept CSV, PHENO and TXT phenotype files')
    # account for replicates: average all rows sharing the same sample id
    y = y.sort_values(y.columns[0]).groupby(y.columns[0]).mean()
    if trait not in y.columns:
        raise Exception('Phenotype ' + trait + ' is not in phenotype file ' + phenotype_file.as_posix())
    y = y[[trait]].dropna()
    return torch.tensor(y.values, dtype=torch.float64).flatten(), np.asarray(y.index, dtype=str).flatten()
def load_kinship(self, kinship_file: pathlib.Path) -> (torch.tensor, np.array):
    """
    Load kinship matrix from file. Only take .csv or .h5/.hdf5/.h5py files.
    For .csv files sample ids have to be in first column, .h5/.hdf5/.h5py files need to contain the kinship matrix
    with key 'kinship' and the corresponding sample ids with key 'sample_ids'.

    :param kinship_file: full path to kinship file

    :return: torch.tensor (float64) containing kinship matrix and array with sample ids
    """
    suffix = kinship_file.suffix
    if suffix == ".csv":
        kin = pd.read_csv(kinship_file, index_col=0)
        # FIX: cast explicitly to float64 like the HDF5 branch below; an integer-valued
        # CSV previously produced an int64 tensor that breaks downstream float math
        # (e.g. the Gower normalization).
        K = torch.tensor(kin.values, dtype=torch.float64)
        sample_ids = np.array(kin.index, dtype=str)
    elif suffix in (".h5", ".hdf5", ".h5py"):
        with h5py.File(kinship_file, "r") as f:
            K = torch.tensor(f['kinship'][:], dtype=torch.float64)
            sample_ids = f['sample_ids'][:].astype(str)
    else:
        raise NotImplementedError('Only accept .csv, .h5, .hdf5, .h5py kinship files')
    return K, sample_ids

def compute_rrk_kinship(self) -> torch.tensor:
    """
    Compute realized relationship kernel as kinship matrix: standardize each SNP column,
    then K = X_stand @ X_stand^T / n_snps, with negative entries clipped to zero.

    :return: kinship matrix

    :raises Exception: if the genotype matrix is not loaded in memory
    """
    if self.X is None:
        raise Exception('Cannot compute kinship matrix, no genotype matrix available.')
    # column-wise standardization (monomorphic SNPs were filtered earlier, so std != 0)
    X_stand = (self.X - self.X.mean(axis=0)) / self.X.std(axis=0)
    K = torch.matmul(X_stand, torch.t(X_stand)) / self.X.shape[1]
    # set negative values in K to zero
    return torch.where(K > 0, K, 0.)
def normalize_kinship(self):
    """
    Normalize kinship matrix using a Gower's centered matrix:
    K <- (n-1) / sum(P * K) * K with P = I - 11^T / n, so that the mean diagonal is ~1.
    """
    n = self.K.shape[0]
    P = (torch.eye(n, dtype=self.K.dtype, device=self.K.device) -
         torch.ones(n, n, dtype=self.K.dtype, device=self.K.device) / n)
    self.K = (n - 1) / torch.sum(torch.mul(P, self.K)) * self.K

def load_covariates(self, covariate_file: pathlib.Path, covariate_list: list = None) -> pd.DataFrame:
    """
    Only take .csv files: sample ids have to be in first column, if covariate_list is available, will load all
    columns specified, else will load all available columns.
    Averages replicate rows per sample id and drops rows containing NaN.

    :param covariate_file: full path to covariates file
    :param covariate_list: list containing column names/headers of covariates to load

    :return: pandas DataFrame containing covariates with sample ids as index
    """
    # FIX: return annotation corrected from torch.tensor to pd.DataFrame — the function
    # returns the DataFrame (as its docstring always said); the caller converts to a tensor.
    if covariate_file.suffix == ".csv":
        covs = pd.read_csv(covariate_file)
        covs = covs.sort_values(covs.columns[0]).groupby(covs.columns[0]).mean().dropna()
        if covariate_list is not None:
            if set(covariate_list).issubset(set(covs.columns)):
                covs = covs[covariate_list]
            else:
                raise Exception('Specified covariates are not available in covariate file. Please check again.')
    else:
        raise NotImplementedError('Only accept .csv covariates files')
    return covs

def get_fixed_effects(self):
    """
    Check for covariates and create fixed effects matrix with ones as first column and covariates as remaining
    columns if available --> dim: (n, c+1)
    """
    if self.fixed is None:
        # intercept only
        self.fixed = torch.ones((len(self.y), 1), dtype=torch.float64)
    elif self.fixed.ndim == 1:
        # single covariate given as 1-D vector
        self.fixed = torch.stack((torch.ones(len(self.y), dtype=torch.float64), self.fixed), dim=1)
    else:
        # covariate matrix (n, c)
        self.fixed = torch.cat((torch.ones((len(self.y), 1), dtype=torch.float64), self.fixed), dim=1)

def to_device(self, device: torch.device):
    """
    Move phenotype, kinship and fixed effects tensors to device.

    :param device: cpu or cuda
    """
    self.y = self.y.to(device)
    self.K = self.K.to(device)
    self.fixed = self.fixed.to(device)

@staticmethod
def match_data(data_ids1: np.array, data_ids2: np.array) -> (np.array, np.array):
    """
    Match two datasets by their sample ids via a broadcasted equality matrix.

    :param data_ids1: ids of first dataset
    :param data_ids2: ids of second dataset

    :return: two arrays with indices of matched data (indices into data_ids1 and data_ids2)
    """
    return (np.reshape(data_ids1, (data_ids1.shape[0], 1)) == data_ids2.astype(data_ids1.dtype)).nonzero()
import pathlib
import torch
import pandas as pd
from utils import helper_functions
import models


def check_all_arguments(args: dict) -> dict:
    """
    Check user specified arguments for plausibility and turn all file paths to pathlib.Path objects.
    Mutates and returns the same args dict: normalizes trait/covariate/permutation settings,
    resolves the compute device and validates the model name.

    :param args: dict with user specified arguments (command line / config values)

    :return: the validated and normalized args dict
    """
    # a config file, if given, overrides/extends the command line arguments
    if args["config_file"] is not None:
        args = helper_functions.parse_config_file(args=args)
        del args["config_file"]
    # check if specified files exist
    args["genotype_file"] = check_file(filepath=args["genotype_file"])
    args["phenotype_file"] = check_file(filepath=args["phenotype_file"])
    args["kinship_file"] = check_file(filepath=args["kinship_file"])
    args["covariate_file"] = check_file(filepath=args["covariate_file"])
    if args["trait"] is None:
        args["trait"] = 'phenotype_value'
    elif (args["trait"] == 'all') or (args["trait"] == ['all']):
        # expand 'all' to the list of every phenotype column in the file
        print('Will perform computations on all available phenotypes.')
        args["out_file"] = None
        suffix = args["phenotype_file"].suffix
        if suffix == ".csv":
            df = pd.read_csv(args["phenotype_file"], index_col=0)
        # load PHENO or TXT
        elif suffix == ".txt":
            df = pd.read_csv(args["phenotype_file"], index_col=0, sep=" ")
        elif suffix == ".pheno":
            df = pd.read_csv(args["phenotype_file"], index_col=0, sep=" ")
            # PLINK-style files may carry FID/IID columns that are not phenotypes
            if 'FID' in df.columns:
                df.drop(columns='FID', inplace=True)
            if 'IID' in df.columns:
                df.drop(columns='IID', inplace=True)
        else:
            raise Exception('Only accept .txt, .pheno or .csv phenotype files.')
        args["trait"] = df.columns.tolist()
    elif isinstance(args["trait"], str):
        args["trait"] = [args["trait"]]
    elif isinstance(args["trait"], list):
        args["out_file"] = None
    else:
        raise Exception('Something is wrong with the trait name. Please check again.')
    # sanity checks for fast loading and batch-wise loading:
    # batch-wise loading needs a precomputed kinship and an HDF5 genotype file
    if args["kinship_file"] is None:
        args["load_genotype"] = True
    if args["genotype_file"].suffix not in ('.h5', '.hdf5', '.h5py'):
        args["load_genotype"] = True
    # check gpu
    if torch.cuda.is_available() and not args["disable_gpu"]:
        dev = "cuda:" + str(args["device"])
        print('GPU is available. Perform computations on device ', dev)
    else:
        dev = "cpu"
        print('GPU is not available. Perform computations on device ', dev)
    del args["disable_gpu"]
    args["device"] = torch.device(dev)
    # check model
    if args["model"] is None:
        args["model"] = 'lmm'
    if args["model"] not in models.__all__:
        raise NotImplementedError('Specified model not implemented')

    # sanity checks
    if args["maf_threshold"] is None:
        args["maf_threshold"] = 0
    if isinstance(args["covariate_list"], str):
        args["covariate_list"] = [args["covariate_list"]]
    # check permutation method
    if args["perm"] is None:
        args["perm"] = 0
    if args["perm"] > 0:
        if args["perm_method"] not in ('x', 'y'):
            raise NotImplementedError(' Can only perform permutation methods x and y. Please check again.')
    if args["adj_p_value"] and args["perm"] == 0:
        raise Exception('Can not compute adjusted p-values with 0 permutations. Please check again.')
    return args


def check_output_files(args: dict) -> dict:
    """
    Derive default output directory/file and de-collide existing result file names.

    NOTE(review): args["trait"] is assumed to be a single str here; check_all_arguments
    may have turned it into a list (out_file is then set to None) — presumably the main
    loop re-sets trait per phenotype before calling this. TODO confirm against caller.

    :param args: dict with user specified arguments

    :return: args dict with out_dir/out_file set to checked pathlib objects
    """
    # check output directory and file
    if args["out_file"] is None:
        args["out_file"] = args["trait"] + '.csv'
    if args["out_dir"] is None:
        args["out_dir"] = pathlib.Path.cwd().joinpath('results')
    args["out_dir"], args["out_file"] = check_dir_paths(args["out_dir"], args["out_file"])
    return args


def check_file(filepath: str):
    """
    Check if specified file exists.

    :param filepath: full path to file, may be None

    :return: path to file as Path object, or None if no path was given

    :raises FileNotFoundError: if the path is given but does not point to a file
    """
    if filepath is None:
        return None
    else:
        filepath = pathlib.Path(filepath)
        if filepath.is_file():
            return filepath
        else:
            raise FileNotFoundError('There is no file ', filepath.as_posix())


def check_dir_paths(out_dir: str, out_file: str, prefix: str = 'p_values_') -> (pathlib.Path, pathlib.Path):
    """
    Check if directory for result files exists, if not, create directory.
    Then check if result files already exist, if they already exist, rename result file by adding (i) to the
    end of the file. For .h5 outputs an existing file is an error instead of being renamed.

    :param out_dir: directory to save result files
    :param out_file: result file
    :param prefix: prefix to use when checking for existing files, default is p_values_

    :return: tuple of (directory path, possibly renamed file name)
    """
    my_path = pathlib.Path(out_dir)
    # the prefix implies which artifact (and thus file suffix) is being written
    if prefix in ('manhattan_', 'qq_plot_'):
        suffix = '.png'
    elif prefix == '':
        suffix = '.h5'
    else:
        suffix = '.csv'
    out_file = pathlib.Path(out_file).with_suffix(suffix).as_posix()
    if my_path.is_dir():
        if my_path.joinpath(prefix + out_file).exists():
            if suffix == '.h5':
                # .h5 result files must not be overwritten or duplicated
                raise Exception('File %s already exists in chosen directory %s.'
                                % (out_file, out_dir))
            # append (i) with the smallest i that yields a fresh file name
            i = 1
            new_file = pathlib.Path(out_file).with_suffix('').as_posix() + '(' + str(i) + ')' + suffix
            new_path = my_path.joinpath(prefix + new_file)
            while new_path.exists():
                i += 1
                new_file = pathlib.Path(out_file).with_suffix('').as_posix() + '(' + str(i) + ')' + suffix
                new_path = my_path.joinpath(prefix + new_file)
            print('The file %s already exists in chosen directory %s. Changed filename to %s.'
                  % (prefix + out_file, out_dir, prefix + new_file))
        else:
            new_file = out_file
    else:
        new_file = out_file
        my_path.mkdir(parents=True, exist_ok=True)
    return my_path, new_file
import pathlib
import importlib
import inspect
import numpy as np


def parse_config_file(args: dict) -> dict:
    """
    Read yaml config file to update all user specified arguments.

    :param args: dict with user specified arguments

    :return: updated dict with arguments

    :raises FileNotFoundError: if the config file does not exist
    """
    # lazy import: PyYAML is only required when a config file is actually used
    import yaml

    config_path = pathlib.Path(args["config_file"])
    if not config_path.is_file():
        raise FileNotFoundError('Specified config file does not exist. Please check again.')
    if config_path.suffix not in ['.yaml', '.yml']:
        raise Exception('Only accept yaml config files. Please check again.')
    # FIX: use a context manager so the config file handle is closed again
    # (previously yaml.safe_load(open(...)) leaked the handle)
    with open(config_path) as config_handle:
        config = yaml.safe_load(config_handle)
    args.update(config)
    return args


def get_model_class_name(model_name: str = 'lmm'):
    """
    Get class of model for user input.

    :param model_name: user input of model name

    :return: model class

    :raises NotImplementedError: if no matching model class exists
    """
    # lazy import of the project's models package (needed for __all__ lookup)
    import models

    if model_name in models.__all__:
        model_name = 'models.' + model_name
        for name, cls in inspect.getmembers(importlib.import_module(model_name), inspect.isclass):
            # only classes defined in the module itself, not imported ones
            if cls.__module__ == model_name:
                return cls
        else:
            # for/else: loop finished without returning -> no class defined in the module
            raise NotImplementedError('No class named ', model_name)
    else:
        raise NotImplementedError('No class named ', model_name)


def estimate_heritability(v_g: float, v_e: float) -> float:
    """
    Compute narrow sense heritability h2 = v_g / (v_g + v_e).

    :param v_g: genetic variance component
    :param v_e: residual variance component

    :return: narrow sense heritability
    """
    return v_g / (v_g + v_e)


def compute_perm_threshold(min_p_val: np.array, sig_level: int) -> float:
    """
    Compute permutation-based threshold as the sig_level-th percentile of the minimal p-values.

    :param min_p_val: array with minimal p-values
    :param sig_level: significance level as percentage value

    :return: threshold
    """
    return np.percentile(min_p_val, sig_level)


def compute_bonf_threshold(number_snps: int, sig_level: int) -> float:
    """
    Compute Bonferroni threshold (sig_level / 100) / number_snps.

    :param number_snps: number of SNPs
    :param sig_level: significance level as percentage value

    :return: threshold
    """
    return (sig_level / 100) / number_snps


def print_summary_stats(genotype_file: pathlib.Path, phenotype_file: pathlib.Path, trait: str, samples: int, snps: int,
                        model: str, maf_threshold: int, perm: int, v_g: float, v_e: float, h2: float, bonf1: float,
                        bonf5: float, perm1: float, perm5: float, time: float, kinship_file: pathlib.Path = None,
                        covariate_file: pathlib.Path = None, covariate_list: list = None, perm_method: str = None):
    """
    Print summary statistics.

    :param genotype_file: full path to genotype file
    :param phenotype_file: full path to phenotype file
    :param trait: name of phenotypic trait
    :param samples: number of samples used
    :param snps: number of SNPs used
    :param model: model used for GWAS
    :param maf_threshold: threshold used for maf filtering
    :param perm: number of permutations
    :param v_g: genetic variance component
    :param v_e: residual variance component
    :param h2: narrow-sense heritability
    :param bonf1: Bonferroni threshold significance level 1%
    :param bonf5: Bonferroni threshold significance level 5%
    :param perm1: permutation-based threshold significance level 1%
    :param perm5: permutation-based threshold significance level 5%
    :param time: total runtime in seconds
    :param kinship_file: full path to kinship file, optional
    :param covariate_file: full path to covariate file, optional
    :param covariate_list: list containing covariates
    :param perm_method: method used for permutations
    """
    print('\n')
    print('+++++++++ Summary Statistics +++++++++')
    print('## Genotype file: ' + genotype_file.as_posix())
    print('## Phenotype file: ' + phenotype_file.as_posix())
    print('## Phenotype: ' + trait)
    if covariate_file is not None:
        print('## Covariate file: ' + covariate_file.as_posix())
        if covariate_list is not None:
            print('## Used covariates: ' + ",".join(covariate_list))
        else:
            print('## Used all available covariates')
    if kinship_file is not None:
        print('## Kinship file: ' + kinship_file.as_posix())
    print('## Number of individuals: ' + str(samples))
    print('## Number of SNPs: ' + str(snps))
    print('## Model: ' + model)
    print('## MAF threshold: ' + str(maf_threshold))
    print('## Number of permutations: ' + str(perm))
    if perm_method is not None:
        print('## permutation method: ' + perm_method)
    if model == 'lmm':
        # variance components / heritability are only estimated by the LMM
        print('## v_g estimate in null model: ' + str(v_g))
        print('## v_e estimate in null model: ' + str(v_e))
        print('## Narrow-sense heritability estimate: ' + str(h2))
    print('## Bonferroni threshold (1% significance level): ' + str(bonf1))
    print('## Bonferroni threshold (5% significance level): ' + str(bonf5))
    if perm1 is not None:
        print('## Permutation-based threshold (1% significance level): ' + str(perm1))
        print('## Permutation-based threshold (5% significance level): ' + str(perm5))
    print('## Total time: %.2f s' % time)
    print('+++++++++++++++++++++++++++')
    print('\n')
def write_summary_stats(out_dir: pathlib.Path, out_file: str, genotype_file: pathlib.Path, phenotype_file: pathlib.Path,
                        trait: str, samples: int, snps: int, model: str, maf_threshold: int, perm: int, v_g: float,
                        v_e: float, h2: float, bonf1: float, bonf5: float, perm1: float, perm5: float, time: float,
                        kinship_file: pathlib.Path = None, covariate_file: pathlib.Path = None,
                        covariate_list: list = None, perm_method: str = None):
    """
    Save summary statistics to a txt file named 'summary_statistics_<out_file>.txt' in out_dir.

    :param out_dir: directory to write the summary file to
    :param out_file: base name used to derive the summary file name
    :param genotype_file: full path to genotype file
    :param phenotype_file: full path to phenotype file
    :param trait: name of phenotypic trait
    :param samples: number of samples used
    :param snps: number of SNPs used
    :param model: model used for GWAS
    :param maf_threshold: threshold used for maf filtering
    :param perm: number of permutations
    :param v_g: genetic variance component
    :param v_e: residual variance component
    :param h2: narrow-sense heritability
    :param bonf1: Bonferroni threshold significance level 1%
    :param bonf5: Bonferroni threshold significance level 5%
    :param perm1: permutation-based threshold significance level 1%
    :param perm5: permutation-based threshold significance level 5%
    :param time: total runtime in seconds
    :param kinship_file: full path to kinship file, optional
    :param covariate_file: full path to covariate file, optional
    :param covariate_list: list containing covariates
    :param perm_method: method used for permutations
    """
    # collect all report lines first, then write them in a single pass
    rows = ['Summary Statistics:']
    rows.append('## Genotype file:\t' + genotype_file.as_posix())
    rows.append('## Phenotype file:\t' + phenotype_file.as_posix())
    rows.append('## Phenotype:\t' + trait)
    if covariate_file is not None:
        rows.append('## Covariate file:\t' + covariate_file.as_posix())
        if covariate_list is not None:
            rows.append('## Used covariates:\t' + ",".join(covariate_list))
        else:
            rows.append('## Used all available covariates')
    if kinship_file is not None:
        rows.append('## Kinship file:\t' + kinship_file.as_posix())
    rows.append('## Number of individuals:\t' + str(samples))
    rows.append('## Number of SNPs:\t' + str(snps))
    rows.append('## Model:\t' + model)
    rows.append('## MAF threshold:\t' + str(maf_threshold))
    rows.append('## Number of permutations:\t' + str(perm))
    if perm_method is not None:
        rows.append('## permutation method:\t' + perm_method)
    if model == 'lmm':
        # variance components / heritability are only estimated by the LMM
        rows.append('## v_g estimate in null model:\t' + str(v_g))
        rows.append('## v_e estimate in null model:\t' + str(v_e))
        rows.append('## Narrow-sense heritability estimate:\t' + str(h2))
    rows.append('## Bonferroni threshold (1% significance level):\t' + str(bonf1))
    rows.append('## Bonferroni threshold (5% significance level):\t' + str(bonf5))
    if perm1 is not None:
        rows.append('## Permutation-based threshold (1% significance level):\t' + str(perm1))
        rows.append('## Permutation-based threshold (5% significance level):\t' + str(perm5))
    rows.append('## Total time:\t' + str(time) + ' s')
    target = out_dir.joinpath('summary_statistics_' + pathlib.Path(out_file).with_suffix('.txt').as_posix())
    with open(target, 'w') as handle:
        handle.write('\n'.join(rows) + '\n')


def get_summary_stats(out_dir: pathlib.Path, out_file: str, genotype_file: pathlib.Path, phenotype_file: pathlib.Path,
                      trait: str, samples: int, snps: int, model: str, maf_threshold: int, perm: int, v_g: float,
                      v_e: float, min_p_val: np.array, time: float, kinship_file: pathlib.Path = None,
                      covariate_file: pathlib.Path = None, covariate_list: list = None, perm_method: str = None):
    """
    Compute summary statistics (heritability, Bonferroni and permutation thresholds),
    then save them to file and print them.

    :param out_dir: directory to write the summary file to
    :param out_file: base name used to derive the summary file name
    :param genotype_file: full path to genotype file
    :param phenotype_file: full path to phenotype file
    :param trait: name of phenotypic trait
    :param samples: number of samples used
    :param snps: number of SNPs used
    :param model: model used for GWAS
    :param maf_threshold: threshold used for maf filtering
    :param perm: number of permutations
    :param v_g: genetic variance component
    :param v_e: residual variance component
    :param min_p_val: minimal p-values from permutations, or None
    :param time: total runtime in seconds
    :param kinship_file: full path to kinship file, optional
    :param covariate_file: full path to covariate file, optional
    :param covariate_list: list containing covariates
    :param perm_method: method used for permutations
    """
    # heritability is only defined for the LMM's variance components
    heritability = estimate_heritability(v_g=v_g, v_e=v_e) if model == 'lmm' else None
    bonf1 = compute_bonf_threshold(number_snps=snps, sig_level=1)
    bonf5 = compute_bonf_threshold(number_snps=snps, sig_level=5)
    if min_p_val is None:
        perm1 = None
        perm5 = None
    else:
        perm1 = compute_perm_threshold(min_p_val=min_p_val, sig_level=1)
        perm5 = compute_perm_threshold(min_p_val=min_p_val, sig_level=5)
    # identical keyword payload for both the file writer and the console printer
    shared = dict(genotype_file=genotype_file, phenotype_file=phenotype_file, trait=trait, samples=samples,
                  snps=snps, model=model, maf_threshold=maf_threshold, perm=perm, v_g=v_g, v_e=v_e,
                  h2=heritability, bonf1=bonf1, bonf5=bonf5, perm1=perm1, perm5=perm5, time=time,
                  kinship_file=kinship_file, covariate_file=covariate_file, covariate_list=covariate_list,
                  perm_method=perm_method)
    write_summary_stats(out_dir=out_dir, out_file=out_file, **shared)
    print_summary_stats(**shared)