All modules for which code is available
40 | - pyPheWAS.pyPhewasCore
41 | - pyPheWAS.pyPhewasv2
42 |
├── .readthedocs.yaml ├── LICENSE ├── MANIFEST ├── README.md ├── _config.yml ├── bin ├── NoveltyAnalysis ├── PubMedQuery ├── censorData ├── convertEventToAge ├── createGenotypeFile ├── createICDCovariate ├── createPhenotypeFile ├── maximizeControls ├── mergeGroups ├── pyPhewasExplorer ├── pyPhewasExplorerBackEnd ├── pyPhewasLookup ├── pyPhewasModel ├── pyPhewasPipeline ├── pyPhewasPlot ├── pyProwasLookup ├── pyProwasModel ├── pyProwasPipeline ├── pyProwasPlot └── test ├── deprecated ├── censor_diagnosis.py ├── labwas.py ├── maximize_control_matching.py ├── prowas_reg.py ├── pyPhewasCore.py ├── pyPhewasv2.py ├── pyPhewasv3.py └── pyProWAS.py ├── dist ├── pyPheWAS-0.1.11.tar.gz ├── pyPheWAS-0.1.12.tar.gz ├── pyPheWAS-0.1.13.tar.gz ├── pyPheWAS-0.1.14.tar.gz ├── pyPheWAS-0.1.15.tar.gz ├── pyPheWAS-0.1.16.tar.gz ├── pyPheWAS-0.1.17.tar.gz ├── pyPheWAS-0.1.18.tar.gz ├── pyPheWAS-0.1.19.tar.gz ├── pyPheWAS-0.1.20.tar.gz ├── pyPheWAS-0.1.21.tar.gz ├── pyPheWAS-0.1.22.tar.gz ├── pyPheWAS-0.1.27.tar.gz ├── pyPheWAS-0.1.28.tar.gz ├── pyPheWAS-0.1.29.tar.gz ├── pyPheWAS-0.1.3.tar.gz ├── pyPheWAS-0.1.30.tar.gz ├── pyPheWAS-0.1.31.tar.gz ├── pyPheWAS-0.1.32.tar.gz ├── pyPheWAS-0.1.33.tar.gz ├── pyPheWAS-0.1.34.tar.gz ├── pyPheWAS-0.1.35.tar.gz ├── pyPheWAS-0.1.36.tar.gz └── pyPheWAS-1.0.2.tar.gz ├── docs ├── Makefile ├── _build │ ├── .buildinfo │ ├── .doctrees │ │ ├── agematch.doctree │ │ ├── analysis.doctree │ │ ├── api.doctree │ │ ├── api.doctree.orig │ │ ├── basic.doctree │ │ ├── code.doctree │ │ ├── code_BACKUP_13469.doctree │ │ ├── code_BASE_13469.doctree │ │ ├── code_LOCAL_13469.doctree │ │ ├── code_REMOTE_13469.doctree │ │ ├── environment.pickle │ │ ├── index.doctree │ │ ├── other.doctree │ │ ├── source │ │ │ └── index.doctree │ │ ├── test.doctree │ │ ├── tutorial.doctree │ │ └── tutorial │ │ │ ├── agematch.doctree │ │ │ ├── analysis.doctree │ │ │ ├── basic.doctree │ │ │ ├── other.doctree │ │ │ └── tutorial.doctree │ ├── _modules │ │ ├── index.html │ │ ├── pyPheWAS │ │ │ ├── pyPhewas.html │ │ │ ├── pyPhewasCore.html │ │ │ ├── pyPhewaslin.html │ │ │ ├── pyPhewaslog.html │ │ │ └── pyPhewasv2.html │ │ └── pyPhewasv2.html │ ├── _sources │ │ ├── api.txt │ │ ├── index.txt │ │ ├── tutorial.txt │ │ └── tutorial │ │ │ ├── agematch.txt │ │ │ ├── analysis.txt │ │ │ ├── basic.txt │ │ │ ├── other.txt │ │ │ └── tutorial.txt │ ├── _static │ │ ├── ajax-loader.gif │ │ ├── alabaster.css │ │ ├── basic.css │ │ ├── comment-bright.png │ │ ├── comment-close.png │ │ ├── comment.png │ │ ├── custom.css │ │ ├── doctools.js │ │ ├── down-pressed.png │ │ ├── down.png │ │ ├── file.png │ │ ├── jquery-1.11.1.js │ │ ├── jquery.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── underscore-1.3.1.js │ │ ├── underscore.js │ │ ├── up-pressed.png │ │ ├── up.png │ │ └── websupport.js │ ├── api.html │ ├── code.html │ ├── genindex.html │ ├── index.html │ ├── objects.inv │ ├── py-modindex.html │ ├── search.html │ ├── searchindex.js │ ├── tutorial.html │ └── tutorial │ │ ├── agematch.html │ │ ├── analysis.html │ │ ├── basic.html │ │ ├── other.html │ │ └── tutorial.html ├── _static │ ├── phewas_workflow.png │ ├── pyPheWAS_Research_Tools.png │ ├── pyphewas_explorer.png │ ├── pyphewas_explorer_reg_builder.png │ └── pyphewas_explorer_reg_eval.png ├── api.rst ├── api │ ├── censor_diagnosis.rst │ ├── maximize_bipartite.rst │ └── pyPhewasCorev2.rst ├── basic.rst ├── build.sh ├── conf.py ├── customizations.rst ├── dataprep.rst ├── explorer_overview.rst ├── explorer_setup.rst ├── explorer_walkthrough.rst ├── 
index.rst ├── make.bat ├── novelty_tools.rst ├── phewas_tools.rst ├── prowas_tools.rst ├── references.rst └── tutorial.rst ├── pyPheWAS ├── Explorer_GUI │ ├── 023fbd237e5020d0@76.js │ ├── 29d10840f83e2527@465.js │ ├── e38221a5df1e64b8@3957.js │ ├── ef93120144671667@373.js │ ├── index.html │ ├── index.js │ ├── inspector.css │ ├── package.json │ └── runtime.js ├── NoveltyPheDAS.py ├── PubMedFunctions.py ├── __init__.py ├── maximize_bipartite.py ├── pyPhewasCorev2.py ├── pyPhewasExplorerCore.py ├── resources │ ├── R_squared.xlsx │ ├── codes.csv │ ├── phecode_exclude.csv │ ├── phecode_map_v1_1_icd9.csv │ ├── phecode_map_v1_2_icd10_beta.csv │ ├── phecode_map_v1_2_icd9.csv │ ├── prowas_codes.csv │ ├── prowas_codes_eye.csv │ └── pyPheWAS_start_msg.txt └── rt_censor_diagnosis.py ├── requirements.txt ├── setup.cfg ├── setup.py └── synthetic_data ├── README.md ├── group.csv └── icds.csv /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.11" 12 | # You can also specify other tool versions: 13 | # nodejs: "20" 14 | # rust: "1.70" 15 | # golang: "1.20" 16 | 17 | # Build documentation in the "docs/" directory with Sphinx 18 | sphinx: 19 | configuration: docs/conf.py 20 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 21 | # builder: "dirhtml" 22 | # Fail on all warnings to avoid broken references 23 | # fail_on_warning: true 24 | 25 | # Optionally build your docs in additional formats such as PDF and ePub 26 | # formats: 27 | # - pdf 28 | # - epub 29 | 30 | # Optional but recommended, declare the Python requirements required 31 | # to build your documentation 32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 33 | # python: 34 | # install: 35 | # - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Bennett Landman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | setup.cfg 3 | setup.py 4 | bin/NoveltyAnalysis 5 | bin/PubMedQuery 6 | bin/censorData 7 | bin/convertEventToAge 8 | bin/createGenotypeFile 9 | bin/createPhenotypeFile 10 | bin/maximizeControls 11 | bin/mergeGroups 12 | bin/pyPhewasExplorer 13 | bin/pyPhewasExplorerBackEnd 14 | bin/pyPhewasLookup 15 | bin/pyPhewasModel 16 | bin/pyPhewasPipeline 17 | bin/pyPhewasPlot 18 | bin/pyProwasLookup 19 | bin/pyProwasModel 20 | bin/pyProwasPipeline 21 | bin/pyProwasPlot 22 | pyPheWAS/NoveltyPheDAS.py 23 | pyPheWAS/PubMedFunctions.py 24 | pyPheWAS/__init__.py 25 | pyPheWAS/maximize_bipartite.py 26 | pyPheWAS/pyPhewasCorev2.py 27 | pyPheWAS/pyPhewasExplorerCore.py 28 | pyPheWAS/rt_censor_diagnosis.py 29 | pyPheWAS/Explorer_GUI/023fbd237e5020d0@76.js 30 | pyPheWAS/Explorer_GUI/29d10840f83e2527@465.js 31 | pyPheWAS/Explorer_GUI/e38221a5df1e64b8@3957.js 32 | pyPheWAS/Explorer_GUI/ef93120144671667@373.js 33 | pyPheWAS/Explorer_GUI/index.html 34 | pyPheWAS/Explorer_GUI/index.js 35 | pyPheWAS/Explorer_GUI/inspector.css 36 | pyPheWAS/Explorer_GUI/package.json 37 | pyPheWAS/Explorer_GUI/runtime.js 38 | pyPheWAS/resources/codes.csv 39 | pyPheWAS/resources/phecode_exclude.csv 40 | pyPheWAS/resources/phecode_map_v1_1_icd9.csv 41 | pyPheWAS/resources/phecode_map_v1_2_icd10_beta.csv 42 | pyPheWAS/resources/phecode_map_v1_2_icd9.csv 43 | pyPheWAS/resources/prowas_codes.csv 44 | pyPheWAS/resources/prowas_codes_eye.csv 45 | pyPheWAS/resources/pyPheWAS_start_msg.txt 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## pyPheWAS 2 | 3 | Repository for the pyPheWAS project. 4 | Full documentation at https://pyphewas.readthedocs.io/en/latest/ 5 | 6 | ### Developers 7 | Cailey Kerley, PhD 8 | 9 | Shikha Chaganti, PhD 10 | 11 | Bennett Landman, PhD 12 | 13 | ## Cite pyPheWAS 14 | Kerley, C.I., Chaganti, S., Nguyen, T.Q. et al. pyPheWAS: A Phenome-Disease Association Tool for Electronic Medical Record Analysis. *Neuroinform* (2022). https://doi.org/10.1007/s12021-021-09553-4 15 | 16 | Kerley, C.I., Nguyen T.Q., Ramadass, K, et al. pyPheWAS Explorer: a visualization tool for exploratory analysis of phenome-disease associations. *JAMIA Open* (2023). 
https://doi.org/10.1093/jamiaopen/ooad018 17 | 18 | ## Latest Release: pyPheWAS 4.2 19 | 20 | ### 4.2.0 21 | - Default regression equation modified to allow for both canonical and reversed PheWAS equations 22 | - Updated plot styling to improve legibility 23 | - Bug fix: can now run pyPhewasModel/pyProwasModel without covariates 24 | - Other minor bug fixes 25 | 26 | ### 4.1.1 27 | - Minor bug fixes 28 | 29 | #### 4.1.0 30 | - pyPheWAS Explorer updates 31 | - New demographic variables added to synthetic dataset 32 | 33 | #### 4.0.5 34 | - convertEventToAge includes new warning for calculated ages are negative 35 | - small bugs fixed in maximizeControls, NoveltyAnalysis, and PubMedQuery tools 36 | 37 | 38 | #### 4.0.4 39 | - createPhenotypeFile now supports more options for controlling case/control group curation 40 | - Documentation updates 41 | 42 | #### 4.0.3 43 | - **Novelty Analysis** tools: examine the relative literary novelty of disease-phecode pairings 44 | - **pyPheWAS Explorer**: an interactive visualization of PheDAS experiments 45 | - createGenotypeFile updated - now called createPhenotypeFile 46 | - Minor bug fixes 47 | 48 | 49 | ### Older Releases 50 | 51 | #### pyPheWAS 3.2.0 52 | - Configurable threshold for number of subjects required to run the regression on an individual PheCode 53 | - All regressions are now fit with regularization (old scheme available with 'legacy' option) 54 | - Minor changes to Manhattan plot 55 | - PheCode/ProCode categories added to regression file 56 | - Minor bug fixes 57 | 58 | #### pyPheWAS 3.1.1 59 | - New Analysis Type: ProWAS Tools 60 | - New Plot Type: Volcano Plot (see pyPhewasPlot) 61 | - maximizeControls now saves explicit Case/Control matches 62 | - New PheCode category colors in plots are more distinguishable 63 | - Improved command line tool argument handling 64 | - Improved error handling 65 | - Documentation overhaul 66 | - API update 67 | - Minor bug fixes 68 | 69 | #### pyPheWAS 3.0.1 70 | - Bug fixes including __FDR & Bonferroni threshold calculations__ 71 | - Header saved in feature matrices 72 | - More file formats available for saving plots 73 | 74 | #### pyPheWAS 3.0.0 75 | - Support for both ICD 9 and ICD 10 76 | - All 3 regression types (binary, count, & duration) optimized for big data 77 | - pyPhewasPipeline: a streamlined combination of pyPhewasLookup, pyPhewasModel, and pyPhewasPlot 78 | - Compatibility with Python 3 79 | - Age matching now saves the explicit mapping between controls/cases in addition to the resulting group file 80 | - Operation of the ICD censoring function matches the description in the documentation 81 | - Minor bug fixes 82 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /bin/NoveltyAnalysis: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description="Run pyPheWAS Novelty Analysis") 8 | 9 | parser.add_argument('--pm_dir', required=True, type=str, help='Path to PheCode PubMed directory') 10 | parser.add_argument('--statfile', required=True, type=str, help='Name of the pyPheWAS stat file (e.g. 
regressions.csv)') 11 | parser.add_argument('--dx_pm', required=True, type=str,help='Name of the Disease Search PubMed file (e.g. dx_PubMED_results.csv)') 12 | parser.add_argument('--null_int', required=True, type=str, help='Null interval for calculating the 2nd gen p-value (e.g. [0.3, 1.1])') 13 | parser.add_argument('--path', required=False, default='.', type=str,help='Path to all input files and destination of output files') 14 | parser.add_argument('--postfix', required=False, default=None, type=str,help='Descriptive postfix for output files (e.g. poster or ages50-60)') 15 | 16 | args = parser.parse_args() 17 | return args 18 | 19 | 20 | args = parse_args() 21 | 22 | """ 23 | Print Start Message 24 | """ 25 | from pyPheWAS.pyPhewasCorev2 import print_start_msg, display_kwargs 26 | from pyPheWAS.NoveltyPheDAS import * 27 | import pandas as pd 28 | from pathlib import Path 29 | import time 30 | import math 31 | from ast import literal_eval 32 | import sys 33 | from tqdm import tqdm 34 | 35 | start = time.time() 36 | print_start_msg() 37 | print('\nNoveltyAnalysis: pyPheWAS Novelty Finding Index Tool\n') 38 | 39 | """ 40 | Retrieve and validate all arguments. 41 | """ 42 | 43 | kwargs = {'path': Path(args.path), 44 | 'pm_dir': Path(args.pm_dir), 45 | 'statfile': args.statfile, 46 | 'dx_pm': args.dx_pm, 47 | 'null_int': args.null_int, 48 | 'postfix': args.postfix, 49 | } 50 | 51 | # Assert that valid file names were given 52 | assert kwargs['statfile'].endswith('.csv'), "%s is not a valid stat file, must be a .csv file" % (kwargs['statfile']) 53 | assert kwargs['dx_pm'].endswith('.csv'), "%s is not a valid Dx PubMed file, must be a .csv file" % (kwargs['dx_pm']) 54 | # Assert that valid files were given 55 | assert (kwargs['path'] / kwargs['statfile']).exists(), "%s does not exist" %(kwargs['path'] / kwargs['statfile']) 56 | assert (kwargs['path'] / kwargs['dx_pm']).exists(), "%s does not exist" %(kwargs['path'] / kwargs['dx_pm']) 57 | assert kwargs['pm_dir'].exists(), "%s does not exist" % kwargs['pm_dir'] 58 | 59 | # check null interval 60 | try: 61 | null_int_str = kwargs['null_int'] 62 | kwargs['null_int'] = literal_eval(kwargs['null_int']) 63 | except Exception as e: 64 | print('Error encountered while parsing the null interval: %s' % null_int_str) 65 | print(e) 66 | sys.exit() 67 | assert len(kwargs['null_int']) == 2, 'Provided null interval does not contain two items/boundaries: %s' % null_int_str 68 | 69 | # Print Arguments 70 | display_kwargs(kwargs) 71 | # Make all arguments local variables 72 | locals().update(kwargs) 73 | 74 | """ 75 | Load files 76 | """ 77 | dx_pubmed = pd.read_csv(path / dx_pm) 78 | 79 | reg_f = open(path / statfile) 80 | reg_hdr = reg_f.readline() 81 | reg = pd.read_csv(reg_f, dtype={"PheWAS Code":str}) 82 | reg_f.close() 83 | 84 | # split confidence interval into lower & upper bounds 85 | reg[['beta_lowlim', 'beta_uplim']] = reg['Conf-interval beta'].str.split(',', expand=True) 86 | reg['beta_uplim'] = reg.beta_uplim.str.replace(']', '') 87 | reg['beta_lowlim'] = reg.beta_lowlim.str.replace('[', '') 88 | reg = reg.astype(dtype={'beta_uplim': float, 'beta_lowlim': float}) 89 | 90 | # convert log odds ratio (beta & its confidence interval) to odds ratios 91 | reg['OddsRatio'] = np.exp(reg['beta']) 92 | reg['OR_uplim'] = np.exp(reg['beta_uplim']) 93 | reg['OR_lowlim'] = np.exp(reg['beta_lowlim']) 94 | 95 | """ 96 | Combine Mass PheCode & Dx PubMed results 97 | """ 98 | reg = get_joint_PubMed_articles(reg, dx_pubmed, pm_dir) 99 | 100 | """ 101 | Run Novelty 
Calculations 102 | """ 103 | print('Calculating Novelty Finding Index') 104 | reg = calcNoveltyScore(reg, null_int) 105 | 106 | """ 107 | Save Regression File w/ Novelty Data 108 | """ 109 | if postfix is not None: 110 | fparts = statfile.split('.') 111 | outfile = '%s_%s.%s' % (fparts[0], postfix, fparts[1]) 112 | else: 113 | outfile = statfile 114 | 115 | print('Saving updated regression file to %s' % (path/outfile)) 116 | with open(path / outfile, 'w+') as f: 117 | f.write(reg_hdr) 118 | reg.to_csv(f, index=False) 119 | 120 | """ 121 | Plot Novelty Finding Index Results 122 | """ 123 | print('Generating Novelty Finding Index Plots') 124 | # filter regressions - only plot those with second gen p-val == 0 (significant) 125 | reg_to_plot = reg[reg['sgpv']==0].copy() 126 | reg_to_plot.sort_values(by=['Novelty_Finding_Index'], ascending=False, inplace=True) 127 | # plot in groups of 25-30 (or less) to keep plots legible 128 | if postfix is not None: 129 | basename = 'NFI_%s' % postfix 130 | else: 131 | basename = 'NFI' 132 | # finally, make the plots 133 | if reg_to_plot.shape[0] < 30: # just one plot 134 | plotfile = basename + '.png' 135 | plot_log_odds_ratio_novelty(reg_to_plot, np.log(null_int), save=path / plotfile) 136 | else: # lots of results = lots of plots 137 | for plot_ix, plot_group in tqdm(reg_to_plot.groupby(np.arange(reg_to_plot.shape[0])//25)): 138 | plotfile = '%s_%d.png' %(basename, plot_ix) 139 | plot_log_odds_ratio_novelty(plot_group, np.log(null_int), path / plotfile) 140 | 141 | """ 142 | Calculate runtime 143 | """ 144 | interval = time.time() - start 145 | hour = math.floor(interval/3600.0) 146 | minute = math.floor((interval - hour*3600)/60) 147 | second = math.floor(interval - hour*3600 - minute*60) 148 | 149 | if hour > 0: 150 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 151 | elif minute > 0: 152 | time_str = '%dm:%ds' % (minute, second) 153 | else: 154 | time_str = '%ds' % second 155 | 156 | print('NoveltyAnalysis Complete [Runtime: %s]' %time_str) -------------------------------------------------------------------------------- /bin/PubMedQuery: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | 5 | def parse_args(): 6 | parser = argparse.ArgumentParser(description="PubMed Query Tool") 7 | 8 | parser.add_argument('--outdir', required=True, type=str, help='Path to output directory') 9 | parser.add_argument('--umls', required=False, type=str, help='Path to UMLS Metathesaurus (MRCONSO.RRF)') 10 | parser.add_argument('--custom_terms', required=False, default=None, type=str,help='File containing custom search terms (Should be TXT or CSV)') 11 | 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | args = parse_args() 17 | 18 | """ 19 | Print Start Message 20 | """ 21 | from pyPheWAS.pyPhewasCorev2 import print_start_msg, display_kwargs 22 | from pyPheWAS.PubMedFunctions import * 23 | from pathlib import Path 24 | import time 25 | import math 26 | 27 | start = time.time() 28 | print_start_msg() 29 | print('\nPubMedQuery: PubMed Search Tool\n') 30 | 31 | """ 32 | Retrieve and validate all arguments. 
33 | """ 34 | 35 | kwargs = {'outdir': Path(args.outdir), 36 | 'umls':args.umls, 37 | 'custom_terms':args.custom_terms, 38 | } 39 | 40 | # Assert valid argument combination 41 | assert (kwargs['custom_terms'] is not None) | (kwargs['umls'] is not None), "Please provide either the 'umls' or 'custom_terms' arguments" 42 | assert (kwargs['custom_terms'] is not None) != (kwargs['umls'] is not None), "Both 'umls' and 'custom_terms' specified; please only provide one of these arguments" 43 | 44 | # Assert that valid files/paths were given 45 | if kwargs['umls'] is not None: 46 | assert kwargs['umls'].endswith('.RRF'), "%s is not a valid UMLS file, must be a .RRF file" % kwargs['umls'] 47 | kwargs['umls'] = Path(kwargs['umls']) 48 | assert kwargs['umls'].exists(), "%s does not exist" % kwargs['umls'] 49 | 50 | if kwargs['custom_terms'] is not None: 51 | assert kwargs['custom_terms'].endswith(('.txt', '.csv')), "%s is not a valid custom search term file, must be a .txt or .csv file" % kwargs['custom_terms'] 52 | kwargs['custom_terms'] = Path(kwargs['custom_terms']) 53 | assert kwargs['custom_terms'].exists(), "%s does not exist" % kwargs['custom_terms'] 54 | 55 | # Print Arguments 56 | display_kwargs(kwargs) 57 | # Make all arguments local variables 58 | locals().update(kwargs) 59 | 60 | """ Setup: Make outdir """ 61 | outdir.mkdir(parents=True, exist_ok=True) 62 | 63 | """ 64 | Run PubMed Search 65 | """ 66 | if kwargs['custom_terms'] is not None: 67 | print('Running PubMed Search on provided dx search terms') 68 | print('Loading provided search terms') 69 | search_terms = load_search_terms(custom_terms) 70 | run_Custom_PubMed_Query(search_terms, outdir) 71 | else: 72 | print('Running PubMed Search on all PheCodes') 73 | print('Loading UMLS Metathesaurus (This could take a while...)') 74 | umls_df = load_umls(umls) 75 | run_PheWAS_PubMed_Query(umls_df, outdir) 76 | 77 | """ 78 | Calculate runtime 79 | """ 80 | interval = time.time() - start 81 | hour = math.floor(interval/3600.0) 82 | minute = math.floor((interval - hour*3600)/60) 83 | second = math.floor(interval - hour*3600 - minute*60) 84 | 85 | if hour > 0: 86 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 87 | elif minute > 0: 88 | time_str = '%dm:%ds' % (minute, second) 89 | else: 90 | time_str = '%ds' % second 91 | 92 | print('PubMedQuery Complete [Runtime: %s]' %time_str) 93 | -------------------------------------------------------------------------------- /bin/censorData: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from pyPheWAS.pyPhewasCorev2 import * 4 | from pyPheWAS.rt_censor_diagnosis import * 5 | import os 6 | import argparse 7 | from pathlib import Path 8 | import os.path as osp 9 | import time 10 | import numpy as np 11 | import math 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description="pyPheWAS ICD/CPT Age Censoring Tool") 16 | 17 | parser.add_argument('--phenotype', required=True, type=str, help='Name of phenotype file') 18 | parser.add_argument('--group', required=True, type=str, help ='Name of the group file') 19 | parser.add_argument('--phenotypeout', required=True, type=str,help='Name of output phenotype file') 20 | parser.add_argument('--groupout', required=True, type=str, help='Name of output group file') 21 | parser.add_argument('--path', required=False, default='.', type=str,help='Path to all input files and destination of output files') 22 | parser.add_argument('--efield', required=False, default='AgeAtICD', type=str, 
help='Name of event to censor on (default: AgeAtICD)') 23 | parser.add_argument('--delta_field', required=False, default=None, type=str, help='If specified, censor with respect to the interval between delta_field and efield') 24 | parser.add_argument('--start', required=False, default=np.nan, type=float, help='Start time for censoring') 25 | parser.add_argument('--end', required=False, default=np.nan, type=float, help='End time for censoring') 26 | 27 | args = parser.parse_args() 28 | return args 29 | 30 | """ 31 | Print Start Message 32 | """ 33 | script_start = time.time() 34 | print_start_msg() 35 | print('\ncensorData: ICD/CPT Age Censoring Tool\n') 36 | 37 | 38 | """ 39 | Retrieve and validate all arguments. 40 | """ 41 | args = parse_args() 42 | kwargs = {'path': Path(args.path), 43 | 'phenotype': args.phenotype, 44 | 'group': args.group, 45 | 'phenotypeout':args.phenotypeout, 46 | 'groupout':args.groupout, 47 | 'start':args.start, 48 | 'end':args.end, 49 | 'efield':args.efield, 50 | 'delta_field':args.delta_field, 51 | } 52 | 53 | 54 | # Assert that valid files were given 55 | assert kwargs['phenotype'].endswith('.csv'), "%s is not a valid phenotype file, must be a .csv file" % (kwargs['phenotype']) 56 | assert kwargs['group'].endswith('.csv'), "%s is not a valid group file, must be a .csv file" % (kwargs['group']) 57 | assert kwargs['phenotypeout'].endswith('.csv'), "%s is not a valid output file, must be a .csv file" % (kwargs['phenotypeout']) 58 | assert kwargs['groupout'].endswith('.csv'), "%s is not a valid output file, must be a .csv file" % (kwargs['groupout']) 59 | 60 | # Assert that a valid combination of start/end was given 61 | assert np.isfinite(kwargs['start']) or np.isfinite(kwargs['end']), "Please define a start time and/or end time for censoring" 62 | 63 | # Print Arguments 64 | display_kwargs(kwargs) 65 | # Make all arguments local variables 66 | locals().update(kwargs) 67 | 68 | # Fill paths 69 | phenotype = path / phenotype 70 | group = path / group 71 | phenotypeout = path / phenotypeout 72 | groupout = path / groupout 73 | 74 | # make sure files exist 75 | assert osp.exists(phenotype), "%s does not exist" % phenotype 76 | assert osp.exists(group), "%s does not exist" % group 77 | 78 | # Change times to integers 79 | start = float(start) 80 | end = float(end) 81 | 82 | """ 83 | Run censoring 84 | """ 85 | censor_diagnosis(group, phenotype, phenotypeout, groupout, efield, delta_field, start, end) 86 | 87 | """ 88 | Calculate runtime 89 | """ 90 | interval = time.time() - script_start 91 | hour = math.floor(interval/3600.0) 92 | minute = math.floor((interval - hour*3600)/60) 93 | second = math.floor(interval - hour*3600 - minute*60) 94 | 95 | if hour > 0: 96 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 97 | elif minute > 0: 98 | time_str = '%dm:%ds' % (minute, second) 99 | else: 100 | time_str = '%ds' % second 101 | 102 | print('censorData Complete [Runtime: %s]' %time_str) 103 | -------------------------------------------------------------------------------- /bin/convertEventToAge: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from pyPheWAS.pyPhewasCorev2 import * 4 | import pandas as pd 5 | import numpy as np 6 | import argparse 7 | import time 8 | import os.path as osp 9 | from pathlib import Path 10 | import math 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description="pyPheWAS Date to Age Conversion Tool") 15 | 16 | parser.add_argument('--phenotype', 
required=True, type=str, help='Name of the phenotype file (e.g. icd9_data.csv)') 17 | parser.add_argument('--group', required=True, type=str, help ='Name of the group file (e.g. groups.csv)') 18 | parser.add_argument('--phenotypeout', required=True, type=str, help='Name of the output file (original phenotype data + event ages)') 19 | parser.add_argument('--eventcolumn', required=True, type=str, help='Name of the event column in the phenotype file') 20 | parser.add_argument('--etype', required=True, type=str, help='Type of event data (CPT or ICD)') 21 | parser.add_argument('--path', required=False, default='.', type=str, help='Path to all input files and destination of output files') 22 | parser.add_argument('--precision', required=False, default=5, type=int, help='Decimal precision of age in the output file (default: 5)') 23 | parser.add_argument('--dob_column', required=False, default='DOB',type=str, help='Name of the birth date column in the group file (default: DOB)') 24 | 25 | args = parser.parse_args() 26 | return args 27 | 28 | """ 29 | Print Start Message 30 | """ 31 | start = time.time() 32 | print_start_msg() 33 | print('\nconvertEventToAge: Date to Age Conversion Tool\n') 34 | 35 | 36 | """ 37 | Retrieve and validate all arguments. 38 | """ 39 | args = parse_args() 40 | 41 | kwargs = {'phenotype': args.phenotype, 42 | 'group': args.group, 43 | 'path': Path(args.path), 44 | 'phenotypeout': args.phenotypeout, 45 | 'eventcolumn': args.eventcolumn, 46 | 'precision': args.precision, 47 | 'dob_column': args.dob_column, 48 | 'etype':args.etype 49 | } 50 | 51 | # Assert that valid files were given 52 | assert kwargs['phenotype'].endswith('.csv'), "%s is not a valid phenotype file, must be a .csv file" % (kwargs['phenotype']) 53 | assert kwargs['group'].endswith('.csv'), "%s is not a valid group file, must be a .csv file" % (kwargs['group']) 54 | assert kwargs['phenotypeout'].endswith('.csv'), "%s is not a valid output file, must be a .csv file" % (kwargs['phenout']) 55 | 56 | # Assert that valid event type was given 57 | assert kwargs['etype'] in ['CPT','ICD'], "%s is not a valid data type. 
Must be CPT or ICD" % (kwards['type']) 58 | 59 | # Print Arguments 60 | display_kwargs(kwargs) 61 | # Make all arguments local variables 62 | locals().update(kwargs) 63 | 64 | # Fill paths 65 | phenotype = path / phenotype 66 | group = path / group 67 | phenotypeout = path / phenotypeout 68 | 69 | # Assert that all files exist 70 | assert osp.exists(phenotype), "%s does not exist" % phenotype 71 | assert osp.exists(group), "%s does not exist" % group 72 | 73 | 74 | """ 75 | Read Files 76 | """ 77 | print('Reading input files') 78 | group_df = pd.read_csv(group) 79 | phen = pd.read_csv(phenotype) 80 | out_cols = list(phen.columns) 81 | 82 | 83 | """ 84 | Convert Specified Event to Age 85 | """ 86 | print('Starting conversion') 87 | group_df['nDOB'] = pd.to_datetime(group_df[dob_column], infer_datetime_format=True) 88 | phen['nEvent_date'] = pd.to_datetime(phen[eventcolumn], infer_datetime_format=True) 89 | df = pd.merge(group_df, phen, on='id') 90 | 91 | df['AgeAt'+etype] = (df['nEvent_date'] - df['nDOB']).astype('timedelta64[D]')/365.2425 92 | df['AgeAt'+etype] = df['AgeAt'+etype].round(precision) 93 | 94 | neg_mask = df['AgeAt'+etype] < 0.0 95 | if np.any(neg_mask): 96 | error_subs = df[neg_mask].drop_duplicates(subset='id')[['id']] 97 | nsub = error_subs.shape[0] 98 | psub = float(nsub) / float(df.drop_duplicates(subset='id').shape[0]) * 100.0 99 | print('\nWARNING -- %d events from %d subjects have negative ages' %(sum(neg_mask), nsub)) 100 | print('Removing %d (%.2f%%) subjects and saving to %s' % (nsub, psub, path / 'age_calc_error_*.csv')) 101 | error_subs['REMOVE'] = 1 102 | # remove subjects from group 103 | group_df = group_df.merge(error_subs, on='id', how='left') 104 | tmp = group_df[group_df['REMOVE'] == 1].copy() 105 | tmp.drop(columns=['REMOVE','nDOB'], inplace=True) 106 | f = path / 'age_calc_error_group.csv' 107 | tmp.to_csv(f, index=False) 108 | # remove subjects from ICD 109 | df = df.merge(error_subs, on='id', how='left') 110 | tmp = df[df['REMOVE'] == 1].copy() 111 | tmp.drop(columns=['REMOVE','nDOB','nEvent_date'], inplace=True) 112 | f = path / 'age_calc_error_phen.csv' 113 | tmp.to_csv(f, index=False) 114 | df = df[df['REMOVE'] != 1] 115 | # Save new group 116 | f_basename = group.stem + '__fixed.csv' 117 | f = path / f_basename 118 | print('Saving new group file (%d subjects removed) to %s\n' %(nsub, f)) 119 | group_df = group_df[group_df['REMOVE'] !=1] 120 | group_df.drop(columns=['REMOVE','nDOB'], inplace=True) 121 | group_df.to_csv(f, index=False) 122 | 123 | 124 | 125 | out_cols.append('AgeAt'+etype) 126 | 127 | print('Saving %s data to %s' % (etype,phenotypeout)) 128 | df.to_csv(phenotypeout, index=False, columns=out_cols) 129 | 130 | 131 | interval = time.time() - start 132 | hour = math.floor(interval/3600.0) 133 | minute = math.floor((interval - hour*3600)/60) 134 | second = math.floor(interval - hour*3600 - minute*60) 135 | 136 | if hour > 0: 137 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 138 | elif minute > 0: 139 | time_str = '%dm:%ds' % (minute, second) 140 | else: 141 | time_str = '%ds' % second 142 | 143 | print('convertEventToAge Complete [Runtime: %s]' %time_str) -------------------------------------------------------------------------------- /bin/createGenotypeFile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pyPheWAS.pyPhewasCorev2 import * 3 | import sys, os, math 4 | import pandas as pd 5 | import argparse 6 | from pathlib import Path 7 | import os.path as osp 8 | 
import time 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description="pyPheWAS Genotype Assignment Tool") 13 | 14 | parser.add_argument('--phenotype', required=True, type=str, help='Name of the input phenotype file') 15 | parser.add_argument('--group', required=False, default='', type=str, help='Name of the group file to add genotype map to') 16 | parser.add_argument('--groupout', required=True, type=str, help ='Name of the output group file') 17 | parser.add_argument('--case_codes', required=True, type=str, help='Case ICD codes (filename or comma-separated list)') 18 | parser.add_argument('--ctrl_codes', required=False, default='', type=str, help='Control ICD codes (filename or comma-separated list)') 19 | parser.add_argument('--code_freq', required=True, type=str, help='Minimum frequency of codes (If 2 comma-separated values are given and ctrl_codes is given, 2nd argument is applied to controls)') 20 | parser.add_argument('--path', required=False, default='.', type=str, help='Path to all input files and destination of output files (default = current directory)') 21 | 22 | args = parser.parse_args() 23 | return args 24 | 25 | 26 | """ 27 | Print Start Message 28 | """ 29 | start = time.time() 30 | print_start_msg() 31 | print('\ncreateGenotypeFile: Genotype Assignment Tool\n') 32 | 33 | 34 | """ 35 | Retrieve and validate all arguments. 36 | """ 37 | args = parse_args() 38 | 39 | kwargs = { 40 | 'phenotype':args.phenotype, 41 | 'group':args.group, 42 | 'groupout':args.groupout, 43 | 'path':Path(args.path), 44 | 'case_codes':args.case_codes, 45 | 'ctrl_codes': args.ctrl_codes, 46 | 'code_freq':args.code_freq, 47 | } 48 | 49 | # Change path to absolute path 50 | # kwargs['path'] = os.path.join(os.path.abspath(kwargs['path']),'') 51 | 52 | # Assert that files are valid 53 | assert kwargs['phenotype'].endswith('.csv'), "%s is not a valid phenotype file, must be a .csv file" % (kwargs['phenotype']) 54 | assert kwargs['groupout'].endswith('.csv'), "%s is not a valid output file, must be a .csv file" % (kwargs['groupout']) 55 | if len(kwargs['group']) > 0: 56 | assert kwargs['group'].endswith('.csv'), "%s is not a valid output file, must be a .csv file" % (kwargs['group']) 57 | 58 | # Print Arguments 59 | display_kwargs(kwargs) 60 | # Make all arguments local variables 61 | locals().update(kwargs) 62 | 63 | # Fill paths 64 | phenotype = path / phenotype 65 | groupout = path / groupout 66 | if len(str(group)) > 0: 67 | group = path / group 68 | 69 | # Assert that all files exist 70 | assert osp.exists(phenotype), "%s does not exist" % phenotype 71 | if len(str(group)) > 0: 72 | assert osp.exists(group), "%s does not exist" % group 73 | if case_codes.endswith('.csv') | case_codes.endswith('.txt'): 74 | assert osp.exists(path/case_codes), "%s does not exist" % (path / case_codes) 75 | if ctrl_codes.endswith('.csv') | ctrl_codes.endswith('.txt'): 76 | assert osp.exists(path/ctrl_codes), "%s does not exist" % (path / ctrl_codes) 77 | 78 | # Read group file 79 | if len(str(group)) > 0: 80 | group_data = pd.read_csv(group) 81 | 82 | # Make code frequency an integer 83 | code_freq = code_freq.replace(" ","").split(',') 84 | for i in range(len(code_freq)): 85 | code_freq[i] = int(code_freq[i]) 86 | 87 | """ 88 | Parse codes 89 | """ 90 | # Case 91 | if case_codes.endswith('.csv') | case_codes.endswith('.txt'): 92 | print('Reading case group codes from file') 93 | with open(path/case_codes,'r') as code_f: 94 | case_codes = code_f.readlines()[0] 95 | # remove white space and split 
into an array 96 | case_codes = case_codes.replace(" ","").replace("\n","").split(',') 97 | 98 | 99 | # Controls 100 | if len(ctrl_codes) > 0: 101 | if ctrl_codes.endswith('.csv') | ctrl_codes.endswith('.txt'): 102 | print('Reading control group codes from file') 103 | with open(path/ctrl_codes,'r') as code_f: 104 | ctrl_codes = code_f.readlines()[0] 105 | # remove white space and split into an array 106 | ctrl_codes = ctrl_codes.replace(" ", "").replace("\n", "").split(',') 107 | 108 | 109 | """ 110 | Find codes & make groups 111 | """ 112 | phen = pd.read_csv(phenotype) 113 | phen['genotype'] = -1 114 | 115 | # Cases 116 | print('Finding cases with codes: %s' % '|'.join(case_codes)) 117 | # append \Z to force regex to find exact match 118 | for ix in range(len(case_codes)): 119 | case_codes[ix] = case_codes[ix] + '\Z' 120 | phen['gen'] = phen['ICD_CODE'].str.match('|'.join(case_codes)) # find all ICD code matches 121 | phen['gen'] = phen['gen']*1 # convert to integer 122 | phen['genc'] = phen.groupby('id')['gen'].transform('sum') # count all instances 123 | case_mask = phen['genc']>=code_freq[0] 124 | rm_mask = (phen['genc'] < code_freq[0]) & (phen['genc'] > 0) # need to remove these later 125 | phen.loc[case_mask,'genotype'] = 1 126 | 127 | # Controls 128 | if len(ctrl_codes) > 0: 129 | print('Finding controls with codes: %s' % '|'.join(ctrl_codes)) 130 | # append \Z to force regex to find exact match 131 | for ix in range(len(ctrl_codes)): 132 | ctrl_codes[ix] = ctrl_codes[ix] + '\Z' 133 | phen['gen_ctrl'] = phen['ICD_CODE'].str.match('|'.join(ctrl_codes)) 134 | phen['gen_ctrl'] = phen['gen_ctrl']*1 135 | phen['genc_ctrl'] = phen.groupby('id')['gen_ctrl'].transform('sum') 136 | if len(code_freq) > 1: 137 | cf = code_freq[1] 138 | else: 139 | cf = code_freq[0] 140 | ctrl_mask = (phen['genc_ctrl']>=cf) & ~ case_mask 141 | phen.loc[ctrl_mask, 'genotype'] = 0 142 | # drop other subjects 143 | sub_mask = (case_mask | ctrl_mask) & ~rm_mask 144 | phen = phen[sub_mask] 145 | else: 146 | phen.loc[~case_mask,'genotype'] = 0 147 | # drop subjects 148 | phen = phen[~rm_mask] 149 | 150 | phen = phen[['id','genotype']].drop_duplicates() 151 | 152 | """ 153 | Save Output 154 | """ 155 | if len(str(group)) > 0: 156 | print('Merging genotype assignment with provided group file') 157 | phen = pd.merge(phen, group_data, how='inner',on='id', suffixes=('','_old')) 158 | 159 | num_case = phen[phen['genotype']==1].shape[0] 160 | num_ctrl = phen[phen['genotype']==0].shape[0] 161 | print('Cases: %d\nControls: %d' %(num_case, num_ctrl)) 162 | 163 | print('Saving gentoype mapping to %s' % groupout) 164 | phen.to_csv(groupout,index=False) 165 | 166 | """ 167 | Calculate runtime 168 | """ 169 | interval = time.time() - start 170 | hour = math.floor(interval/3600.0) 171 | minute = math.floor((interval - hour*3600)/60) 172 | second = math.floor(interval - hour*3600 - minute*60) 173 | 174 | if hour > 0: 175 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 176 | elif minute > 0: 177 | time_str = '%dm:%ds' % (minute, second) 178 | else: 179 | time_str = '%ds' % second 180 | 181 | print('createGenotypeFile Complete [Runtime: %s]' %time_str) 182 | -------------------------------------------------------------------------------- /bin/createICDCovariate: -------------------------------------------------------------------------------- 1 | from pyPheWAS.pyPhewasCorev2 import * 2 | import pandas as pd 3 | import sys, os 4 | 5 | optargs = { 6 | '--phenotype': 'phenotypes', 7 | '--group':'groups', 8 | '--path':'path', 9 | 
'--groupout': 'groupout', 10 | '--icd':'icd' 11 | } 12 | 13 | """ 14 | Retrieve and validate all arguments. 15 | """ 16 | 17 | args = sys.argv[1:] 18 | 19 | # Define any default arguments 20 | kwargs = {'path':'.'} 21 | 22 | kwargs = process_args(kwargs, optargs, *args) 23 | 24 | # Change path to absolute path 25 | kwargs['path'] = os.path.join(os.path.abspath(kwargs['path']),'') 26 | 27 | print(kwargs) 28 | 29 | 30 | # Assert that valid files were given 31 | assert kwargs['phenotypes'].endswith('.csv'), "%s is not a valid phenotype file, must be a .csv file" % (kwargs['phenotypes']) 32 | assert kwargs['groups'].endswith('.csv'), "%s is not a valid group file, must be a .csv file" % (kwargs['groups']) 33 | 34 | # Assert that the output file is valid 35 | assert kwargs['groupout'].endswith('.csv'), "%s is not a vailid output file, must be a .csv file" % (kwargs['groupout']) 36 | 37 | # Print Arguments 38 | display_kwargs(kwargs) 39 | 40 | # Make all arguments local variables 41 | locals().update(kwargs) 42 | 43 | ni = pd.read_csv(os.sep.join([path, phenotypes])) 44 | ng = pd.read_csv(os.sep.join([path, groups])) 45 | 46 | ng['icd=%s'%(icd)] = ng['id'].isin(ni[ni.icd9==icd]['id']).astype(np.int) 47 | 48 | ng.to_csv(os.sep.join([path,groupout])) 49 | 50 | -------------------------------------------------------------------------------- /bin/maximizeControls: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from pyPheWAS.pyPhewasCorev2 import display_kwargs,print_start_msg 4 | from pyPheWAS.maximize_bipartite import * 5 | import os 6 | import argparse 7 | from pathlib import Path 8 | import os.path as osp 9 | import time 10 | import math 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser(description="pyPheWAS Case/Control Matching Tool") 14 | 15 | parser.add_argument('--input', required=True, type=str, help='Name of the input group file') 16 | parser.add_argument('--deltas', required=True, type=str, help='Tolerance intervals for matching criteria') 17 | parser.add_argument('--keys', required=True, type=str, help='Matching criteria (must be columns in group file)') 18 | parser.add_argument('--goal', required=True, type=int, help='n, indicating the ratio of control to case groups that are being matched') 19 | parser.add_argument('--output', required=False, default=None, type=str, help='Name of the output group file') 20 | parser.add_argument('--condition', required=False, default='genotype', type=str, help='Field denoting groups assignments (default = genotype)') 21 | parser.add_argument('--path', required=False, default='.', type=str, help='Path to all input files and destination of output files (default = current directory)') 22 | 23 | args = parser.parse_args() 24 | return args 25 | 26 | """ 27 | Print Start Message 28 | """ 29 | start = time.time() 30 | print_start_msg() 31 | print('\nmaximizeControls: Case/Control Matching Tool\n') 32 | 33 | 34 | """ 35 | Retrieve and validate all arguments. 
36 | """ 37 | args = parse_args() 38 | kwargs = {'path': Path(args.path), 39 | 'input': args.input, 40 | 'output': args.output, 41 | 'deltas':args.deltas, 42 | 'keys':args.keys, 43 | 'condition':args.condition, 44 | 'goal': int(args.goal) 45 | } 46 | 47 | # Assert that valid files were given 48 | assert kwargs['input'].endswith('.csv'), "%s is not a valid input group file, must be a .csv file" % (kwargs['input']) 49 | assert osp.exists(kwargs['path'] / kwargs['input']), "%s does not exist" %(kwargs['path'] / kwargs['input']) 50 | 51 | if kwargs['output'] is not None: 52 | assert kwargs['output'].endswith('.csv'), "%s is not a valid output group file, must be a .csv file" % (kwargs['output']) 53 | 54 | 55 | # Print Arguments 56 | display_kwargs(kwargs) 57 | 58 | """ 59 | Run control matching 60 | """ 61 | control_match(**kwargs) 62 | 63 | 64 | """ 65 | Calculate runtime 66 | """ 67 | interval = time.time() - start 68 | hour = math.floor(interval/3600.0) 69 | minute = math.floor((interval - hour*3600)/60) 70 | second = math.floor(interval - hour*3600 - minute*60) 71 | 72 | if hour > 0: 73 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 74 | elif minute > 0: 75 | time_str = '%dm:%ds' % (minute, second) 76 | else: 77 | time_str = '%ds' % second 78 | 79 | print('maximizeControls Complete [Runtime: %s]' %time_str) 80 | -------------------------------------------------------------------------------- /bin/mergeGroups: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from pyPheWAS.pyPhewasCorev2 import * 4 | import pandas as pd 5 | import sys, os 6 | from pathlib import Path 7 | import os.path as osp 8 | import time 9 | import argparse 10 | import math 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description="pyPheWAS group/phenotype files merging tool") 15 | 16 | parser.add_argument('--phenotypefiles', required=False, default=None, type=str, help='Names of input phenotype files separated by +') 17 | parser.add_argument('--groupfiles', required=False, default=None, type=str, help='Names of input group files separated by +') 18 | parser.add_argument('--phenotypeout', required=False, default=None, type=str, help ='Name of the output phenotype file') 19 | parser.add_argument('--groupout', required=False, default=None, type=str, help='Name of the output group file') 20 | parser.add_argument('--path', required=False, default='.', type=str, help='Path to all input files and destination of output files (default = current directory)') 21 | 22 | args = parser.parse_args() 23 | return args 24 | 25 | """ 26 | Print Start Message 27 | """ 28 | start = time.time() 29 | print_start_msg() 30 | print('\nmergeGroups: Group/Phenotype File Merging Tool\n') 31 | 32 | 33 | """ 34 | Retrieve and validate all arguments. 
35 | """ 36 | args = parse_args() 37 | 38 | kwargs = { 39 | 'phenotypefiles':args.phenotypefiles, 40 | 'groupfiles':args.groupfiles, 41 | 'phenotypeout':args.phenotypeout, 42 | 'groupout':args.groupout, 43 | 'path':Path(args.path), 44 | } 45 | 46 | # assert that valid input combination was given 47 | assert (kwargs['phenotypefiles'] is not None) or (kwargs['groupfiles'] is not None), "No files were provided to merge" 48 | 49 | # Print Arguments 50 | display_kwargs(kwargs) 51 | # Make all arguments local variables 52 | locals().update(kwargs) 53 | 54 | # Assert that valid files were given 55 | n_phenf = 0 # count number of files 56 | if phenotypefiles is not None: 57 | for filename in phenotypefiles.split('+'): 58 | assert filename.endswith('.csv'), "%s is not a valid phenotype file, must be a .csv file" % (filename) 59 | assert osp.exists(path/filename), "phenotype file (%s) does not exist" % (path/filename) 60 | n_phenf += 1 61 | 62 | n_groupf = 0 # count number of group files 63 | if groupfiles is not None: 64 | for filename in groupfiles.split('+'): 65 | assert filename.endswith('.csv'), "%s is not a valid group file, must be a .csv file" % (filename) 66 | assert osp.exists(path/filename), "group file (%s) does not exist" % (path/filename) 67 | n_groupf += 1 68 | 69 | # Assert that the output files are valid 70 | if phenotypefiles is not None: 71 | assert phenotypeout is not None, "Please provide a filename for the merged phenotype data" 72 | assert phenotypeout.endswith('.csv'), "%s is not a valid output file, must be a .csv file" % phenotypeout 73 | 74 | if groupfiles is not None: 75 | assert groupout is not None, "Please provide a filename for the merged group data" 76 | assert groupout.endswith('.csv'), "%s is not a vailid output file, must be a .csv file" % groupout 77 | 78 | """ 79 | Read & Merge the group and/or phenotype files 80 | """ 81 | if phenotypefiles is not None: 82 | print('Reading %d phenotype files' %n_phenf) 83 | phensDF = [pd.read_csv(path/filename) for filename in phenotypefiles.split('+')] 84 | print('Merging phenotype files') 85 | phen_merged = pd.concat(phensDF) 86 | phen_merged.drop_duplicates(inplace=True) 87 | print('Writing merged phenotype data to %s' %(path / phenotypeout)) 88 | phen_merged.to_csv(path / phenotypeout, index=False) 89 | 90 | if groupfiles is not None: 91 | print('Reading %d group files' %n_groupf) 92 | groupsDF = [pd.read_csv(path/filename) for filename in groupfiles.split('+')] 93 | print('Merging group files') 94 | group_merged = pd.concat(groupsDF) 95 | group_merged.drop_duplicates('id', inplace=True) 96 | print('Writing merged group data to %s' % (path / groupout)) 97 | group_merged.to_csv(path / groupout, index=False) 98 | 99 | 100 | 101 | """ 102 | Calculate runtime 103 | """ 104 | interval = time.time() - start 105 | hour = math.floor(interval/3600.0) 106 | minute = math.floor((interval - hour*3600)/60) 107 | second = math.floor(interval - hour*3600 - minute*60) 108 | 109 | if hour > 0: 110 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 111 | elif minute > 0: 112 | time_str = '%dm:%ds' % (minute, second) 113 | else: 114 | time_str = '%ds' % second 115 | 116 | print('createGenotypeFile Complete [Runtime: %s]' %time_str) -------------------------------------------------------------------------------- /bin/pyPhewasExplorer: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | 5 | parser = argparse.ArgumentParser(description='pyPheWAS Explorer Launch 
Script') 6 | parser.add_argument('--indir', required=False, default='.', type=str, help='Input directory for pyPheWAS analysis') 7 | args = parser.parse_args() 8 | 9 | 10 | """ 11 | Print Start Message 12 | """ 13 | from subprocess import Popen 14 | from pathlib import Path 15 | from pyPheWAS.pyPhewasCorev2 import print_start_msg 16 | from pyPheWAS.pyPhewasExplorerCore import run_Explorer_GUI 17 | 18 | print_start_msg() 19 | print('\npyPheWAS Explorer: Interactive Visualization of PheWAS models\n') 20 | 21 | """ 22 | Retrieve and validate all arguments. 23 | """ 24 | data_path = Path(args.indir).resolve() 25 | 26 | # Assert that valid directory was given 27 | assert data_path.exists(), "%s does not exist" % data_path 28 | assert data_path.is_dir(), "%s is not a valid directory" % data_path 29 | 30 | # Assert that required input files exist 31 | group_f = data_path / "group.csv" 32 | assert group_f.exists(), "%s does not contain a group file (group.csv)" % data_path 33 | 34 | icd_f = data_path / "icds.csv" 35 | bin_fm_f = data_path / "binary_feature_matrix.csv" 36 | cnt_fm_f = data_path / "count_feature_matrix.csv" 37 | dur_fm_f = data_path / "duration_feature_matrix.csv" 38 | FMs_exist = bin_fm_f.exists() & cnt_fm_f.exists() & dur_fm_f.exists() 39 | assert icd_f.exists() | FMs_exist, "%s does not contain an EMR file (icds.csv)" % data_path 40 | 41 | print("Setting up pyPheWAS Explorer using data found in %s" % data_path) 42 | 43 | 44 | """ 45 | Launch the servers 46 | """ 47 | 48 | # Launch the flask server (Back End) as a subprocess 49 | exec_path = Path(__file__).parent.absolute()/'pyPhewasExplorerBackEnd' 50 | process = Popen('python %s --indir %s' %(exec_path, data_path), shell=True) 51 | 52 | # Launch the simple HTTP server (Front End) as the main process 53 | run_Explorer_GUI() 54 | -------------------------------------------------------------------------------- /bin/pyPhewasExplorerBackEnd: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | 5 | parser = argparse.ArgumentParser(description='pyPheWAS Explorer server') 6 | parser.add_argument('--indir', required=False, default='.', type=str, help='Input directory for pyPheWAS analysis') 7 | args = parser.parse_args() 8 | 9 | 10 | # for dev mode, run before calling this script: 11 | # export FLASK_ENV=development 12 | 13 | # constants 14 | BIN_REG = 0 15 | COUNT_REG = 1 16 | DUR_REG = 2 17 | reg_key = {BIN_REG : 'binary', 18 | COUNT_REG : 'count', 19 | DUR_REG : 'duration'} 20 | 21 | """ 22 | Get arguments 23 | """ 24 | import sys 25 | import flask 26 | from flask import Flask 27 | from flask_cors import CORS 28 | import numpy as np 29 | import pandas as pd 30 | from pyPheWAS.pyPhewasExplorerCore import * 31 | import scipy.stats 32 | from pathlib import Path 33 | 34 | data_path = Path(args.indir).resolve() 35 | 36 | group_f = data_path / "group.csv" 37 | icd_f = data_path / "icds.csv" 38 | bin_fm_f = data_path / "binary_feature_matrix.csv" 39 | cnt_fm_f = data_path / "count_feature_matrix.csv" 40 | dur_fm_f = data_path / "duration_feature_matrix.csv" 41 | FMs_exist = bin_fm_f.exists() & cnt_fm_f.exists() & dur_fm_f.exists() 42 | 43 | """ 44 | Set everything up 45 | """ 46 | try: 47 | # load group data 48 | group_data = get_group_file(group_f) 49 | if not 'id' in group_data.columns: 50 | raise Exception("Group file (%s) does not contain a subject identifier column ('id')" % group_f) 51 | 52 | response_options, gvars = process_group_vars(group_data) 53 | 
init_response = response_options[0] 54 | 55 | if FMs_exist: 56 | print('Loading binary feature matrix') 57 | fm_bin = np.loadtxt(data_path/'binary_feature_matrix.csv', delimiter=',') 58 | print('Loading count feature matrix') 59 | fm_cnt = np.loadtxt(data_path/'count_feature_matrix.csv', delimiter=',') 60 | print('Loading duration feature matrix') 61 | fm_dur = np.loadtxt(data_path/'duration_feature_matrix.csv', delimiter=',') 62 | sub_count = [group_data.shape[0], group_data.shape[0], group_data.shape[0]] 63 | assert [fm_bin.shape[0],fm_cnt.shape[0],fm_dur.shape[0]] == sub_count, "Feature matrices and group data do not contain the same number of subjects. Please delete the feature matrices and restart the Explorer" 64 | else: 65 | print('Building Feature Matrices') 66 | pheno = get_icd_codes(icd_f) 67 | fm_bin, fm_cnt, fm_dur, columns = generate_feature_matrix(group_data, pheno) 68 | print('Saving feature matrices') 69 | h = ','.join(columns) 70 | np.savetxt(data_path/'binary_feature_matrix.csv', fm_bin, delimiter=',', header=h) 71 | print('...') 72 | np.savetxt(data_path/'count_feature_matrix.csv', fm_cnt, delimiter=',', header=h) 73 | print('...') 74 | np.savetxt(data_path/'duration_feature_matrix.csv', fm_dur, delimiter=',', header=h) 75 | print('...') 76 | 77 | except Exception as e: 78 | print('\nERROR encountered while setting up pyPheWAS Explorer') 79 | print(e.args[0]) 80 | print('----') 81 | print('Please press Ctrl+C to quit') 82 | sys.exit() 83 | 84 | print("pyPheWAS Explorer Ready") 85 | print("Please open http://localhost:8000/ in a web brower (preferably Google Chrome)") 86 | print("---\n\n\n") 87 | 88 | """ 89 | create Flask app 90 | """ 91 | app = Flask(__name__) 92 | CORS(app) 93 | 94 | @app.route('/grab_data', methods=['GET','POST']) 95 | def get_signals(): 96 | # get data from the client 97 | client_data = flask.request.json 98 | command = client_data['cmd'] 99 | 100 | if command == "init_response": # this is only run for the INITAL response variable 101 | msg = pd.DataFrame(columns=['msg'], data=[init_response]) 102 | data_obj = msg.to_json(orient='records') 103 | 104 | elif command == "init_groupvars": # this is only run for the INITAL response variable 105 | msg = pd.DataFrame(columns=['msg'], data=gvars) 106 | data_obj = msg.to_json(orient='records') 107 | 108 | elif command == "group_data": 109 | response = init_response if (r := client_data['response']) == "INIT" else r 110 | var_df = pd.DataFrame(columns=['var','corr','pval','g0','g1','ropt'], index=range(len(gvars))) 111 | x = group_data[response].values 112 | n_response1 = sum(x) 113 | n_response0 = group_data.shape[0] - n_response1 114 | for ix, gv in enumerate(gvars): 115 | y = group_data[gv].values 116 | [corr, pval] = scipy.stats.spearmanr(x, y) 117 | data = [gv, corr, pval, 118 | n_response0, n_response1, 119 | gv in response_options 120 | ] 121 | var_df.loc[ix] = data 122 | data_obj = var_df.to_json(orient='records') 123 | 124 | elif command == "histograms": 125 | response = init_response if (r := client_data['response']) == "INIT" else r 126 | to_concat = [] 127 | mask0 = group_data[response] == 0 128 | for g in gvars: 129 | print(g) 130 | h_g = get_1D_histogram(group_data, mask0, g) 131 | to_concat.append(h_g) 132 | h = pd.concat(to_concat, sort=False, ignore_index=True) 133 | data_obj = h.to_json(orient='records') 134 | 135 | elif command == "compute_hist2D": 136 | var1 = client_data["var1"] 137 | var2 = client_data["var2"] 138 | response = client_data['response'] 139 | if var1 == '': 140 | msg = 
pd.DataFrame(columns=['msg']) 141 | msg.loc[0, 'msg'] = "no_data" 142 | data_obj = msg.to_json(orient='records') 143 | elif var1 == var2: 144 | msg = pd.DataFrame(columns=['msg']) 145 | msg.loc[0, 'msg'] = "select_2nd_var" 146 | data_obj = msg.to_json(orient='records') 147 | else: 148 | h = get_2D_histogram(group_data, var1, var2, response) 149 | data_obj = h.to_json(orient='records') 150 | 151 | elif command == "independence_tests": 152 | var1 = client_data["var1"] 153 | var2 = client_data["var2"] 154 | response = client_data['response'] 155 | if var1 == '': 156 | msg = pd.DataFrame(columns=['msg']) 157 | msg.loc[0,'msg'] = "no_data" 158 | data_obj = msg.to_json(orient='records') 159 | elif var1 == var2: 160 | msg = pd.DataFrame(columns=['msg']) 161 | msg.loc[0, 'msg'] = "select_2nd_var" 162 | data_obj = msg.to_json(orient='records') 163 | else: 164 | stats = variable_comparison(group_data, var1, var2, response) 165 | data_obj = stats.to_json(orient='records') 166 | 167 | elif command == "run_reg": 168 | reg_type = int(client_data['rtype']) 169 | if reg_type == -1: 170 | # init - don't do anything 171 | msg = pd.DataFrame(columns=['msg']) 172 | msg.loc[0,'msg'] = "no_data" 173 | data_obj = msg.to_json(orient='records') 174 | else: 175 | # build & send regressions to notebook 176 | save_cov_data = bool(client_data['save_cov']) 177 | if reg_type == BIN_REG: 178 | regressions = run_phewas(fm_bin, group_data, client_data['model_str'], reg_key[BIN_REG], save_cov=save_cov_data, outpath=data_path) 179 | elif reg_type == COUNT_REG: 180 | regressions = run_phewas(fm_cnt, group_data, client_data['model_str'], reg_key[COUNT_REG], save_cov=save_cov_data, outpath=data_path) 181 | elif reg_type == DUR_REG: 182 | regressions = run_phewas(fm_dur, group_data, client_data['model_str'], reg_key[DUR_REG], save_cov=save_cov_data, outpath=data_path) 183 | data_obj = regressions.to_json(orient='records') 184 | else: 185 | data_obj = "ERROR Unknown command %s" % command 186 | 187 | return flask.jsonify(data_obj) 188 | 189 | 190 | # execute the application (by default, it should be hosted at localhost:5000, which you will see in the output) 191 | if __name__ == '__main__': 192 | app.run() 193 | -------------------------------------------------------------------------------- /bin/pyPhewasLookup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | 5 | def parse_args(): 6 | parser = argparse.ArgumentParser(description="pyPheWAS ICD-Phecode Lookup Tool") 7 | 8 | parser.add_argument('--phenotype', required=True, type=str, help='Name of the phenotype file (e.g. icd9_data.csv)') 9 | parser.add_argument('--group', required=True, type=str, help ='Name of the group file (e.g. 
groups.csv)') 10 | parser.add_argument('--reg_type', required=True, type=str, help='Type of regression that you would like to use (log, lin, or dur)') 11 | parser.add_argument('--path', required=False, default='.', type=str, help='Path to all input files and destination of output files') 12 | parser.add_argument('--outfile', required=False, default=None, type=str, help='Name of the output file for the feature matrix') 13 | parser.add_argument('--phewas_cov', required=False, default=None, type=str, help='PheCode to use as covariates in pyPhewasModel regression') 14 | 15 | args = parser.parse_args() 16 | return args 17 | 18 | 19 | args = parse_args() 20 | 21 | """ 22 | Print Start Message 23 | """ 24 | from pyPheWAS.pyPhewasCorev2 import * 25 | import sys, os 26 | import time 27 | import math 28 | from pathlib import Path 29 | import os.path as osp 30 | 31 | start = time.time() 32 | print_start_msg() 33 | print('\npyPhewasLookup: ICD-Phecode Lookup Tool\n') 34 | 35 | 36 | """ 37 | Retrieve and validate all arguments. 38 | """ 39 | 40 | kwargs = {'path': Path(args.path), 41 | 'phenotype': args.phenotype, 42 | 'group': args.group, 43 | 'reg_type':args.reg_type, 44 | 'phewas_cov':args.phewas_cov, 45 | 'outfile':args.outfile, 46 | } 47 | 48 | # Assert that a valid regression type was used 49 | assert args.reg_type in regression_map.keys(), "%s is not a valid regression type" % args.reg_type 50 | assert (kwargs['phewas_cov'] is None) or (kwargs['phewas_cov'] in phewas_codes['PheCode'].values), "phewas_cov input (%s) is not a valid PheCode" % kwargs['phewas_cov'] 51 | 52 | # Assert that valid file names were given 53 | assert kwargs['phenotype'].endswith('.csv'), "%s is not a valid phenotype file, must be a .csv file" % (kwargs['phenotype']) 54 | assert kwargs['group'].endswith('.csv'), "%s is not a valid group file, must be a .csv file" % (kwargs['group']) 55 | # Assert that valid files were given 56 | assert osp.exists(kwargs['path'] / kwargs['phenotype']), "%s does not exist" %(kwargs['path'] / kwargs['phenotype']) 57 | assert osp.exists(kwargs['path'] / kwargs['group']), "%s does not exist" %(kwargs['path'] / kwargs['group']) 58 | 59 | # Assign the output file if none was assigned 60 | if kwargs['outfile'] is None: 61 | kwargs['outfile'] = "feature_matrix_" + kwargs['group'] 62 | 63 | # Assert that the output file is valid 64 | assert kwargs['outfile'].endswith('.csv'), "%s is not a valid outputfile, must be a .csv file" % (kwargs['outfile']) 65 | 66 | # Print Arguments 67 | display_kwargs(kwargs) 68 | # Make all arguments local variables 69 | locals().update(kwargs) 70 | 71 | """ 72 | Calculate feature matrix 73 | """ 74 | print("Retrieving phenotype data...") 75 | phenotypes = get_icd_codes(path, phenotype, regression_map[reg_type]) 76 | 77 | print("Retrieving group data...") 78 | genotypes = get_group_file(path, group) 79 | 80 | if not {'MaxAgeAtVisit'}.issubset(genotypes.columns): 81 | print('WARNING: MaxAgeAtVisit was not found in group file. 
Calculating MaxAgeAtVisit from phenotype data') 82 | phenotypes['MaxAgeAtVisit'] = phenotypes.groupby(['id'])['AgeAtICD'].transform('max') 83 | genotypes = pd.merge(genotypes, phenotypes[['id','MaxAgeAtVisit']].drop_duplicates(subset='id'),on='id',how='left') 84 | 85 | print("Generating feature matrix...") 86 | fm,columns = generate_feature_matrix(genotypes, phenotypes, regression_map[reg_type], 'ICD', phewas_cov) 87 | 88 | """ 89 | Save feature matrix 90 | """ 91 | print("Saving feature matrices to %s" % (path /('*_' + outfile))) 92 | h = ','.join(columns) 93 | 94 | np.savetxt(path /('agg_measures_' + outfile), fm[0],delimiter=',',header=h) 95 | print("...") 96 | np.savetxt(path /('icd_age_' + outfile), fm[1],delimiter=',',header=h) 97 | 98 | if phewas_cov is not None: 99 | # only save this if it actually means something 100 | print("...") 101 | np.savetxt(path /('phewas_cov_' + outfile), fm[2],delimiter=',',header=h) 102 | 103 | """ 104 | Calculate runtime 105 | """ 106 | interval = time.time() - start 107 | hour = math.floor(interval/3600.0) 108 | minute = math.floor((interval - hour*3600)/60) 109 | second = math.floor(interval - hour*3600 - minute*60) 110 | 111 | if hour > 0: 112 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 113 | elif minute > 0: 114 | time_str = '%dm:%ds' % (minute, second) 115 | else: 116 | time_str = '%ds' % second 117 | 118 | print('pyPhewasLookup Complete [Runtime: %s]' %time_str) 119 | 120 | -------------------------------------------------------------------------------- /bin/pyPhewasModel: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | 5 | def parse_args(): 6 | parser = argparse.ArgumentParser(description="pyPheWAS Mass PheCode Regression Tool") 7 | 8 | parser.add_argument('--feature_matrix', required=True, type=str, help='Name of the feature matrix file (e.g. feature_matrix_group.csv)') 9 | parser.add_argument('--group', required=True, type=str, help ='Name of the group file (e.g. 
groups.csv)') 10 | parser.add_argument('--reg_type', required=True, type=str, help='Type of regression that you would like to use (log, lin, or dur)') 11 | parser.add_argument('--path', required=False, default='.', type=str,help='Path to all input files and destination of output files') 12 | parser.add_argument('--outfile', required=False, default=None, type=str,help='Name of the output file for the regression results') 13 | parser.add_argument('--covariates', required=False, default='', type=str, help='Variables to be used as covariates') 14 | parser.add_argument('--target', required=False, default='genotype', type=str, help='Binary variable that indicates case/control groups (default: genotype)') 15 | parser.add_argument('--canonical', required=False, default="True", type=str, help='Use target as a predictor [True, default] or the dependent variable [False] in the PheWAS equation') 16 | parser.add_argument('--phewas_cov', required=False, default=None, type=str, help='PheCode to use as a covariate in regression') 17 | parser.add_argument('--reg_thresh', required=False, default=5, type=int, help='Threshold of subjects presenting a PheCode required for running regression (default: 5)') 18 | 19 | args = parser.parse_args() 20 | return args 21 | 22 | 23 | args = parse_args() # doing this first reduces help message time 24 | 25 | """ 26 | Import and Print Start Message 27 | """ 28 | import time 29 | import math 30 | from pathlib import Path 31 | import os.path as osp 32 | from pyPheWAS.pyPhewasCorev2 import * 33 | 34 | start = time.time() 35 | print_start_msg() 36 | print('\npyPhewasModel: Mass PheCode Regression Tool\n') 37 | 38 | 39 | """ 40 | Retrieve and validate all arguments. 41 | """ 42 | kwargs = {'path': Path(args.path), 43 | 'feature_matrix': args.feature_matrix, 44 | 'group': args.group, 45 | 'reg_type':args.reg_type, 46 | 'covariates':args.covariates, 47 | 'phewas_cov':args.phewas_cov, 48 | 'target':args.target, 49 | 'canonical': args.canonical, 50 | 'reg_thresh':args.reg_thresh, 51 | 'outfile':args.outfile 52 | } 53 | 54 | # Assert that a valid regression type was used 55 | assert args.reg_type in regression_map.keys(), "%s is not a valid regression type" % args.reg_type 56 | assert (kwargs['phewas_cov'] is None) or (kwargs['phewas_cov'] in phewas_codes['PheCode'].values), "phewas_cov input (%s) is not a valid PheCode" % kwargs['phewas_cov'] 57 | 58 | # Assert that valid file types were given 59 | assert kwargs['feature_matrix'].endswith('.csv'), "%s is not a valid feature matrix file, must be a .csv file" % (kwargs['feature_matrix']) 60 | assert kwargs['group'].endswith('.csv'), "%s is not a valid group file, must be a .csv file" % (kwargs['group']) 61 | # Assert that valid files were given 62 | assert osp.exists(kwargs['path'] / ('agg_measures_' + kwargs['feature_matrix'])), "%s does not exist" %(kwargs['path'] / kwargs['feature_matrix']) 63 | assert osp.exists(kwargs['path'] / kwargs['group']), "%s does not exist" %(kwargs['path'] / kwargs['group']) 64 | 65 | # Assign the output file if none was assigned 66 | if kwargs['outfile'] is None: 67 | kwargs['outfile'] = "regressions_" + kwargs['group'] 68 | # Assert that the output file is valid 69 | assert kwargs['outfile'].endswith('.csv'), "%s is not a valid outputfile, must be a .csv file" % (kwargs['outfile']) 70 | 71 | assert kwargs['canonical'] in ["True", "False"], "%s is not a valid canonical value (True or False)" % kwargs['canonical'] 72 | kwargs['canonical'] = eval(kwargs['canonical']) 73 | 74 | # Print Arguments 75 | 
display_kwargs(kwargs) 76 | 77 | # Make all arguments local variables 78 | locals().update(kwargs) 79 | 80 | 81 | """ 82 | Load Data 83 | """ 84 | print("Retrieving group data.") 85 | genotypes = get_group_file(path, group) 86 | 87 | # check target variable 88 | assert target in genotypes.columns, "target %s is not a column in the group file" % target 89 | 90 | # check covariates 91 | if covariates != '': 92 | for cov in covariates.replace(" ", "").split('+'): 93 | if cov == MAX_AGE_AT_ICD: 94 | assert cov not in genotypes.columns, "%s is a reserved covariate name; please rename this column in the group file" % cov 95 | elif cov in RESERVED_COL_NAMES: 96 | assert False, "%s is a reserved covariate name [%s]; please remove from the covariate list and/or rename this column in the group file" % (cov, ','.join(RESERVED_COL_NAMES)) 97 | else: 98 | assert cov in genotypes.columns, "covariate %s is not a column in the group file" % cov 99 | 100 | print('Loading feature matrices.') 101 | 102 | a = np.loadtxt(path / ('agg_measures_' + feature_matrix), delimiter=',') 103 | 104 | if 'MaxAgeAtICD' in covariates: 105 | assert osp.exists(kwargs['path'] / ('icd_age_' + kwargs['feature_matrix'])), "%s does not exist" % (kwargs['path'] / ('icd_age_' + kwargs['feature_matrix'])) 106 | b = np.loadtxt(path / ('icd_age_' + feature_matrix), delimiter=',') 107 | else: 108 | b = np.zeros_like(a) 109 | 110 | if phewas_cov is not None: 111 | assert osp.exists(kwargs['path'] / ('phewas_cov_' + kwargs['feature_matrix'])), "%s does not exist" % (kwargs['path'] / ('phewas_cov_' + kwargs['feature_matrix'])) 112 | c = np.loadtxt(path / ('phewas_cov_' + feature_matrix), delimiter=',') 113 | else: 114 | c = np.zeros_like(a) 115 | 116 | # reconstruct full feature matrix 117 | fm = np.array([a,b,c]) 118 | 119 | 120 | """ 121 | Run Regressions 122 | """ 123 | print("Running PheWAS regressions...") 124 | regressions, model_str = run_phewas(fm, genotypes, 'ICD', regression_map[reg_type], 125 | covariates=covariates, target=target, 126 | phe_thresh=reg_thresh, canonical=canonical) 127 | 128 | 129 | print("Saving regression data to %s" % (path / outfile)) 130 | if phewas_cov is not None: 131 | model_str = model_str.replace('phewas_cov', f'phewas_cov_{phewas_cov}') 132 | 133 | header = ','.join(['model_equation', model_str, 'group', group, 'feature_matrix', feature_matrix, 'reg_type', reg_type, 'code_type', 'ICD']) + '\n' 134 | f = open(path / outfile, 'w') 135 | f.write(header) 136 | regressions.to_csv(f,index=False) 137 | f.close() 138 | 139 | 140 | """ 141 | Calculate runtime 142 | """ 143 | 144 | interval = time.time() - start 145 | hour = math.floor(interval/3600.0) 146 | minute = math.floor((interval - hour*3600)/60) 147 | second = math.floor(interval - hour*3600 - minute*60) 148 | 149 | if hour > 0: 150 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 151 | elif minute > 0: 152 | time_str = '%dm:%ds' % (minute, second) 153 | else: 154 | time_str = '%ds' % second 155 | 156 | print('pyPhewasModel Complete [Runtime: %s]' %time_str) -------------------------------------------------------------------------------- /bin/pyPhewasPlot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | 5 | def parse_args(): 6 | parser = argparse.ArgumentParser(description="pyPheWAS Plotting Tool") 7 | 8 | parser.add_argument('--statfile', required=True, type=str, help='Name of the statistics/regressions file') 9 | parser.add_argument('--thresh_type', required=True, 
type=str, help=' the type of threshold to be used in the plot') 10 | parser.add_argument('--custom_thresh', required=False, default=None, type=float, help='Custom threshold value (float between 0 and 1)') 11 | parser.add_argument('--imbalance', required=False, default="True", help = 'Show the direction of imbalance in the Manhattan plot [True (default) or False]') 12 | parser.add_argument('--plot_all_pts', required=False, default="True", help='Show all points regardless of significance in the Manhattan plot [True (default) or False]') 13 | parser.add_argument('--old_style', required=False, default="False", help = 'Use old plot style (no gridlines, all spines shown)') 14 | parser.add_argument('--phewas_label', required=False, default="plot", type=str, help='Location of PheCode labels on Log Odds plot [plot (default) or axis]') 15 | parser.add_argument('--path', required=False, default='.', type=str, help='Path to all input files and destination of output files') 16 | parser.add_argument('--outfile', required=False, default=None, type=str, help='Name of the output file for the plot') 17 | 18 | args = parser.parse_args() 19 | return args 20 | 21 | args = parse_args() 22 | 23 | """ 24 | Print Start Message 25 | """ 26 | from pyPheWAS.pyPhewasCorev2 import * 27 | import os 28 | import sys 29 | 30 | import time 31 | import math 32 | import matplotlib.pyplot as plt 33 | from pathlib import Path 34 | import os.path as osp 35 | 36 | start = time.time() 37 | print_start_msg() 38 | print('\npyPhewasPlot: Plot Mass PheCode Regression Results\n') 39 | 40 | 41 | """ 42 | Retrieve and validate all arguments. 43 | """ 44 | 45 | kwargs = {'path': Path(args.path), 46 | 'statfile': args.statfile, 47 | 'thresh_type': args.thresh_type, 48 | 'imbalance': args.imbalance, 49 | 'plot_all_pts': args.plot_all_pts, 50 | 'old_style': args.old_style, 51 | 'custom_thresh':args.custom_thresh, 52 | 'phewas_label': args.phewas_label, 53 | 'outfile':args.outfile, 54 | } 55 | 56 | # Assert that a valid threshold type was used 57 | assert kwargs['thresh_type'] in threshold_map.keys(), "%s is not a valid regression type" % (kwargs['thresh_type']) 58 | if kwargs['thresh_type'] == 'custom': 59 | assert kwargs['custom_thresh'] is not None, "Custom threshold specified. 
Please define --custom_thresh" 60 | assert (kwargs['custom_thresh'] < 1.0) & (kwargs['custom_thresh'] > 0.0), "%s is not a valid threshold (should be between 0.0 and 1.0)" % (kwargs['custom_thresh']) 61 | 62 | # Assert that valid files were given 63 | assert kwargs['statfile'].endswith('.csv'), "%s is not a valid statistics file (must be a .csv file)" % (kwargs['statfile']) 64 | assert osp.exists(kwargs['path'] / kwargs['statfile']), "%s does not exist" %(kwargs['path'] / kwargs['statfile']) 65 | 66 | assert kwargs['phewas_label'] in ["plot","axis"], "%s is not a valid PheCode label location" % (kwargs['phewas_label']) 67 | 68 | for bool_arg in ['imbalance','plot_all_pts','old_style']: 69 | assert kwargs[bool_arg] in ["True", "False"], "%s is not a valid imbalance value (\"True\" or \"False\")" % kwargs[bool_arg] 70 | kwargs[bool_arg] = eval(kwargs[bool_arg]) 71 | 72 | 73 | # Print Arguments 74 | display_kwargs(kwargs) 75 | # Make all arguments local variables 76 | locals().update(kwargs) 77 | 78 | 79 | """ 80 | Load Data 81 | """ 82 | 83 | ff = open(path / statfile) 84 | header = ff.readline().strip().split(',') 85 | reg_args = {} 86 | for i in range(0,len(header),2): 87 | reg_args[header[i]] = header[i+1] 88 | print('\nRegression Info') 89 | display_kwargs(reg_args) 90 | 91 | # Read in the remaining data (the pandas DataFrame) 92 | regressions = pd.read_csv(ff,dtype={'PheWAS Code':str}) 93 | regressions.dropna(subset=['p-val','"-log(p)"'], inplace=True) 94 | 95 | try: 96 | # make confidence interval numeric instead of a string 97 | regressions[['lowlim', 'uplim']] = regressions['Conf-interval beta'].str.split(',', expand=True) 98 | regressions['uplim'] = regressions.uplim.str.replace(']', '', regex=False) 99 | regressions['lowlim'] = regressions.lowlim.str.replace('[', '', regex=False) 100 | regressions = regressions.astype(dtype={'uplim':float,'lowlim':float}) 101 | except Exception as e: 102 | print('Error reading regression file:') 103 | print(e) 104 | sys.exit() 105 | 106 | 107 | """ 108 | Create plots 109 | """ 110 | 111 | # Get the threshold 112 | pvalues = regressions['p-val'].values 113 | 114 | if thresh_type == 'bon': 115 | thresh = get_bon_thresh(pvalues,0.05) 116 | elif thresh_type == 'fdr': 117 | thresh = get_fdr_thresh(pvalues,0.05) 118 | elif thresh_type == 'custom': 119 | thresh = custom_thresh 120 | print('%s threshold: %0.5f'%(thresh_type,thresh)) 121 | 122 | # figure out file names 123 | if outfile is not None: 124 | file_name, file_format = osp.splitext(outfile) 125 | savem = path / (file_name + '_Manhattan' + file_format) 126 | saveb = path / (file_name + '_EffectSize' + file_format) 127 | savev = path / (file_name + '_Volcano' + file_format) 128 | file_format = file_format[1:] # remove '.'
from from first index 129 | else: 130 | savem = '' 131 | saveb = '' 132 | savev = '' 133 | file_format = '' 134 | 135 | plot_manhattan( 136 | regressions, 137 | thresh=thresh, 138 | show_imbalance=imbalance, 139 | plot_all_pts=plot_all_pts, 140 | old_plot_style=old_style, 141 | code_type='ICD', 142 | save=savem, 143 | save_format=file_format, 144 | ) 145 | 146 | plot_effect_size( 147 | regressions, 148 | thresh=thresh, 149 | model_str=reg_args.get('model_equation', None), 150 | reg_type=reg_args.get('reg_type', None), 151 | label_loc=phewas_label, 152 | old_plot_style=old_style, 153 | code_type='ICD', 154 | save=saveb, 155 | save_format=file_format, 156 | ) 157 | 158 | plot_volcano( 159 | regressions, 160 | model_str=reg_args.get('model_equation', None), 161 | reg_type=reg_args.get('reg_type', None), 162 | old_plot_style=old_style, 163 | code_type='ICD', 164 | save=savev, 165 | save_format=file_format 166 | ) 167 | 168 | if outfile is not None: 169 | print("Saving plots to %s" % (path)) 170 | else: 171 | plt.show() 172 | 173 | 174 | """ 175 | Calculate runtime 176 | """ 177 | 178 | interval = time.time() - start 179 | hour = math.floor(interval/3600.0) 180 | minute = math.floor((interval - hour*3600)/60) 181 | second = math.floor(interval - hour*3600 - minute*60) 182 | 183 | if hour > 0: 184 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 185 | elif minute > 0: 186 | time_str = '%dm:%ds' % (minute, second) 187 | else: 188 | time_str = '%ds' % second 189 | 190 | print('pyPhewasPlot Complete [Runtime: %s]' %time_str) -------------------------------------------------------------------------------- /bin/pyProwasLookup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | 5 | def parse_args(): 6 | parser = argparse.ArgumentParser(description="pyProWAS CPT-Procode Lookup Tool") 7 | 8 | parser.add_argument('--phenotype', required=True, type=str, help='Name of the phenotype file (e.g. cpt_data.csv)') 9 | parser.add_argument('--group', required=True, type=str, help ='Name of the group file (e.g. groups.csv)') 10 | parser.add_argument('--reg_type', required=True, type=str, help='Type of regression that you would like to use (log, lin, or dur)') 11 | parser.add_argument('--path', required=False, default='.', type=str, help='Path to all input files and destination of output files') 12 | parser.add_argument('--outfile', required=False, default=None, type=str, help='Name of the output file for the feature matrix') 13 | parser.add_argument('--prowas_cov', required=False, default=None, type=str, help='ProCode to use as covariates in pyProwasModel regression') 14 | 15 | args = parser.parse_args() 16 | return args 17 | 18 | 19 | args = parse_args() 20 | 21 | """ 22 | Print Start Message 23 | """ 24 | from pyPheWAS.pyPhewasCorev2 import * 25 | import sys, os 26 | import time 27 | import math 28 | from pathlib import Path 29 | import os.path as osp 30 | 31 | start = time.time() 32 | print_start_msg() 33 | print('\npyProwasLookup: CPT-Procode Lookup Tool\n') 34 | 35 | 36 | """ 37 | Retrieve and validate all arguments. 
38 | """ 39 | 40 | kwargs = {'path': Path(args.path), 41 | 'phenotype': args.phenotype, 42 | 'group': args.group, 43 | 'reg_type':args.reg_type, 44 | 'prowas_cov':args.prowas_cov, 45 | 'outfile':args.outfile, 46 | } 47 | 48 | # Assert that a valid regression type was used 49 | assert kwargs['reg_type'] in regression_map.keys(), "%s is not a valid regression type" % kwargs['reg_type'] 50 | assert (kwargs['prowas_cov'] is None) or (kwargs['prowas_cov'] in prowas_codes['prowas_code'].values), "prowas_cov input (%s) is not a valid ProCode" % kwargs['prowas_cov'] 51 | 52 | # Assert that valid file names were given 53 | assert kwargs['phenotype'].endswith('.csv'), "%s is not a valid phenotype file, must be a .csv file" % (kwargs['phenotype']) 54 | assert kwargs['group'].endswith('.csv'), "%s is not a valid group file, must be a .csv file" % (kwargs['group']) 55 | # Assert that valid files were given 56 | assert osp.exists(kwargs['path'] / kwargs['phenotype']), "%s does not exist" %(kwargs['path'] / kwargs['phenotype']) 57 | assert osp.exists(kwargs['path'] / kwargs['group']), "%s does not exist" %(kwargs['path'] / kwargs['group']) 58 | 59 | # Assign the output file if none was assigned 60 | if kwargs['outfile'] is None: 61 | kwargs['outfile'] = "feature_matrix_" + kwargs['group'] 62 | 63 | # Assert that the output file is valid 64 | assert kwargs['outfile'].endswith('.csv'), "%s is not a valid outputfile, must be a .csv file" % (kwargs['outfile']) 65 | 66 | # Print Arguments 67 | display_kwargs(kwargs) 68 | # Make all arguments local variables 69 | locals().update(kwargs) 70 | 71 | """ 72 | Calculate feature matrix 73 | """ 74 | print("Retrieving phenotype data...") 75 | phenotypes = get_cpt_codes(path, phenotype, regression_map[reg_type]) 76 | 77 | print("Retrieving group data...") 78 | genotypes = get_group_file(path, group) 79 | 80 | if not {'MaxAgeAtVisit'}.issubset(genotypes.columns): 81 | print('WARNING: MaxAgeAtVisit was not found in group file. 
Calculating MaxAgeAtVisit from phenotype data') 82 | phenotypes['MaxAgeAtVisit'] = phenotypes.groupby(['id'])['AgeAtCPT'].transform('max') 83 | genotypes = pd.merge(genotypes, phenotypes[['id','MaxAgeAtVisit']].drop_duplicates(subset='id'),on='id',how='left') 84 | 85 | print("Generating feature matrix...") 86 | fm,columns = generate_feature_matrix(genotypes, phenotypes, regression_map[reg_type], 'CPT', prowas_cov) 87 | 88 | """ 89 | Save feature matrix 90 | """ 91 | print("Saving feature matrices to %s" % (path /('*_' + outfile))) 92 | h = ','.join(columns) 93 | 94 | np.savetxt(path /('agg_measures_' + outfile), fm[0],delimiter=',',header=h) 95 | print("...") 96 | np.savetxt(path /('cpt_age_' + outfile), fm[1],delimiter=',',header=h) 97 | 98 | if prowas_cov is not None: 99 | # only save this if it actually means something 100 | print("...") 101 | np.savetxt(path /('prowas_cov_' + outfile), fm[2],delimiter=',',header=h) 102 | 103 | """ 104 | Calculate runtime 105 | """ 106 | interval = time.time() - start 107 | hour = math.floor(interval/3600.0) 108 | minute = math.floor((interval - hour*3600)/60) 109 | second = math.floor(interval - hour*3600 - minute*60) 110 | 111 | if hour > 0: 112 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 113 | elif minute > 0: 114 | time_str = '%dm:%ds' % (minute, second) 115 | else: 116 | time_str = '%ds' % second 117 | 118 | print('pyProwasLookup Complete [Runtime: %s]' %time_str) 119 | 120 | -------------------------------------------------------------------------------- /bin/pyProwasModel: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | 5 | def parse_args(): 6 | parser = argparse.ArgumentParser(description="pyProWAS Mass ProCode Regression Tool") 7 | 8 | parser.add_argument('--feature_matrix', required=True, type=str, help='Name of the feature matrix file (e.g. feature_matrix_group.csv)') 9 | parser.add_argument('--group', required=True, type=str, help ='Name of the group file (e.g. 
groups.csv)') 10 | parser.add_argument('--reg_type', required=True, type=str, help='Type of regression that you would like to use (log, lin, or dur)') 11 | parser.add_argument('--path', required=False, default='.', type=str,help='Path to all input files and destination of output files') 12 | parser.add_argument('--outfile', required=False, default=None, type=str,help='Name of the output file for the regression results') 13 | parser.add_argument('--covariates', required=False, default='', type=str, help='Variables to be used as covariates') 14 | parser.add_argument('--target', required=False, default='genotype', type=str, help='Binary variable that indicates case/control groups (default: genotype)') 15 | parser.add_argument('--canonical', required=False, default="True", type=str, help='Use target as a predictor [True, default] or the dependent variable [False] in the ProWAS equation') 16 | parser.add_argument('--prowas_cov', required=False, default=None, type=str, help='ProCode to use as a covariate in regression') 17 | parser.add_argument('--reg_thresh', required=False, default=5, type=int, help='Threshold of subjects presenting a ProCode required for running regression (default: 5)') 18 | 19 | args = parser.parse_args() 20 | return args 21 | 22 | 23 | args = parse_args() 24 | 25 | """ 26 | Print Start Message 27 | """ 28 | from pyPheWAS.pyPhewasCorev2 import * 29 | import time 30 | import math 31 | from pathlib import Path 32 | import os.path as osp 33 | 34 | start = time.time() 35 | print_start_msg() 36 | print('\npyProwasModel: Mass ProCode Regression Tool\n') 37 | 38 | 39 | """ 40 | Retrieve and validate all arguments. 41 | """ 42 | 43 | kwargs = {'path': Path(args.path), 44 | 'feature_matrix': args.feature_matrix, 45 | 'group': args.group, 46 | 'reg_type':args.reg_type, 47 | 'covariates':args.covariates, 48 | 'prowas_cov':args.prowas_cov, 49 | 'target':args.target, 50 | 'canonical': args.canonical, 51 | 'reg_thresh':args.reg_thresh, 52 | 'outfile':args.outfile, 53 | } 54 | 55 | # Assert that a valid regression type was used 56 | assert args.reg_type in regression_map.keys(), "%s is not a valid regression type" % args.reg_type 57 | assert (kwargs['prowas_cov'] is None) or (kwargs['prowas_cov'] in prowas_codes['prowas_code'].values), "prowas_cov input (%s) is not a valid ProCode" % kwargs['prowas_cov'] 58 | 59 | # Assert that valid file types were given 60 | assert kwargs['feature_matrix'].endswith('.csv'), "%s is not a valid feature matrix file, must be a .csv file" % (kwargs['feature_matrix']) 61 | assert kwargs['group'].endswith('.csv'), "%s is not a valid group file, must be a .csv file" % (kwargs['group']) 62 | # Assert that valid files were given 63 | assert osp.exists(kwargs['path'] / ('agg_measures_' + kwargs['feature_matrix'])), "%s does not exist" %(kwargs['path'] / ('agg_measures_' + kwargs['feature_matrix'])) 64 | assert osp.exists(kwargs['path'] / kwargs['group']), "%s does not exist" %(kwargs['path'] / kwargs['group']) 65 | 66 | # Assign the output file if none was assigned 67 | if kwargs['outfile'] is None: 68 | kwargs['outfile'] = "regressions_" + kwargs['group'] 69 | # Assert that the output file is valid 70 | assert kwargs['outfile'].endswith('.csv'), "%s is not a valid outputfile, must be a .csv file" % (kwargs['outfile']) 71 | 72 | assert kwargs['canonical'] in ["True", "False"], "%s is not a valid canonical value (True or False)" % kwargs['canonical'] 73 | kwargs['canonical'] = eval(kwargs['canonical']) 74 | 75 | 76 | # Print Arguments 77 | display_kwargs(kwargs) 78 
| 79 | # Make all arguments local variables 80 | locals().update(kwargs) 81 | 82 | 83 | """ 84 | Load Data 85 | """ 86 | print("Retrieving group data.") 87 | genotypes = get_group_file(path, group) 88 | 89 | # check target variable 90 | assert target in genotypes.columns, "target %s is not a column in the group file" % target 91 | 92 | # check covariates 93 | if covariates != '': 94 | for cov in covariates.replace(" ", "").split('+'): 95 | if cov == MAX_AGE_AT_CPT: 96 | assert cov not in genotypes.columns, "%s is a reserved covariate name; please rename this column in the group file" % cov 97 | elif cov in RESERVED_COL_NAMES: 98 | assert False, "%s is a reserved covariate name [%s]; please remove from the covariate list and/or rename this column in the group file" % (cov, ','.join(RESERVED_COL_NAMES)) 99 | else: 100 | assert cov in genotypes.columns, "covariate %s is not a column in the group file" % cov 101 | 102 | print('Loading feature matrices.') 103 | 104 | a = np.loadtxt(path / ('agg_measures_' + feature_matrix), delimiter=',') 105 | 106 | if 'MaxAgeAtCPT' in covariates: 107 | assert osp.exists(kwargs['path'] / ('cpt_age_' + kwargs['feature_matrix'])), "%s does not exist" % (kwargs['path'] / ('cpt_age_' + kwargs['feature_matrix'])) 108 | b = np.loadtxt(path / ('cpt_age_' + feature_matrix), delimiter=',') 109 | else: 110 | b = np.zeros_like(a) 111 | 112 | if prowas_cov is not None: 113 | assert osp.exists(kwargs['path'] / ('prowas_cov_' + kwargs['feature_matrix'])), "%s does not exist" % (kwargs['path'] / ('prowas_cov_' + kwargs['feature_matrix'])) 114 | c = np.loadtxt(path / ('prowas_cov_' + feature_matrix), delimiter=',') 115 | else: 116 | c = np.zeros_like(a) 117 | 118 | # reconstruct full feature matrix 119 | fm = np.array([a,b,c]) 120 | 121 | 122 | """ 123 | Run Regressions 124 | """ 125 | print("Running ProWAS regressions...") 126 | regressions, model_str = run_phewas(fm, genotypes, 'CPT', regression_map[reg_type], 127 | covariates=covariates, target=target, 128 | phe_thresh=reg_thresh, canonical=canonical) 129 | 130 | print("Saving regression data to %s" % (path / outfile)) 131 | if prowas_cov is not None: 132 | model_str = model_str.replace('prowas_cov', f'prowas_cov_{prowas_cov}') 133 | 134 | header = ','.join(['model_equation', model_str, 'group', group, 'feature_matrix', feature_matrix, 'reg_type', reg_type, 'code_type', 'CPT']) + '\n' 135 | f = open(path / outfile, 'w') 136 | f.write(header) 137 | regressions.to_csv(f,index=False) 138 | f.close() 139 | 140 | 141 | """ 142 | Calculate runtime 143 | """ 144 | 145 | interval = time.time() - start 146 | hour = math.floor(interval/3600.0) 147 | minute = math.floor((interval - hour*3600)/60) 148 | second = math.floor(interval - hour*3600 - minute*60) 149 | 150 | if hour > 0: 151 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 152 | elif minute > 0: 153 | time_str = '%dm:%ds' % (minute, second) 154 | else: 155 | time_str = '%ds' % second 156 | 157 | print('pyProwasModel Complete [Runtime: %s]' %time_str) -------------------------------------------------------------------------------- /bin/pyProwasPlot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | 5 | def parse_args(): 6 | parser = argparse.ArgumentParser(description="pyProWAS Plotting Tool") 7 | 8 | parser.add_argument('--statfile', required=True, type=str, help='Name of the statistics/regressions file') 9 | parser.add_argument('--thresh_type', required=True, type=str, help=' the type of 
threshold to be used in the plot') 10 | parser.add_argument('--custom_thresh', required=False, default=None, type=float, help='Custom threshold value (float between 0 and 1)') 11 | parser.add_argument('--imbalance', required=False, default="True", help = 'Show the direction of imbalance in the Manhattan plot [True (default) or False]') 12 | parser.add_argument('--plot_all_pts', required=False, default="True", help='Show all points regardless of significance in the Manhattan plot [True (default) or False]') 13 | parser.add_argument('--old_style', required=False, default="False", help = 'Use old plot style (no gridlines, all spines shown)') 14 | parser.add_argument('--prowas_label', required=False, default="plot", type=str, help='Location of ProCode labels on Log Odds plot [plot (default) or axis]') 15 | parser.add_argument('--path', required=False, default='.', type=str, help='Path to all input files and destination of output files') 16 | parser.add_argument('--outfile', required=False, default=None, type=str, help='Name of the output file for the plot') 17 | 18 | args = parser.parse_args() 19 | return args 20 | 21 | 22 | args = parse_args() 23 | 24 | """ 25 | Print Start Message 26 | """ 27 | from pyPheWAS.pyPhewasCorev2 import * 28 | import os 29 | import sys 30 | import time 31 | import math 32 | import matplotlib.pyplot as plt 33 | from pathlib import Path 34 | import os.path as osp 35 | 36 | start = time.time() 37 | print_start_msg() 38 | print('\npyProwasPlot: Plot Mass ProCode Regression Results\n') 39 | 40 | 41 | """ 42 | Retrieve and validate all arguments. 43 | """ 44 | 45 | kwargs = {'path': Path(args.path), 46 | 'statfile': args.statfile, 47 | 'thresh_type': args.thresh_type, 48 | 'imbalance': args.imbalance, 49 | 'plot_all_pts': args.plot_all_pts, 50 | 'old_style': args.old_style, 51 | 'custom_thresh':args.custom_thresh, 52 | 'prowas_label': args.prowas_label, 53 | 'outfile':args.outfile, 54 | } 55 | 56 | # Assert that a valid threshold type was used 57 | assert kwargs['thresh_type'] in threshold_map.keys(), "%s is not a valid regression type" % (kwargs['thresh_type']) 58 | if kwargs['thresh_type'] == 'custom': 59 | assert kwargs['custom_thresh'] is not None, "Custom threshold specified. 
Please define --custom_thresh" 60 | assert (kwargs['custom_thresh'] < 1.0) & (kwargs['custom_thresh'] > 0.0), "%s is not a valid threshold (should be between 0.0 and 1.0)" % (kwargs['custom_thresh']) 61 | 62 | # Assert that valid files were given 63 | assert kwargs['statfile'].endswith('.csv'), "%s is not a valid statistics file (must be a .csv file)" % (kwargs['statfile']) 64 | assert osp.exists(kwargs['path'] / kwargs['statfile']), "%s does not exist" %(kwargs['path'] / kwargs['statfile']) 65 | 66 | assert kwargs['prowas_label'] in ["plot","axis"], "%s is not a valid ProCode label location" % (kwargs['prowas_label']) 67 | 68 | for bool_arg in ['imbalance','plot_all_pts','old_style']: 69 | assert kwargs[bool_arg] in ["True", "False"], "%s is not a valid imbalance value (\"True\" or \"False\")" % kwargs[bool_arg] 70 | kwargs[bool_arg] = eval(kwargs[bool_arg]) 71 | 72 | # Print Arguments 73 | display_kwargs(kwargs) 74 | # Make all arguments local variables 75 | locals().update(kwargs) 76 | 77 | 78 | """ 79 | Load Data 80 | """ 81 | 82 | ff = open(path / statfile) 83 | header = ff.readline().strip().split(',') 84 | reg_args = {} 85 | for i in range(0,len(header),2): 86 | reg_args[header[i]] = header[i+1] 87 | print('\nRegression Info') 88 | display_kwargs(reg_args) 89 | 90 | # Read in the remaining data (the pandas DataFrame) 91 | regressions = pd.read_csv(ff,dtype={'ProWAS Code':str}) 92 | regressions.dropna(subset=['p-val','"-log(p)"'], inplace=True) 93 | 94 | try: 95 | # make confidence interval numeric instead of a string 96 | regressions[['lowlim', 'uplim']] = regressions['Conf-interval beta'].str.split(',', expand=True) 97 | regressions['uplim'] = regressions.uplim.str.replace(']', '', regex=False) 98 | regressions['lowlim'] = regressions.lowlim.str.replace('[', '', regex=False) 99 | regressions = regressions.astype(dtype={'uplim':float,'lowlim':float}) 100 | except Exception as e: 101 | print('Error reading regression file:') 102 | print(e) 103 | sys.exit() 104 | 105 | 106 | """ 107 | Create plots 108 | """ 109 | 110 | # Get the threshold 111 | pvalues = regressions['p-val'].values 112 | 113 | if thresh_type == 'bon': 114 | thresh = get_bon_thresh(pvalues,0.05) 115 | elif thresh_type == 'fdr': 116 | thresh = get_fdr_thresh(pvalues,0.05) 117 | elif thresh_type == 'custom': 118 | thresh = custom_thresh 119 | print('%s threshold: %0.5f'%(thresh_type,thresh)) 120 | 121 | # figure out file names 122 | if outfile is not None: 123 | file_name, file_format = osp.splitext(outfile) 124 | savem = path / (file_name + '_Manhattan' + file_format) 125 | saveb = path / (file_name + '_EffectSize' + file_format) 126 | savev = path / (file_name + '_Volcano' + file_format) 127 | file_format = file_format[1:] # remove '.'
from from first index 128 | else: 129 | savem = '' 130 | saveb = '' 131 | savev = '' 132 | file_format = '' 133 | 134 | plot_manhattan( 135 | regressions, 136 | thresh=thresh, 137 | show_imbalance=imbalance, 138 | plot_all_pts=plot_all_pts, 139 | old_plot_style=old_style, 140 | code_type='CPT', 141 | save=savem, 142 | save_format=file_format, 143 | ) 144 | 145 | plot_effect_size( 146 | regressions, 147 | thresh=thresh, 148 | model_str=reg_args.get('model_equation', None), 149 | reg_type=reg_args.get('reg_type', None), 150 | label_loc=prowas_label, 151 | old_plot_style=old_style, 152 | code_type='CPT', 153 | save=saveb, 154 | save_format=file_format, 155 | ) 156 | 157 | plot_volcano( 158 | regressions, 159 | model_str=reg_args.get('model_equation', None), 160 | reg_type=reg_args.get('reg_type', None), 161 | old_plot_style=old_style, 162 | code_type='CPT', 163 | save=savev, 164 | save_format=file_format 165 | ) 166 | 167 | 168 | if outfile is not None: 169 | print("Saving plots to %s" % (path)) 170 | else: 171 | plt.show() 172 | 173 | 174 | """ 175 | Calculate runtime 176 | """ 177 | 178 | interval = time.time() - start 179 | hour = math.floor(interval/3600.0) 180 | minute = math.floor((interval - hour*3600)/60) 181 | second = math.floor(interval - hour*3600 - minute*60) 182 | 183 | if hour > 0: 184 | time_str = '%dh:%dm:%ds' %(hour,minute,second) 185 | elif minute > 0: 186 | time_str = '%dm:%ds' % (minute, second) 187 | else: 188 | time_str = '%ds' % second 189 | 190 | print('pyProwasPlot Complete [Runtime: %s]' %time_str) -------------------------------------------------------------------------------- /bin/test: -------------------------------------------------------------------------------- 1 | convertEventToAge --phenotype 'Landman_Optic_CPT_20150904.csv' --group 'Landman_Optic_DEMO_20150904.csv' --path '/Users/shikhachaganti/IIH/' --phenotypeout 'Landman_Optic_CPT_wAge.csv' --eventcolumn 'Event_date' --precision '2' -------------------------------------------------------------------------------- /deprecated/censor_diagnosis.py: -------------------------------------------------------------------------------- 1 | def censor_diagnosis(path,genotype_file,phenotype_file,final_pfile, final_gfile, field ='na',type='ICD',ad=1,start_time=float('nan'),end_time=float('nan')): 2 | import pandas as pd 3 | import numpy as np 4 | genotypes = pd.read_csv(path+genotype_file) 5 | phenotypes = pd.read_csv(path+phenotype_file) 6 | mg=pd.merge(phenotypes,genotypes,on='id') 7 | if np.isnan(start_time) and np.isnan(end_time): 8 | print("Choose appropriate time period") 9 | if field=='na': 10 | if np.isfinite(start_time) and np.isnan(end_time): 11 | final = mg[mg['AgeAt'+type]>=start_time] 12 | elif np.isnan(start_time) and np.isfinite(end_time): 13 | final = mg[mg['AgeAt'+type]<=end_time] 14 | else: 15 | final = mg[(mg['AgeAt'+type]>=start_time)&(mg['AgeAt'+type]<=end_time)] 16 | 17 | else: 18 | mg['diff']=mg[field]-mg['AgeAt'+type] 19 | if np.isfinite(start_time) and np.isnan(end_time): 20 | final = mg[(mg['diff']>=start_time)|(np.isnan(mg['diff']))] 21 | elif np.isnan(start_time) and np.isfinite(end_time): 22 | final = mg[(mg['diff']<=end_time)|(np.isnan(mg['diff']))] 23 | else: 24 | final = mg[(mg[field]>=start_time)&(mg[field]<=end_time)|(np.isnan(mg[field]))] 25 | 26 | final['MaxAgeBeforeDx'] = final.groupby('id')['AgeAt'+type].transform('max') 27 | if ad==0: 28 | final['AgeNow'] = final[field]-start_time 29 | idx = np.isnan(final.AgeNow) 30 | final.ix[idx,'AgeNow']=final.ix[idx,'MaxAgeBeforeDx'] 31 | 32 
| final.dropna(subset=['MaxAgeBeforeDx'],inplace=True) 33 | final[['id',type.lower(),'AgeAt'+type]].to_csv(path+final_pfile,index=False) 34 | cnames = list(genotypes.columns.values) 35 | if ad==0: 36 | if not 'AgeNow' in genotypes.columns.values: 37 | cnames.append('AgeNow') 38 | if not 'MaxAgeBeforeDx' in genotypes.columns.values: 39 | cnames.append('MaxAgeBeforeDx') 40 | final[cnames].drop_duplicates().to_csv(path+final_gfile,index=False) 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /deprecated/labwas.py: -------------------------------------------------------------------------------- 1 | output_columns = ['PheWAS Code', 2 | 'PheWAS Name', 3 | 'p-val', 4 | '\"-log(p)\"', 5 | 'beta', 6 | 'Conf-interval beta', 7 | 'cpt'] 8 | 9 | imbalance_colors = { 10 | 0: 'white', 11 | 1: 'deepskyblue', 12 | -1: 'red' 13 | } 14 | m = len(fm[0]) 15 | p_values = np.zeros(m, dtype=float) 16 | icodes = [] 17 | # store all of the pertinent data from the regressions 18 | regressions = pd.DataFrame(columns=output_columns) 19 | labnames=df.columns 20 | 21 | def get_bon_thresh(normalized, power): # same 22 | """ 23 | Calculate the bonferroni correction threshold. 24 | 25 | Divide the power by the sum of all finite values (all non-nan values). 26 | 27 | :param normalized: an array of all normalized p-values. Normalized p-values are -log10(p) where p is the p-value. 28 | :param power: the threshold power being used (usually 0.05) 29 | :type normalized: numpy array 30 | :type power: float 31 | 32 | :returns: The bonferroni correction 33 | :rtype: float 34 | 35 | """ 36 | return power / sum(np.isfinite(normalized)) 37 | 38 | for index in range(m): 39 | print(index) 40 | phen_vector1 = fm[:, index] 41 | res = calculate_odds_ratio(genotypes, phen_vector1,0) 42 | 43 | # save all of the regression data 44 | phewas_info = [labnames[index],labnames[index],labnames[index]] 45 | stat_info = res[2] 46 | info = phewas_info[0:2] + [res[1]] + stat_info + [phewas_info[2]] 47 | regressions.loc[index] = info 48 | 49 | p_values[index] = res[1] 50 | 51 | 52 | def get_imbalances(regressions): 53 | """ 54 | Generates a numpy array of the imbalances. 55 | 56 | For a value *x* where *x* is the beta of a regression: 57 | 58 | ========= ====== ======================================================= 59 | *x* < 0 **-1** The regression had a negative beta value 60 | *x* = nan **0** The regression had a nan beta value (and a nan p-value) 61 | *x* > 0 **+1** The regression had a positive beta value 62 | ========= ====== ======================================================= 63 | 64 | These values are then used to get the correct colors using the imbalance_colors. 65 | 66 | :param regressions: DataFrame containing a variety of different output values from the regression performed. The only one used for this function are the 'beta' values. 67 | :type regressions: pandas DataFrame 68 | 69 | :returns: A list that is the length of the number of regressions performed. Each element in the list is either a -1, 0, or +1. These are used as explained above. 
70 | :rtype: numpy array 71 | """ 72 | 73 | imbalance = np.array(regressions['beta']) 74 | imbalance[np.isnan(imbalance)] = 0 75 | imbalance[imbalance > 0] = 1 76 | imbalance[imbalance < 0] = -1 77 | return imbalance 78 | 79 | def calculate_odds_ratio(genotypes, phen_vector1,reg_type): # diff - done 80 | 81 | data = genotypes 82 | data['y'] = phen_vector1 83 | f = 'genotype ~ y' 84 | try: 85 | if reg_type == 0: 86 | logreg = smf.logit(f, data).fit(method='bfgs', disp=False) 87 | p = logreg.pvalues.y 88 | odds = logreg.params.y 89 | conf = logreg.conf_int() 90 | od = [-math.log10(p), logreg.params.y, '[%s,%s]' % (conf[0]['y'], conf[1]['y'])] 91 | else: 92 | linreg = smf.logit(f, data).fit(method='bfgs', disp=False) 93 | p = linreg.pvalues.y 94 | odds = linreg.params.y 95 | conf = linreg.conf_int() 96 | od = [-math.log10(p), linreg.params.y, '[%s,%s]' % (conf[0]['y'], conf[1]['y'])] 97 | except: 98 | odds = 0 99 | p = np.nan 100 | od = [np.nan, np.nan, np.nan] 101 | return (odds, p, od) 102 | 103 | def plot_data_points(y, thresh,labnames,save='', imbalances=np.array([])): # same 104 | 105 | idx = y.sort_values().index 106 | 107 | # Plot each of the points, if necessary, label the points. 108 | e = 1 109 | artists = [] 110 | for i in idx: 111 | if imbalances[i] >0: 112 | plt.plot(e, y[i], 'o', color=imbalance_colors[imbalances[i]], fillstyle='full', markeredgewidth=0.0) 113 | if y[i] > thresh and imbalances[i] > 0: 114 | artists.append(plt.text(e, y[i], labnames[i], fontsize=5,rotation=70, va='bottom')) 115 | e += 10 116 | 117 | # If the imbalance is to be shown, draw lines to show the categories. 118 | # if show_imbalance: 119 | # for pos in linepos: 120 | # plt.axvline(x=pos, color='black', ls='dotted') 121 | 122 | # Plot a blue line at p=0.05 and plot a red line at the line for the threshold type. 123 | plt.axhline(y=-math.log10(0.05), color='blue') 124 | plt.axhline(y=thresh, color='red') 125 | 126 | # Set windows and labels 127 | # plt.xticks(x_label_positions, x_labels, rotation=70, fontsize=10) 128 | plt.ylim(ymin=0, ymax=max(y[imbalances>0])+5) 129 | plt.xlim(xmin=0, xmax=e) 130 | plt.ylabel('-log10(p)') 131 | 132 | # Determine the type of output desired (saved to a plot or displayed on the screen) 133 | if save: 134 | pdf = PdfPages(save) 135 | pdf.savefig(bbox_extra_artists=artists, bbox_inches='tight') 136 | pdf.close() 137 | else: 138 | plt.subplots_adjust(left=0.05, right=0.85) 139 | plt.show() 140 | 141 | # Clear the plot in case another plot is to be made. 
142 | plt.clf() 143 | 144 | 145 | regressions[(y > -math.log10(0.05))&(imbalances<0)].to_csv('labwasneg.csv') 146 | -------------------------------------------------------------------------------- /deprecated/maximize_control_matching.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import operator 3 | import random 4 | import numpy as np 5 | import sys 6 | import getopt 7 | from hopcroftkarp import HopcroftKarp 8 | 9 | """ 10 | 11 | 12 | """ 13 | CATEGORICAL_DATA = '675161f1c87ff2648c61ff1c57c780f2' 14 | 15 | 16 | def generate_row_query(keys, deltas, tr): 17 | q = [] 18 | for i,dt in enumerate(deltas): 19 | key = keys[i] 20 | is_categorical = dt == CATEGORICAL_DATA 21 | if is_categorical: 22 | part = '=='.join([key, tr[key].__repr__()]) 23 | else: 24 | structure = ['abs(', key, '-',tr[key],')', '<', dt] 25 | part = ''.join([str(x) for x in structure]) 26 | q.append(part) 27 | return '&'.join(q) 28 | 29 | def get_options(targets, controls, keys, deltas): 30 | tt = targets[keys] 31 | c = controls[keys] 32 | matching = {} 33 | if len(c) > len(tt): 34 | for i in tt.index: 35 | tr = tt.loc[i] 36 | control_query = generate_row_query(keys, deltas, tr) 37 | matches = c.query(control_query).index 38 | matching[i] = matches.drop_duplicates().tolist() 39 | # matching[i] = set(matches) 40 | else: 41 | for i in c.index: 42 | tr = c.loc[i] 43 | target_query = generate_row_query(keys, deltas, tr) 44 | matches = tt.query(target_query).index 45 | matching[i] = matches.drop_duplicates().tolist() 46 | # matching[i] = set(matches) 47 | 48 | return matching 49 | 50 | def generate_matches(matching, goal): 51 | # Sort the targets by the number of controls they match 52 | frequency = { k : len(v) for k,v in matching.items() } 53 | frequency = sorted(frequency.items(), key=operator.itemgetter(1)) 54 | success = True 55 | 56 | # Keep track of the already used controls 57 | used = [] 58 | 59 | # The final mapping of targets : [control list] 60 | final = {} 61 | 62 | for key,_ in frequency: 63 | final[key] = [] 64 | viable = matching[key] 65 | random.shuffle(viable) 66 | for control in viable: 67 | if len(final[key]) == goal: 68 | break 69 | if control not in used: 70 | used.append(control) 71 | final[key].append(control) 72 | if len(final[key]) < goal: 73 | success = False 74 | return (final, used, success, goal) 75 | 76 | def maximize_matches(matching): 77 | prev = generate_matches(matching, 1) 78 | while prev[2] == False: 79 | return prev 80 | 81 | # If 1-1 matching was successful, attempt to maximize starting from 2 82 | success = prev[2] 83 | goal = 2 84 | 85 | while success: 86 | curr = generate_matches(matching, goal) 87 | success = curr[2] 88 | if success: 89 | prev = curr 90 | goal += 1 91 | 92 | return prev 93 | 94 | def output_matches(path, outputfile, data, all_used, success, matched): 95 | new_data = data[data.index.isin(all_used)] 96 | 97 | if not success: 98 | print("Could not match 1-1, using the maximum number of matches found by the approximation algorithm") 99 | if '%s' in outputfile: 100 | outputfile = outputfile % ('max') 101 | else: 102 | print("Matched data 1-%s" % (matched)) 103 | if '%s' in outputfile: 104 | outputfile = outputfile % (matched) 105 | 106 | new_data.to_csv(path + outputfile,index=False) 107 | print("Data in %s" % (path + outputfile)) 108 | 109 | def control_match(path, inputfile, outputfile, keys, deltas, condition='genotype',goal=-1): 110 | # Reformat arguments into Python format 111 | keys = keys.split('+') 112 
| deltas = deltas.split(',') 113 | deltas = [CATEGORICAL_DATA if x == '' else int(x) for x in deltas] 114 | 115 | # Read data from the provided input file 116 | data = pd.read_csv(path + inputfile) 117 | 118 | # Assert that all of the provided keys are present in the data 119 | for key in keys: 120 | assert key in data.columns, '%s not a column in the input file (%s)' % (key, inputfile) 121 | 122 | # Assign new value for outputfile 123 | if not outputfile: 124 | outputfile = '1-%s_' + inputfile 125 | 126 | # Separate patients and controls 127 | targets = data[data[condition] == 1] 128 | controls = data[data[condition] == 0] 129 | 130 | match_by_control = len(targets) > len(controls) 131 | 132 | matching = get_options(targets, controls, keys, deltas) 133 | 134 | if goal != -1: 135 | final, used, success, matched = generate_matches(matching, goal) 136 | if success: 137 | if match_by_control: 138 | all_used = used + controls.index.tolist() 139 | else: 140 | all_used = used + targets.index.tolist() 141 | output_matches(path, outputfile, data, all_used, success, matched) 142 | # return 143 | else: 144 | print("Failed to perform 1-%s, attempting to maximize..." % (goal)) 145 | while not success: 146 | goal = 1 147 | print(deltas) 148 | deltas = [element + 1 if element != CATEGORICAL_DATA else element for element in deltas] 149 | matching = get_options(targets, controls, keys, deltas) 150 | final, used, success, matched = generate_matches(matching, goal) 151 | print("Used %s as delta values across keys. Generated a 1-%s match." % (deltas, goal)) 152 | final, used, success, matched = maximize_matches(matching) 153 | if match_by_control: 154 | all_used = used + controls.index.tolist() 155 | else: 156 | all_used = used + targets.index.tolist() 157 | output_matches(path, outputfile, data, all_used, success, matched) 158 | if goal==-1: 159 | final, used, success, matched = maximize_matches(matching) 160 | #all_used = used + targets.index.tolist() 161 | if match_by_control: 162 | all_used = used + controls.index.tolist() 163 | else: 164 | all_used = used + targets.index.tolist() 165 | output_matches(path, outputfile, data, all_used, success, matched) 166 | 167 | -------------------------------------------------------------------------------- /deprecated/prowas_reg.py: -------------------------------------------------------------------------------- 1 | from pyProWAS import * 2 | import os 3 | import numpy as np 4 | import pandas as pd 5 | 6 | reg_type = 0 7 | str_reg_type = "log" 8 | path = "/nfs/share5/clineci/DownSyndrome/experiments/prowas_test/" 9 | filename = "cpts_age.csv" 10 | groupfile = "group.csv" 11 | phewas_cov = '' 12 | outfile = 'feature_matrix.csv' 13 | covariates = 'SEX' 14 | str_thresh_type = "fdr" 15 | thresh_type = 1 16 | 17 | """ 18 | # gen_ftype = reg_type 19 | phenotypes = get_input(path, filename,reg_type) 20 | genotypes = get_group_file(path, groupfile) 21 | fm = generate_feature_matrix(genotypes, phenotypes, reg_type) 22 | 23 | print("Saving feature matrices to %s" % (path + outfile)) 24 | 25 | np.savetxt(path + 'agg_measures_' + outfile, fm[0],delimiter=',') 26 | print("...") 27 | np.savetxt(path + 'icd_age_' + outfile, fm[1],delimiter=',') 28 | print("...") 29 | np.savetxt(path + 'phewas_cov_' + outfile, fm[2],delimiter=',') 30 | 31 | regressions = run_phewas(fm, genotypes, covariates,reg_type) 32 | 33 | print("Saving regression data to %s" % (path + 'regressions.csv')) 34 | header = ','.join(['str_reg_type', str_reg_type, 'group', groupfile]) + '\n' 35 | f = 
open(os.sep.join([path, 'regressions.csv']), 'w') 36 | f.write(header) 37 | regressions.to_csv(f,index=False) 38 | f.close() 39 | 40 | """ 41 | 42 | regressions = pd.read_csv(path + 'regressions.csv',dtype={'PheWAS Code':str},skiprows=1) 43 | 44 | print("creating plots") 45 | 46 | # Check if an imbalance will be used 47 | 48 | imbalances = get_imbalances(regressions) 49 | 50 | y = regressions['"-log(p)"'] 51 | pvalues = regressions['p-val'].values 52 | 53 | # Get the threshold type 54 | if thresh_type == 0: 55 | thresh = get_bon_thresh(pvalues, 0.05) 56 | elif thresh_type == 1: 57 | thresh = get_fdr_thresh(pvalues, 0.05) 58 | 59 | thresh = 0.5 60 | print('%s threshold: %0.5f'%(str_thresh_type,thresh)) 61 | 62 | try: 63 | regressions[['lowlim', 'uplim']] = regressions['Conf-interval beta'].str.split(',', expand=True) 64 | regressions['uplim'] = regressions.uplim.str.replace(']', '') 65 | regressions['lowlim'] = regressions.lowlim.str.replace('[', '') 66 | regressions = regressions.astype(dtype={'uplim':float,'lowlim':float}) 67 | yb = regressions[['beta', 'lowlim', 'uplim']].values 68 | yb = yb.astype(float) 69 | except Exception as e: 70 | print('Error reading regression file:') 71 | print(e) 72 | sys.exit() 73 | 74 | save = path + 'plot.png' 75 | file_name, file_format = os.path.splitext(save) 76 | saveb = file_name + '_beta' + file_format 77 | file_format = file_format[1:] # remove '.' from from first index 78 | print("Saving plot to %s" % (save)) 79 | 80 | plot_manhattan(regressions, -math.log10(thresh), save=save, save_format=file_format) 81 | plot_odds_ratio(regressions, -math.log10(thresh), save=saveb, save_format=file_format) -------------------------------------------------------------------------------- /dist/pyPheWAS-0.1.11.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.11.tar.gz -------------------------------------------------------------------------------- /dist/pyPheWAS-0.1.12.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.12.tar.gz -------------------------------------------------------------------------------- /dist/pyPheWAS-0.1.13.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.13.tar.gz -------------------------------------------------------------------------------- /dist/pyPheWAS-0.1.14.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.14.tar.gz -------------------------------------------------------------------------------- /dist/pyPheWAS-0.1.15.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.15.tar.gz -------------------------------------------------------------------------------- /dist/pyPheWAS-0.1.16.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.16.tar.gz 
/dist/pyPheWAS-0.1.17.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.17.tar.gz
/dist/pyPheWAS-0.1.18.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.18.tar.gz
/dist/pyPheWAS-0.1.19.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.19.tar.gz
/dist/pyPheWAS-0.1.20.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.20.tar.gz
/dist/pyPheWAS-0.1.21.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.21.tar.gz
/dist/pyPheWAS-0.1.22.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.22.tar.gz
/dist/pyPheWAS-0.1.27.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.27.tar.gz
/dist/pyPheWAS-0.1.28.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.28.tar.gz
/dist/pyPheWAS-0.1.29.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.29.tar.gz
/dist/pyPheWAS-0.1.30.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.30.tar.gz
/dist/pyPheWAS-0.1.31.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.31.tar.gz
/dist/pyPheWAS-0.1.32.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.32.tar.gz
/dist/pyPheWAS-0.1.33.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.33.tar.gz
/dist/pyPheWAS-0.1.34.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.34.tar.gz
/dist/pyPheWAS-0.1.35.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.35.tar.gz
/dist/pyPheWAS-0.1.36.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-0.1.36.tar.gz
/dist/pyPheWAS-1.0.2.tar.gz: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/dist/pyPheWAS-1.0.2.tar.gz
--------------------------------------------------------------------------------
/docs/_build/.buildinfo:
--------------------------------------------------------------------------------
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 022bdca6dc48b3872640b3417cc7f18c
tags: 645f666f9bcd5a90fca523b33c5a78b7
--------------------------------------------------------------------------------
/docs/_build/.doctrees/agematch.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/agematch.doctree
/docs/_build/.doctrees/analysis.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/analysis.doctree
/docs/_build/.doctrees/api.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/api.doctree
/docs/_build/.doctrees/api.doctree.orig: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/api.doctree.orig
/docs/_build/.doctrees/basic.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/basic.doctree
/docs/_build/.doctrees/code.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/code.doctree
/docs/_build/.doctrees/code_BACKUP_13469.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/code_BACKUP_13469.doctree
/docs/_build/.doctrees/code_BASE_13469.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/code_BASE_13469.doctree
/docs/_build/.doctrees/code_LOCAL_13469.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/code_LOCAL_13469.doctree
/docs/_build/.doctrees/code_REMOTE_13469.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/code_REMOTE_13469.doctree
/docs/_build/.doctrees/environment.pickle: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/environment.pickle
/docs/_build/.doctrees/index.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/index.doctree
/docs/_build/.doctrees/other.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/other.doctree
/docs/_build/.doctrees/source/index.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/source/index.doctree
/docs/_build/.doctrees/test.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/test.doctree
/docs/_build/.doctrees/tutorial.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/tutorial.doctree
/docs/_build/.doctrees/tutorial/agematch.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/tutorial/agematch.doctree
/docs/_build/.doctrees/tutorial/analysis.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/tutorial/analysis.doctree
/docs/_build/.doctrees/tutorial/basic.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/tutorial/basic.doctree
/docs/_build/.doctrees/tutorial/other.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/tutorial/other.doctree
/docs/_build/.doctrees/tutorial/tutorial.doctree: https://raw.githubusercontent.com/MASILab/pyPheWAS/2a8fff1d515edf2fc574c57b903769ddf0d2462f/docs/_build/.doctrees/tutorial/tutorial.doctree
--------------------------------------------------------------------------------
/docs/_build/_modules/index.html:
--------------------------------------------------------------------------------
Generated Sphinx module index page (markup stripped): an alphabetical table of modules with rendered source, listing pyPheWAS, pyPheWAS.pyPhewasCore, and pyPheWAS.pyPhewasv2.