' + 111 | '' + 112 | _("Hide Search Matches") + 113 | "
" 114 | ) 115 | ); 116 | }, 117 | 118 | /** 119 | * helper function to hide the search marks again 120 | */ 121 | hideSearchWords: () => { 122 | document 123 | .querySelectorAll("#searchbox .highlight-link") 124 | .forEach((el) => el.remove()); 125 | document 126 | .querySelectorAll("span.highlighted") 127 | .forEach((el) => el.classList.remove("highlighted")); 128 | localStorage.removeItem("sphinx_highlight_terms") 129 | }, 130 | 131 | initEscapeListener: () => { 132 | // only install a listener if it is really needed 133 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; 134 | 135 | document.addEventListener("keydown", (event) => { 136 | // bail for input elements 137 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; 138 | // bail with special keys 139 | if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; 140 | if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { 141 | SphinxHighlight.hideSearchWords(); 142 | event.preventDefault(); 143 | } 144 | }); 145 | }, 146 | }; 147 | 148 | _ready(() => { 149 | /* Do not call highlightSearchWords() when we are on the search page. 150 | * It will highlight words from the *previous* search query. 151 | */ 152 | if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); 153 | SphinxHighlight.initEscapeListener(); 154 | }); 155 | -------------------------------------------------------------------------------- /docs/build/py-modindex.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 || 85 | g | ||
| 89 | |
90 | genal | 91 | |
| 94 | |
95 | genal.association | 96 | |
| 99 | |
100 | genal.clump | 101 | |
| 104 | |
105 | genal.extract_prs | 106 | |
| 109 | |
110 | genal.geno_tools | 111 | |
| 114 | |
115 | genal.lift | 116 | |
| 119 | |
120 | genal.MR | 121 | |
| 124 | |
125 | genal.MR_tools | 126 | |
| 129 | |
130 | genal.MRpresso | 131 | |
| 134 | |
135 | genal.proxy | 136 | |
| 139 | |
140 | genal.snp_query | 141 | |
| 144 | |
145 | genal.tools | 146 | |
Cyprien Rivier
80 |Aug 13, 2024
83 |“0.8”
86 |Genal is a Python module designed to make it easy to run genetic risk scores and Mendelian randomization analyses. It integrates a collection of tools that facilitate the cleaning of single nucleotide polymorphism data (usually derived from Genome-Wide Association Studies) and enable the execution of key clinical population genetic workflows. The functionalities provided by genal include clumping, lifting, association testing, polygenic risk scoring, and Mendelian randomization analyses, all within a single Python module.
89 |The module prioritizes user-friendliness and intuitive operation, aiming to reduce the complexity of data analysis for researchers. Despite its focus on simplicity, Genal does not sacrifice the depth of customization or the precision of analysis. Researchers can expect to maintain analytical rigour while benefiting from the streamlined experience.
90 |Genal draws on concepts from well-established R packages such as TwoSampleMR, MR-Presso, MendelianRandomization, and gwasvcf, adapting their proven methodologies to the Python environment. This approach ensures that users have access to tried and tested techniques with the versatility of Python’s data science tools.
91 |To install the latest release, type:
92 |pip install genal-python
93 | If you use genal in your work, please cite the following paper:
119 |Genal: A Python Toolkit for Genetic Risk Scoring and Mendelian Randomization 123 | Cyprien Rivier, Cyprien A. Rivier, Santiago Clocchiatti-Tuozzo, Shufan Huo, Victor Torres-Lopez, Daniela Renedo, Kevin N. Sheth, Guido J. Falcone, Julian N. Acosta. 124 | medRxiv. 2024 May 10.1101/2024.05.23.24307776.
125 |The MR-Base platform supports systematic causal inference across the human phenome. 134 | Hemani G, Zheng J, Elsworth B, Wade KH, Baird D, Haberland V, Laurin C, Burgess S, Bowden J, Langdon R, Tan VY, Yarmolinsky J, Shihab HA, Timpson NJ, Evans DM, Relton C, Martin RM, Davey Smith G, Gaunt TR, Haycock PC, The MR-Base Collaboration 135 | eLife. 2018 May 10.7554/eLife.34408. 136 | PMID: 29846171.
137 |Detection of widespread horizontal pleiotropy in causal relationships inferred from Mendelian randomization between complex traits and diseases. 141 | Marie Verbanck, Chia-Yen Chen, Benjamin Neale, Ron Do. 142 | Nature Genetics 2018 May 10.1038/s41588-018-0099-7. 143 | PMID: 29686387.
144 |The variant call format provides efficient and robust storage of GWAS summary statistics. 148 | Matthew Lyon, Shea J Andrews, Ben Elsworth, Tom R Gaunt, Gibran Hemani, Edoardo Marcora. 149 | bioRxiv 2020 May 30 2020.05.29.115824v1. 150 | PMID: 33441155.
151 |
75 | import os
76 | import subprocess
77 | import pandas as pd
78 | import uuid
79 |
80 | from .tools import read_config, get_plink19_path, get_reference_panel_path, create_tmp
81 |
82 |
83 |
84 | [docs]
def clump_data(
    data,
    reference_panel="eur",
    kb=250,
    r2=0.1,
    p1=5e-8,
    p2=0.01,
    name="",
    ram=10000,
):
    """
    Perform clumping on the given data using plink. Corresponds to the :meth:`Geno.clump` method.

    Args:
        data (pd.DataFrame): Input data with at least 'SNP' and 'P' columns.
        reference_panel (str): The reference population for linkage disequilibrium values. Accepts values "eur", "sas", "afr", "eas", "amr". Alternatively, a path leading to a specific bed/bim/fam reference panel can be provided. Default is "eur".
        kb (int, optional): Clumping window in terms of thousands of SNPs. Default is 250.
        r2 (float, optional): Linkage disequilibrium threshold, values between 0 and 1. Default is 0.1.
        p1 (float, optional): P-value threshold during clumping. SNPs above this value are not considered. Default is 5e-8.
        p2 (float, optional): P-value threshold post-clumping to further filter the clumped SNPs. If p2 < p1, it won't be considered. Default is 0.01.
        name (str, optional): Name used for the files created in the tmp_GENAL folder.
        ram (int, optional): Amount of RAM in MB to be used by plink.

    Returns:
        pd.DataFrame or None: Data after clumping, or None if no SNPs remain.

    Raises:
        RuntimeError: If the plink execution fails (its stderr is included).
        FileNotFoundError: If plink did not produce the expected .clumped file.
    """
    plink19_path = get_plink19_path()

    # Create unique ID for the name if none is passed
    if not name:
        name = str(uuid.uuid4())[:8]

    # Save the relevant data columns to a temporary file
    to_clump_filename = os.path.join("tmp_GENAL", f"{name}_to_clump.txt")
    data[["SNP", "P"]].to_csv(to_clump_filename, index=False, sep="\t")

    # Construct and execute the plink clumping command
    output_path = os.path.join("tmp_GENAL", name)
    plink_command = f"{plink19_path} --memory {ram} --bfile {get_reference_panel_path(reference_panel)} \
        --clump {to_clump_filename} --clump-kb {kb} --clump-r2 {r2} --clump-p1 {p1} \
        --clump-p2 {p2} --out {output_path}"
    # check=False so a non-zero exit reaches the explicit RuntimeError below
    # (with check=True, subprocess raised CalledProcessError first and the
    # returncode test was dead code, hiding plink's stderr from the user).
    output = subprocess.run(
        plink_command, shell=True, capture_output=True, text=True, check=False
    )

    # Check and print the outputs for relevant information
    if output.returncode != 0:
        raise RuntimeError(
            f"PLINK execution failed with the following error: {output.stderr}"
        )
    if "more top variant IDs missing" in output.stderr:
        missing_variants = output.stderr.split("more top variant IDs missing")[0].split(
            "\n"
        )[-1]
        print(f"Warning: {missing_variants} top variant IDs missing")
    if "No significant --clump results." in output.stderr:
        print("No SNPs remaining after clumping.")
        return
    # Report plink's clump summary line, if present (guard against IndexError
    # when the marker is absent from stdout).
    if "--clump: " in output.stdout:
        print(output.stdout.split("--clump: ")[1].split("\n")[0])

    # Extract the list of clumped SNPs and get the relevant data subset
    clumped_filename = os.path.join("tmp_GENAL", f"{name}.clumped")
    if not os.path.exists(clumped_filename):
        raise FileNotFoundError(f"'{clumped_filename}' is missing.")
    # r"\s+": the .clumped file is whitespace-delimited; the raw string avoids
    # the invalid-escape SyntaxWarning that "\s+" triggers on Python >= 3.12.
    plink_clumped = pd.read_csv(clumped_filename, sep=r"\s+", usecols=["SNP"])
    clumped_data = data[data["SNP"].isin(plink_clumped["SNP"])].reset_index(drop=True)
    return clumped_data
153 |
154 |