├── BANN ├── README.md ├── examples_docs │ ├── .ipynb_checkpoints │ │ ├── BANNs_example-checkpoint.ipynb │ │ └── annotation_tutorial-checkpoint.ipynb │ ├── BANNs_example.html │ ├── BANNs_example.ipynb │ ├── BANNs_example.pdf │ ├── Xtest2.txt │ ├── annotation_tutorial.html │ ├── annotation_tutorial.ipynb │ ├── annotation_tutorial.md │ ├── annotation_tutorial.pdf │ ├── masktest2.txt │ ├── sampleData │ │ ├── TestAnnotationDF.txt │ │ ├── TestGeneList.txt │ │ ├── TestMask.txt │ │ ├── TestSNPList.txt │ │ ├── Xtest2.txt │ │ ├── masktest2.txt │ │ └── ytest2.txt │ └── ytest2.txt └── src │ ├── BANN.py │ ├── Xtest2.txt │ ├── __pycache__ │ ├── BANN.cpython-37.pyc │ ├── BANN2.cpython-37.pyc │ ├── annotation.cpython-37.pyc │ ├── customModel.cpython-37.pyc │ ├── customModel2.cpython-37.pyc │ └── utils.cpython-37.pyc │ ├── annotation.py │ ├── annotation.pyc │ ├── customModel.py │ ├── masktest2.txt │ ├── utils.py │ └── ytest2.txt ├── BANN_R ├── .Rhistory ├── .Rproj.user │ └── 62903D5D │ │ ├── cpp-definition-cache │ │ ├── pcs │ │ ├── files-pane.pper │ │ ├── source-pane.pper │ │ ├── windowlayoutstate.pper │ │ └── workbench-pane.pper │ │ ├── rmd-outputs │ │ └── saved_source_markers ├── BANN │ ├── .Rbuildignore │ ├── .Rhistory │ ├── .Rproj.user │ │ ├── 62903D5D │ │ │ ├── console06 │ │ │ │ └── INDEX001 │ │ │ ├── cpp-definition-cache │ │ │ ├── pcs │ │ │ │ ├── debug-breakpoints.pper │ │ │ │ ├── files-pane.pper │ │ │ │ ├── source-pane.pper │ │ │ │ ├── windowlayoutstate.pper │ │ │ │ └── workbench-pane.pper │ │ │ ├── rmd-outputs │ │ │ ├── saved_source_markers │ │ │ └── sources │ │ │ │ ├── per │ │ │ │ └── t │ │ │ │ │ ├── 2197BFE │ │ │ │ │ ├── 2197BFE-contents │ │ │ │ │ ├── 643FE402 │ │ │ │ │ ├── 643FE402-contents │ │ │ │ │ ├── 80949DC8 │ │ │ │ │ ├── 80949DC8-contents │ │ │ │ │ ├── 91E7C3B1 │ │ │ │ │ ├── 91E7C3B1-contents │ │ │ │ │ ├── C455BA39 │ │ │ │ │ ├── C455BA39-contents │ │ │ │ │ ├── E7AFF6B4 │ │ │ │ │ ├── E7AFF6B4-contents │ │ │ │ │ ├── E87780BE │ │ │ │ │ └── E87780BE-contents │ │ │ │ 
└── prop │ │ │ │ ├── 227CA161 │ │ │ │ ├── 29B800AA │ │ │ │ ├── 2E926431 │ │ │ │ ├── 30EAC7D3 │ │ │ │ ├── 3D141571 │ │ │ │ ├── 4AADCB24 │ │ │ │ ├── 67E3BCF5 │ │ │ │ ├── 69E8EDF4 │ │ │ │ ├── 9D8E4109 │ │ │ │ ├── A6BD220D │ │ │ │ ├── A9CDA103 │ │ │ │ ├── AF2D744A │ │ │ │ ├── BDFAD81D │ │ │ │ ├── D98CA144 │ │ │ │ ├── E7EBEC2E │ │ │ │ ├── ED02998B │ │ │ │ ├── FC2BC14B │ │ │ │ └── INDEX │ │ └── shared │ │ │ └── notebooks │ │ │ ├── patch-chunk-names │ │ │ └── paths │ ├── BANN.Rproj │ ├── DESCRIPTION │ ├── NAMESPACE │ ├── R │ │ ├── BANN.R │ │ ├── BANNvarEM.R │ │ ├── estimatePVE.R │ │ ├── innerloop.R │ │ ├── outerloop.R │ │ ├── utils.R │ │ └── varamParamUpdate.R │ └── man │ │ ├── .Rhistory │ │ ├── BANN.Rd │ │ ├── BANNvarEM.Rd │ │ ├── estimatePVE.Rd │ │ ├── innerloop.Rd │ │ ├── outerloop.Rd │ │ ├── softplus.Rd │ │ └── varParamUpdate.Rd ├── BANN_0.1.0.tar.gz ├── BANN_example.R ├── README └── example_data │ ├── .DS_Store │ ├── Xtest.txt │ ├── masktest.txt │ └── ytest.txt ├── BANN_numpy ├── BANN_example.py ├── BANNs_iterative.py ├── README ├── __pycache__ │ ├── BANNs_iterative.cpython-37.pyc │ ├── customModel_iterative.cpython-37.pyc │ └── utils.cpython-37.pyc ├── customModel_iterative.py ├── example_data │ ├── Xtest.txt │ ├── masktest.txt │ └── ytest.txt └── utils.py ├── LICENSE ├── README.md └── misc ├── Fig1.pdf ├── Fig1.png ├── Supp_Fig_1.pdf └── Supp_Fig_1.png /BANN/README.md: -------------------------------------------------------------------------------- 1 | # Dependencies: 2 | python > 3.6, numpy > 1.17.3, pandas > 1.0.4, tensorflow > 2.1.0 3 | 4 | # Usage: 5 | We provide the following ipython notebook tutorials in the `examples_docs` directory: 6 | * `annotation_tutorial.ipynb`: Makes a mask file from plink format files (e.g., `.map` and `.bim`). 7 | * `BANNs_example.ipynb`: Simulates a toy example GWAS data set and runs BANNs. 
8 | 9 | 10 | -------------------------------------------------------------------------------- /BANN/examples_docs/.ipynb_checkpoints/annotation_tutorial-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Example for creating biological annotations for BANNs\n", 8 | "\n", 9 | "BANNs framework requires constructing an annotation mask in order to guide the model structure based on biological annotations. We create SNP-sets based on the genomic annotations of SNPs used in the model. SNPs that fall into the neighborhood of the same gene are grouped together into a SNP-set that corresponds to this gene. If we consider intergenic regions (based on user input), SNPs that fall into unannotated regions of the genome are grouped together into intergenic regions. For example, two SNPs that fall into the intergenic region between _gene A_ and _gene b_ would be grouped together into a intergenic region called _Intergenic_gene A_gene B_. \n", 10 | "\n", 11 | "In this example, we show how to carry out the annotation and create mask matrices (and annotation dataframes that store information on which SNPsets contain which SNPs) using the annotation.py script provided. \n", 12 | "\n", 13 | "We need two inputs for carrying out biological annotations: \n", 14 | "**1)** A SNP list file of .map or .bim format \n", 15 | "**2)** A gene range file \n", 16 | "\n", 17 | "#### **SNP list file:** \n", 18 | "We expect .map formatted SNP lists to be tab-delimited files have four fields/columns (with no header): Chromosome, Variant ID, Morgans, Position(bp) . Our annotation script works with position information. If morgan information is missing, you can replace this column with 0s (zeros) or leave it empty. We expect a similar format from .bim files but with six fields/columns: Chromosome, Variant ID, Morgans, Position, Minor Allele, Major Allele. 
If the files contain a different number of columns, annotation will not carry out and you will receive a warning about the error. \n", 19 | "To get more information on file formats, you can visit: \n", 20 | "https://www.cog-genomics.org/plink2/formats#map \n", 21 | "https://www.cog-genomics.org/plink2/formats#bim \n", 22 | "\n", 23 | "**Important Note:** In order to obtain accurate results, please make sure the order of SNPs in the SNP List file correspond to the order of SNPs in the genotype matrix fed into the model because indices of variables matter for the model and annotations. This means if variant rs7412 is the first SNP in the genotype matrix (i.e. the first column of the genotype matrix corresponds to variant rs7412), then the first row of the SNP list should have information on rs7412. \n", 24 | "\n", 25 | "\n", 26 | "#### **Gene range file:** \n", 27 | "We expect gene range file to be a tab-delimited file with four fields/columns (with no header): Chromosome, Start, End, GeneID. We expect that the Start and End coordinates will be in basepaires. To get more information on this file format, you can visit the \"Gene range lists\" section in : \n", 28 | "https://www.cog-genomics.org/plink/1.9/resources \n", 29 | "and download gene range files for human genome.\n", 30 | "\n", 31 | "#### **Example Files** \n", 32 | "In this tutorial, we will work with the very small SNP list and gene range lists provided in /sampleData/TestSNPList.txt and /sampleData/TestGeneList.txt files for the sake of demonstration. 
\n", 33 | "Here is what these files look like:" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 1, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "Printing SNP list file contents (.map formatted): \n", 46 | "\n", 47 | "2\trs2\t0\t11\n", 48 | "2\trs1\t0\t2\n", 49 | "18\trs3\t0\t13\n", 50 | "18\trs4\t0\t1021\n", 51 | "18\trs5\t0\t1800\n", 52 | "19\trs5\t0\t609\n", 53 | "19\trs6\t0\t5227\n", 54 | "19\trs7\t0\t10187\n", 55 | "19\trs8\t0\t12148\n", 56 | "X\trs9\t0\t1\n", 57 | "X\trs10\t0\t392\n", 58 | "X\trs11\t0\t1107\n", 59 | "X\trs12\t0\t4331\n", 60 | "\n", 61 | "\n", 62 | " Printing Gene range list file contents: \n", 63 | "\n", 64 | "18\t1023\t1803\tGene4\n", 65 | "7\t1267\t9569\tGene2\n", 66 | "19\t9532\t10187\tGene7\n", 67 | "X\t1\t501\tGene8\n", 68 | "19\t587\t791\tGene6\n", 69 | "18\t240\t391\tGene3\n", 70 | "X\t2675\t5092\tGene9\n", 71 | "X\t4061\t9582\tGene10\n", 72 | "18\t21200\t29080\tGene5\n", 73 | "7\t241\t905\tGene1\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "SNPList_file = open(\"sampleData/TestSNPList.txt\", \"r\")\n", 79 | "GeneList_file = open(\"sampleData/TestGeneList.txt\", \"r\")\n", 80 | "\n", 81 | "print(\"Printing SNP list file contents (.map formatted): \\n\")\n", 82 | "print(SNPList_file.read()) \n", 83 | "print(\"\\n\\n Printing Gene range list file contents: \\n\")\n", 84 | "print(GeneList_file.read()) " 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "### **Carrying out annotations**\n", 92 | "In this tutorial, we will create two files: \n", 93 | "**1)** an annotation dataframe that contains information on which SNP-sets contain which SNPs and what the genomic coordinate of these SNP-sets are, saved in a tab-delimited .txt file. \n", 94 | "This dataframe is sorted in ascending order based on the chromosomal location of the SNP-sets. 
\n", 95 | "Carried out by the *annotate()* function\n", 96 | "\n", 97 | "**2)** an annotation mask matrix, which is a sparse matrix of 0s and 1s used to guide the model architecture, saved in a tab-delimited .txt file. \n", 98 | "This is a matrix of size: (number of SNPs by number of SNPsets). An entry in the matrix at location (row i, column j) tells us whether SNP i is contained in SNP-set j (yes if 1, no if 0). \n", 99 | "The rows of the mask matrix (which correspond to SNPs) are in the same order as the SNP list file, meaning the first row contains annotation information of the first SNP in the SNP list. The columns of the mask matrix (which correspond to SNP-set) are in the same order as the annotation dataframe, so columns correspond to SNP-sets in an ascending order based on chromosomal location.\n", 100 | "Carried out by the *getMaskMatrix()* function\n", 101 | "\n", 102 | "#### **Parameters**\n", 103 | "There are a few parameters we require from the user to specify how to carry out the annotations and save the results, apart from giving the path to the input SNP list file and gene range list file:\n", 104 | "\n", 105 | "**output file**: Both *annotate()* and *getMaskMatrix()* functions require us to specify this. It is supposed to be path to the .txt file where we want to save the results (either annotation dataframe or mask matrix).\n", 106 | "\n", 107 | "**intergenic:** For *annotate()* function. This is a boolean parameter, that expects either of the True or False values. The default value is True. \n", 108 | "If *True*, the annotation script creates intergenic SNP-sets and considers them when annotating the SNPs. If *False*, it simply groups SNPs into genes defined in the gene range list and all SNPs that are unannotated (in intergenic regions) are grouped into one SNP-set called \"Unannotated\". 
This \"Unannotated\" SNP-set is the last entry of the annotation dataframe and would be the last column of the mask matrix to be created from the annotation dataframe. If the gene range list provided by the user has all chromosomes, there will be no \"Unannotated\" SNP-set when this parameter is set to *True*. However, if there are chromosomes missing (as in the case of this example, just for demonstrations), SNPs in the missing chromosomes will still be grouped into the \"Unannotated\" SNP-set. \n", 109 | "\n", 110 | "**buffer:** For *annotate()* function. This is an integer and the default value is 0. \n", 111 | "It tells the annotation script how many basepairs to allow for as a buffer when considering a SNP-set neighborhood. If set to, let's say, 50000, then we would allow a 50kb window around SNP-sets when considering whether a SNP is in the SNP-set or not. \n", 112 | "\n", 113 | "**dropSingletons:** For *annotate()* function. This is a boolean parameter, that expects either of the True or False values. The default value is False. \n", 114 | "When annotating, it is possible that some SNP-sets will only contain one SNP. We call these SNP-sets \"singletons\". In this case, whether they should still be considered a \"SNP-set\" is debatable. If you set this parameter value to *True*, then these SNP-sets will be dropped from the annotation, and SNPs will be re-annotated with the remaining SNP-sets. 
Otherwise (if set at *False*), singleton SNP-sets will be kept in the model.\n", 115 | "\n", 116 | "#### Creating the annotation dataframe:" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 2, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "name": "stderr", 126 | "output_type": "stream", 127 | "text": [ 128 | "100%|██████████| 10/10 [00:00<00:00, 241.81it/s]\n", 129 | "100%|██████████| 13/13 [00:00<00:00, 689.29it/s]" 130 | ] 131 | }, 132 | { 133 | "name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "You have chosen to annotate SNP-sets with intergenic regions and with a buffer of 0bp\n", 137 | "Creating Intergenic SNP-sets\n", 138 | "Annotating SNP-sets with the corresponding SNPs\n", 139 | "Saving annotation results to file sampleData/TestAnnotationDF.txt\n", 140 | " GeneID Chromosome Start End SNPindex \\\n", 141 | "0 Intergenic_Gene3_Gene4 18 392.0 1022.0 [3] \n", 142 | "1 Gene4 18 1023.0 1803.0 [4] \n", 143 | "2 Gene6 19 587.0 791.0 [5] \n", 144 | "3 Intergenic_Gene6_Gene7 19 792.0 9531.0 [6] \n", 145 | "4 Gene7 19 9532.0 10187.0 [7] \n", 146 | "5 Downstream_Gene7 19 10188.0 12148.0 [8] \n", 147 | "6 Gene8 X 1.0 501.0 [9, 10] \n", 148 | "7 Intergenic_Gene8_Gene9 X 502.0 2674.0 [11] \n", 149 | "8 Gene9 X 2675.0 5092.0 [12] \n", 150 | "9 Gene10 X 4061.0 9582.0 [12] \n", 151 | "10 UnAnnotated NaN NaN NaN [1, 0, 2] \n", 152 | "\n", 153 | " VariantID \n", 154 | "0 [rs4] \n", 155 | "1 [rs5] \n", 156 | "2 [rs5] \n", 157 | "3 [rs6] \n", 158 | "4 [rs7] \n", 159 | "5 [rs8] \n", 160 | "6 [rs9, rs10] \n", 161 | "7 [rs11] \n", 162 | "8 [rs12] \n", 163 | "9 [rs12] \n", 164 | "10 [rs1, rs2, rs3] \n" 165 | ] 166 | }, 167 | { 168 | "name": "stderr", 169 | "output_type": "stream", 170 | "text": [ 171 | "\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "# Importing annotation.py from the src folder:\n", 177 | "import sys\n", 178 | "sys.path.insert(0, '../src/') #Need to do this for this notebook example since the code 
lives in a different directory \n", 179 | "\n", 180 | "from annotation import * # Import annotation code\n", 181 | "\n", 182 | "#Changing back to the original directory:\n", 183 | "sys.path.insert(0, '../examples_docs/')\n", 184 | "\n", 185 | "#Specifying the path to the SNP list file and gene range list file:\n", 186 | "path_to_SNPList=\"sampleData/TestSNPList.txt\"\n", 187 | "path_to_geneList=\"sampleData/TestGeneList.txt\"\n", 188 | "\n", 189 | "#Specifying the path to the file we want the save annotation dataframe into:\n", 190 | "file_toSave_annotationDF=\"sampleData/TestAnnotationDF.txt\"\n", 191 | "\n", 192 | "#Carrying out the annotation:\n", 193 | "annotationDF=annotate(path_to_SNPList, path_to_geneList, outputFile=file_toSave_annotationDF, \n", 194 | " intergenic=True, buffer=0, dropSingletons=False)\n", 195 | "\n", 196 | "print(annotationDF)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "Showing annotation results with different parameters:" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 3, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stderr", 213 | "output_type": "stream", 214 | "text": [ 215 | "100%|██████████| 13/13 [00:00<00:00, 859.38it/s]\n", 216 | "100%|██████████| 13/13 [00:00<00:00, 838.38it/s]" 217 | ] 218 | }, 219 | { 220 | "name": "stdout", 221 | "output_type": "stream", 222 | "text": [ 223 | "You have chosen to annotate SNP-sets without intergenic regions and with a buffer of 500bp\n", 224 | "Annotating SNP-sets with the corresponding SNPs\n", 225 | "Dropping SNP-sets that are singletons (containing only one SNP) and re-annotating SNPs without them\n", 226 | "Annotating SNP-sets with the corresponding SNPs\n", 227 | "Saving annotation results to file sampleData/TestAnnotationDF.txt\n", 228 | " GeneID Chromosome Start End SNPindex \\\n", 229 | "0 Gene3 18 240.0 391.0 [2] \n", 230 | "1 Gene4 18 1023.0 1803.0 [3, 4] \n", 231 | "2 Gene6 19 
587.0 791.0 [5] \n", 232 | "3 Gene7 19 9532.0 10187.0 [7] \n", 233 | "4 Gene8 X 1.0 501.0 [9, 10] \n", 234 | "5 Gene9 X 2675.0 5092.0 [12] \n", 235 | "6 Gene10 X 4061.0 9582.0 [12] \n", 236 | "7 UnAnnotated NaN NaN NaN [1, 0, 6, 8, 11] \n", 237 | "\n", 238 | " VariantID \n", 239 | "0 [rs3] \n", 240 | "1 [rs4, rs5] \n", 241 | "2 [rs5] \n", 242 | "3 [rs7] \n", 243 | "4 [rs9, rs10] \n", 244 | "5 [rs12] \n", 245 | "6 [rs12] \n", 246 | "7 [rs1, rs2, rs6, rs8, rs11] \n" 247 | ] 248 | }, 249 | { 250 | "name": "stderr", 251 | "output_type": "stream", 252 | "text": [ 253 | "\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "#Carrying out the annotation:\n", 259 | "annotationDF=annotate(path_to_SNPList, path_to_geneList, outputFile=file_toSave_annotationDF, \n", 260 | " intergenic=False, buffer=500, dropSingletons=True)\n", 261 | "\n", 262 | "print(annotationDF)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "#### Creating the mask matrix from the annotation dataframe:" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 4, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stderr", 279 | "output_type": "stream", 280 | "text": [ 281 | "100%|██████████| 8/8 [00:00<00:00, 3834.79it/s]" 282 | ] 283 | }, 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "creating mask\n", 289 | "Saving annotation mask to file sampleData/TestMask.txt in tab-delimited format\n", 290 | "[[0. 0. 0. 0. 0. 0. 0. 1.]\n", 291 | " [0. 0. 0. 0. 0. 0. 0. 1.]\n", 292 | " [1. 0. 0. 0. 0. 0. 0. 0.]\n", 293 | " [0. 1. 0. 0. 0. 0. 0. 0.]\n", 294 | " [0. 1. 0. 0. 0. 0. 0. 0.]\n", 295 | " [0. 0. 1. 0. 0. 0. 0. 0.]\n", 296 | " [0. 0. 0. 0. 0. 0. 0. 1.]\n", 297 | " [0. 0. 0. 1. 0. 0. 0. 0.]\n", 298 | " [0. 0. 0. 0. 0. 0. 0. 1.]\n", 299 | " [0. 0. 0. 0. 1. 0. 0. 0.]\n", 300 | " [0. 0. 0. 0. 1. 0. 0. 0.]\n", 301 | " [0. 0. 0. 0. 0. 0. 0. 1.]\n", 302 | " [0. 0. 0. 0. 0. 1. 1. 
0.]]\n" 303 | ] 304 | }, 305 | { 306 | "name": "stderr", 307 | "output_type": "stream", 308 | "text": [ 309 | "\n" 310 | ] 311 | } 312 | ], 313 | "source": [ 314 | "mask_outputFile=\"sampleData/TestMask.txt\"\n", 315 | "mask = getMaskMatrix(path_to_SNPList, annotationDF, mask_outputFile)\n", 316 | "print(mask)" 317 | ] 318 | } 319 | ], 320 | "metadata": { 321 | "kernelspec": { 322 | "display_name": "Python 3", 323 | "language": "python", 324 | "name": "python3" 325 | }, 326 | "language_info": { 327 | "codemirror_mode": { 328 | "name": "ipython", 329 | "version": 3 330 | }, 331 | "file_extension": ".py", 332 | "mimetype": "text/x-python", 333 | "name": "python", 334 | "nbconvert_exporter": "python", 335 | "pygments_lexer": "ipython3", 336 | "version": "3.7.6" 337 | } 338 | }, 339 | "nbformat": 4, 340 | "nbformat_minor": 4 341 | } 342 | -------------------------------------------------------------------------------- /BANN/examples_docs/BANNs_example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcrawlab/BANNs/83f344744698fad00d985fbe1522016150155a79/BANN/examples_docs/BANNs_example.pdf -------------------------------------------------------------------------------- /BANN/examples_docs/annotation_tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Example for creating biological annotations for BANNs\n", 8 | "\n", 9 | "BANNs framework requires constructing an annotation mask in order to guide the model structure based on biological annotations. We create SNP-sets based on the genomic annotations of SNPs used in the model. SNPs that fall into the neighborhood of the same gene are grouped together into a SNP-set that corresponds to this gene. 
If we consider intergenic regions (based on user input), SNPs that fall into unannotated regions of the genome are grouped together into intergenic regions. For example, two SNPs that fall into the intergenic region between _gene A_ and _gene B_ would be grouped together into an intergenic region called _Intergenic_gene A_gene B_. \n", 10 | "\n", 11 | "In this example, we show how to carry out the annotation and create mask matrices (and annotation dataframes that store information on which SNP-sets contain which SNPs) using the annotation.py script provided. \n", 12 | "\n", 13 | "We need two inputs for carrying out biological annotations: \n", 14 | "**1)** A SNP list file of .map or .bim format \n", 15 | "**2)** A gene range file \n", 16 | "\n", 17 | "#### **SNP list file:** \n", 18 | "We expect .map formatted SNP lists to be tab-delimited files that have four fields/columns (with no header): Chromosome, Variant ID, Morgans, Position (bp). Our annotation script works with position information. If Morgan information is missing, you can replace this column with 0s (zeros) or leave it empty. We expect a similar format from .bim files but with six fields/columns: Chromosome, Variant ID, Morgans, Position, Minor Allele, Major Allele. If the files contain a different number of columns, annotation will not be carried out and you will receive a warning about the error. \n", 19 | "To get more information on file formats, you can visit: \n", 20 | "https://www.cog-genomics.org/plink2/formats#map \n", 21 | "https://www.cog-genomics.org/plink2/formats#bim \n", 22 | "\n", 23 | "**Important Note:** In order to obtain accurate results, please make sure the order of SNPs in the SNP List file corresponds to the order of SNPs in the genotype matrix fed into the model because indices of variables matter for the model and annotations. This means if variant rs7412 is the first SNP in the genotype matrix (i.e. 
the first column of the genotype matrix corresponds to variant rs7412), then the first row of the SNP list should have information on rs7412. \n", 24 | "\n", 25 | "\n", 26 | "#### **Gene range file:** \n", 27 | "We expect the gene range file to be a tab-delimited file with four fields/columns (with no header): Chromosome, Start, End, GeneID. We expect that the Start and End coordinates will be in base pairs. To get more information on this file format, you can visit the \"Gene range lists\" section in: \n", 28 | "https://www.cog-genomics.org/plink/1.9/resources \n", 29 | "and download gene range files for the human genome.\n", 30 | "\n", 31 | "#### **Example Files** \n", 32 | "In this tutorial, we will work with the very small SNP list and gene range lists provided in /sampleData/TestSNPList.txt and /sampleData/TestGeneList.txt files for the sake of demonstration. \n", 33 | "Here is what these files look like:" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 1, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "Printing SNP list file contents (.map formatted): \n", 46 | "\n", 47 | "2\trs2\t0\t11\n", 48 | "2\trs1\t0\t2\n", 49 | "18\trs3\t0\t13\n", 50 | "18\trs4\t0\t1021\n", 51 | "18\trs5\t0\t1800\n", 52 | "19\trs5\t0\t609\n", 53 | "19\trs6\t0\t5227\n", 54 | "19\trs7\t0\t10187\n", 55 | "19\trs8\t0\t12148\n", 56 | "X\trs9\t0\t1\n", 57 | "X\trs10\t0\t392\n", 58 | "X\trs11\t0\t1107\n", 59 | "X\trs12\t0\t4331\n", 60 | "\n", 61 | "\n", 62 | " Printing Gene range list file contents: \n", 63 | "\n", 64 | "18\t1023\t1803\tGene4\n", 65 | "7\t1267\t9569\tGene2\n", 66 | "19\t9532\t10187\tGene7\n", 67 | "X\t1\t501\tGene8\n", 68 | "19\t587\t791\tGene6\n", 69 | "18\t240\t391\tGene3\n", 70 | "X\t2675\t5092\tGene9\n", 71 | "X\t4061\t9582\tGene10\n", 72 | "18\t21200\t29080\tGene5\n", 73 | "7\t241\t905\tGene1\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "SNPList_file = 
open(\"sampleData/TestSNPList.txt\", \"r\")\n", 79 | "GeneList_file = open(\"sampleData/TestGeneList.txt\", \"r\")\n", 80 | "\n", 81 | "print(\"Printing SNP list file contents (.map formatted): \\n\")\n", 82 | "print(SNPList_file.read()) \n", 83 | "print(\"\\n\\n Printing Gene range list file contents: \\n\")\n", 84 | "print(GeneList_file.read()) " 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "### **Carrying out annotations**\n", 92 | "In this tutorial, we will create two files: \n", 93 | "**1)** an annotation dataframe that contains information on which SNP-sets contain which SNPs and what the genomic coordinates of these SNP-sets are, saved in a tab-delimited .txt file. \n", 94 | "This dataframe is sorted in ascending order based on the chromosomal location of the SNP-sets. \n", 95 | "Carried out by the *annotate()* function\n", 96 | "\n", 97 | "**2)** an annotation mask matrix, which is a sparse matrix of 0s and 1s used to guide the model architecture, saved in a tab-delimited .txt file. \n", 98 | "This is a matrix of size: (number of SNPs by number of SNP-sets). An entry in the matrix at location (row i, column j) tells us whether SNP i is contained in SNP-set j (yes if 1, no if 0). \n", 99 | "The rows of the mask matrix (which correspond to SNPs) are in the same order as the SNP list file, meaning the first row contains annotation information of the first SNP in the SNP list. 
The columns of the mask matrix (which correspond to SNP-sets) are in the same order as the annotation dataframe, so columns correspond to SNP-sets in an ascending order based on chromosomal location.\n", 100 | "Carried out by the *getMaskMatrix()* function\n", 101 | "\n", 102 | "#### **Parameters**\n", 103 | "There are a few parameters we require from the user to specify how to carry out the annotations and save the results, apart from giving the path to the input SNP list file and gene range list file:\n", 104 | "\n", 105 | "**output file**: Both *annotate()* and *getMaskMatrix()* functions require us to specify this. It should be the path to the .txt file where we want to save the results (either annotation dataframe or mask matrix).\n", 106 | "\n", 107 | "**intergenic:** For *annotate()* function. This is a boolean parameter that expects either True or False. The default value is True. \n", 108 | "If *True*, the annotation script creates intergenic SNP-sets and considers them when annotating the SNPs. If *False*, it simply groups SNPs into genes defined in the gene range list and all SNPs that are unannotated (in intergenic regions) are grouped into one SNP-set called \"UnAnnotated\". This \"UnAnnotated\" SNP-set is the last entry of the annotation dataframe and would be the last column of the mask matrix to be created from the annotation dataframe. If the gene range list provided by the user has all chromosomes, there will be no \"UnAnnotated\" SNP-set when this parameter is set to *True*. However, if there are chromosomes missing (as in the case of this example, just for demonstration), SNPs in the missing chromosomes will still be grouped into the \"UnAnnotated\" SNP-set. \n", 109 | "\n", 110 | "**buffer:** For *annotate()* function. This is an integer and the default value is 0. \n", 111 | "It tells the annotation script how many base pairs to allow for as a buffer when considering a SNP-set neighborhood. 
If set to, let's say, 50000, then we would allow a 50kb window around SNP-sets when considering whether a SNP is in the SNP-set or not. \n", 112 | "\n", 113 | "**dropSingletons:** For *annotate()* function. This is a boolean parameter, that expects either of the True or False values. The default value is False. \n", 114 | "When annotating, it is possible that some SNP-sets will only contain one SNP. We call these SNP-sets \"singletons\". In this case, whether they should still be considered a \"SNP-set\" is debatable. If you set this parameter value to *True*, then these SNP-sets will be dropped from the annotation, and SNPs will be re-annotated with the remaining SNP-sets. Otherwise (if set at *False*), singleton SNP-sets will be kept in the model.\n", 115 | "\n", 116 | "#### Creating the annotation dataframe:" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 2, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "name": "stderr", 126 | "output_type": "stream", 127 | "text": [ 128 | "100%|██████████| 10/10 [00:00<00:00, 241.81it/s]\n", 129 | "100%|██████████| 13/13 [00:00<00:00, 689.29it/s]" 130 | ] 131 | }, 132 | { 133 | "name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "You have chosen to annotate SNP-sets with intergenic regions and with a buffer of 0bp\n", 137 | "Creating Intergenic SNP-sets\n", 138 | "Annotating SNP-sets with the corresponding SNPs\n", 139 | "Saving annotation results to file sampleData/TestAnnotationDF.txt\n", 140 | " GeneID Chromosome Start End SNPindex \\\n", 141 | "0 Intergenic_Gene3_Gene4 18 392.0 1022.0 [3] \n", 142 | "1 Gene4 18 1023.0 1803.0 [4] \n", 143 | "2 Gene6 19 587.0 791.0 [5] \n", 144 | "3 Intergenic_Gene6_Gene7 19 792.0 9531.0 [6] \n", 145 | "4 Gene7 19 9532.0 10187.0 [7] \n", 146 | "5 Downstream_Gene7 19 10188.0 12148.0 [8] \n", 147 | "6 Gene8 X 1.0 501.0 [9, 10] \n", 148 | "7 Intergenic_Gene8_Gene9 X 502.0 2674.0 [11] \n", 149 | "8 Gene9 X 2675.0 5092.0 [12] \n", 150 | "9 
Gene10 X 4061.0 9582.0 [12] \n", 151 | "10 UnAnnotated NaN NaN NaN [1, 0, 2] \n", 152 | "\n", 153 | " VariantID \n", 154 | "0 [rs4] \n", 155 | "1 [rs5] \n", 156 | "2 [rs5] \n", 157 | "3 [rs6] \n", 158 | "4 [rs7] \n", 159 | "5 [rs8] \n", 160 | "6 [rs9, rs10] \n", 161 | "7 [rs11] \n", 162 | "8 [rs12] \n", 163 | "9 [rs12] \n", 164 | "10 [rs1, rs2, rs3] \n" 165 | ] 166 | }, 167 | { 168 | "name": "stderr", 169 | "output_type": "stream", 170 | "text": [ 171 | "\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "# Importing annotation.py from the src folder:\n", 177 | "import sys\n", 178 | "sys.path.insert(0, '../src/') #Need to do this for this notebook example since the code lives in a different directory \n", 179 | "\n", 180 | "from annotation import * # Import annotation code\n", 181 | "\n", 182 | "#Changing back to the original directory:\n", 183 | "sys.path.insert(0, '../examples_docs/')\n", 184 | "\n", 185 | "#Specifying the path to the SNP list file and gene range list file:\n", 186 | "path_to_SNPList=\"sampleData/TestSNPList.txt\"\n", 187 | "path_to_geneList=\"sampleData/TestGeneList.txt\"\n", 188 | "\n", 189 | "#Specifying the path to the file we want the save annotation dataframe into:\n", 190 | "file_toSave_annotationDF=\"sampleData/TestAnnotationDF.txt\"\n", 191 | "\n", 192 | "#Carrying out the annotation:\n", 193 | "annotationDF=annotate(path_to_SNPList, path_to_geneList, outputFile=file_toSave_annotationDF, \n", 194 | " intergenic=True, buffer=0, dropSingletons=False)\n", 195 | "\n", 196 | "print(annotationDF)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "Showing annotation results with different parameters:" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 3, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stderr", 213 | "output_type": "stream", 214 | "text": [ 215 | "100%|██████████| 13/13 [00:00<00:00, 859.38it/s]\n", 216 | "100%|██████████| 13/13 
[00:00<00:00, 838.38it/s]" 217 | ] 218 | }, 219 | { 220 | "name": "stdout", 221 | "output_type": "stream", 222 | "text": [ 223 | "You have chosen to annotate SNP-sets without intergenic regions and with a buffer of 500bp\n", 224 | "Annotating SNP-sets with the corresponding SNPs\n", 225 | "Dropping SNP-sets that are singletons (containing only one SNP) and re-annotating SNPs without them\n", 226 | "Annotating SNP-sets with the corresponding SNPs\n", 227 | "Saving annotation results to file sampleData/TestAnnotationDF.txt\n", 228 | " GeneID Chromosome Start End SNPindex \\\n", 229 | "0 Gene3 18 240.0 391.0 [2] \n", 230 | "1 Gene4 18 1023.0 1803.0 [3, 4] \n", 231 | "2 Gene6 19 587.0 791.0 [5] \n", 232 | "3 Gene7 19 9532.0 10187.0 [7] \n", 233 | "4 Gene8 X 1.0 501.0 [9, 10] \n", 234 | "5 Gene9 X 2675.0 5092.0 [12] \n", 235 | "6 Gene10 X 4061.0 9582.0 [12] \n", 236 | "7 UnAnnotated NaN NaN NaN [1, 0, 6, 8, 11] \n", 237 | "\n", 238 | " VariantID \n", 239 | "0 [rs3] \n", 240 | "1 [rs4, rs5] \n", 241 | "2 [rs5] \n", 242 | "3 [rs7] \n", 243 | "4 [rs9, rs10] \n", 244 | "5 [rs12] \n", 245 | "6 [rs12] \n", 246 | "7 [rs1, rs2, rs6, rs8, rs11] \n" 247 | ] 248 | }, 249 | { 250 | "name": "stderr", 251 | "output_type": "stream", 252 | "text": [ 253 | "\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "#Carrying out the annotation:\n", 259 | "annotationDF=annotate(path_to_SNPList, path_to_geneList, outputFile=file_toSave_annotationDF, \n", 260 | " intergenic=False, buffer=500, dropSingletons=True)\n", 261 | "\n", 262 | "print(annotationDF)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "#### Creating the mask matrix from the annotation dataframe:" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 4, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stderr", 279 | "output_type": "stream", 280 | "text": [ 281 | "100%|██████████| 8/8 [00:00<00:00, 3834.79it/s]" 282 | ] 283 | }, 284 | { 
285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "creating mask\n", 289 | "Saving annotation mask to file sampleData/TestMask.txt in tab-delimited format\n", 290 | "[[0. 0. 0. 0. 0. 0. 0. 1.]\n", 291 | " [0. 0. 0. 0. 0. 0. 0. 1.]\n", 292 | " [1. 0. 0. 0. 0. 0. 0. 0.]\n", 293 | " [0. 1. 0. 0. 0. 0. 0. 0.]\n", 294 | " [0. 1. 0. 0. 0. 0. 0. 0.]\n", 295 | " [0. 0. 1. 0. 0. 0. 0. 0.]\n", 296 | " [0. 0. 0. 0. 0. 0. 0. 1.]\n", 297 | " [0. 0. 0. 1. 0. 0. 0. 0.]\n", 298 | " [0. 0. 0. 0. 0. 0. 0. 1.]\n", 299 | " [0. 0. 0. 0. 1. 0. 0. 0.]\n", 300 | " [0. 0. 0. 0. 1. 0. 0. 0.]\n", 301 | " [0. 0. 0. 0. 0. 0. 0. 1.]\n", 302 | " [0. 0. 0. 0. 0. 1. 1. 0.]]\n" 303 | ] 304 | }, 305 | { 306 | "name": "stderr", 307 | "output_type": "stream", 308 | "text": [ 309 | "\n" 310 | ] 311 | } 312 | ], 313 | "source": [ 314 | "mask_outputFile=\"sampleData/TestMask.txt\"\n", 315 | "mask = getMaskMatrix(path_to_SNPList, annotationDF, mask_outputFile)\n", 316 | "print(mask)" 317 | ] 318 | } 319 | ], 320 | "metadata": { 321 | "kernelspec": { 322 | "display_name": "Python 3", 323 | "language": "python", 324 | "name": "python3" 325 | }, 326 | "language_info": { 327 | "codemirror_mode": { 328 | "name": "ipython", 329 | "version": 3 330 | }, 331 | "file_extension": ".py", 332 | "mimetype": "text/x-python", 333 | "name": "python", 334 | "nbconvert_exporter": "python", 335 | "pygments_lexer": "ipython3", 336 | "version": "3.7.6" 337 | } 338 | }, 339 | "nbformat": 4, 340 | "nbformat_minor": 4 341 | } 342 | -------------------------------------------------------------------------------- /BANN/examples_docs/annotation_tutorial.md: -------------------------------------------------------------------------------- 1 | ## Example for creating biological annotations for BANNs 2 | 3 | BANNs framework requires constructing an annotation mask in order to guide the model structure based on biological annotations. 
We create SNP-sets based on the genomic annotations of SNPs used in the model. SNPs that fall into the neighborhood of the same gene are grouped together into a SNP-set that corresponds to this gene. If we consider intergenic regions (based on user input), SNPs that fall into unannotated regions of the genome are grouped together into intergenic regions. For example, two SNPs that fall into the intergenic region between _gene A_ and _gene B_ would be grouped together into an intergenic region called _Intergenic_gene A_gene B_. 4 | 5 | In this example, we show how to carry out the annotation and create mask matrices (and annotation dataframes that store information on which SNP-sets contain which SNPs) using the annotation.py script provided. 6 | 7 | We need two inputs for carrying out biological annotations: 8 | **1)** A SNP list file of .map or .bim format 9 | **2)** A gene range file 10 | 11 | #### **SNP list file:** 12 | We expect .map formatted SNP lists to be tab-delimited files that have four fields/columns (with no header): Chromosome, Variant ID, Morgans, Position (bp). Our annotation script works with position information. If Morgan information is missing, you can replace this column with 0s (zeros) or leave it empty. We expect a similar format from .bim files but with six fields/columns: Chromosome, Variant ID, Morgans, Position, Minor Allele, Major Allele. If the files contain a different number of columns, the annotation will not be carried out and you will receive a warning about the error. 13 | To get more information on file formats, you can visit: 14 | https://www.cog-genomics.org/plink2/formats#map 15 | https://www.cog-genomics.org/plink2/formats#bim 16 | 17 | **Important Note:** In order to obtain accurate results, please make sure the order of SNPs in the SNP List file corresponds to the order of SNPs in the genotype matrix fed into the model because indices of variables matter for the model and annotations. 
This means if variant rs7412 is the first SNP in the genotype matrix (i.e. the first column of the genotype matrix corresponds to variant rs7412), then the first row of the SNP list should have information on rs7412. 18 | 19 | 20 | #### **Gene range file:** 21 | We expect the gene range file to be a tab-delimited file with four fields/columns (with no header): Chromosome, Start, End, GeneID. We expect that the Start and End coordinates will be in base pairs. To get more information on this file format, you can visit the "Gene range lists" section at: 22 | https://www.cog-genomics.org/plink/1.9/resources 23 | and download gene range files for the human genome. 24 | 25 | #### **Example Files** 26 | In this tutorial, we will work with the very small SNP list and gene range lists provided in /sampleData/TestSNPList.txt and /sampleData/TestGeneList.txt files for the sake of demonstration. 27 | Here is what these files look like: 28 | 29 | 30 | ```python 31 | SNPList_file = open("sampleData/TestSNPList.txt", "r") 32 | GeneList_file = open("sampleData/TestGeneList.txt", "r") 33 | 34 | print("Printing SNP list file contents (.map formatted): \n") 35 | print(SNPList_file.read()) 36 | print("\n\n Printing Gene range list file contents: \n") 37 | print(GeneList_file.read()) 38 | ``` 39 | 40 | Printing SNP list file contents (.map formatted): 41 | 42 | 2 rs2 0 11 43 | 2 rs1 0 2 44 | 18 rs3 0 13 45 | 18 rs4 0 1021 46 | 18 rs5 0 1800 47 | 19 rs5 0 609 48 | 19 rs6 0 5227 49 | 19 rs7 0 10187 50 | 19 rs8 0 12148 51 | X rs9 0 1 52 | X rs10 0 392 53 | X rs11 0 1107 54 | X rs12 0 4331 55 | 56 | 57 | Printing Gene range list file contents: 58 | 59 | 18 1023 1803 Gene4 60 | 7 1267 9569 Gene2 61 | 19 9532 10187 Gene7 62 | X 1 501 Gene8 63 | 19 587 791 Gene6 64 | 18 240 391 Gene3 65 | X 2675 5092 Gene9 66 | X 4061 9582 Gene10 67 | 18 21200 29080 Gene5 68 | 7 241 905 Gene1 69 | 70 | 71 | ### **Carrying out annotations** 72 | In this tutorial, we will create two files: 73 | **1)** an annotation 
dataframe that contains information on which SNP-sets contain which SNPs and what the genomic coordinates of these SNP-sets are, saved in a tab-delimited .txt file. 74 | This dataframe is sorted in ascending order based on the chromosomal location of the SNP-sets. 75 | Carried out by the *annotate()* function 76 | 77 | **2)** an annotation mask matrix, which is a sparse matrix of 0s and 1s used to guide the model architecture, saved in a tab-delimited .txt file. 78 | This is a matrix of size: (number of SNPs by number of SNP-sets). An entry in the matrix at location (row i, column j) tells us whether SNP i is contained in SNP-set j (yes if 1, no if 0). 79 | The rows of the mask matrix (which correspond to SNPs) are in the same order as the SNP list file, meaning the first row contains annotation information of the first SNP in the SNP list. The columns of the mask matrix (which correspond to SNP-sets) are in the same order as the annotation dataframe, so columns correspond to SNP-sets in ascending order based on chromosomal location. 80 | Carried out by the *getMaskMatrix()* function 81 | 82 | #### **Parameters** 83 | There are a few parameters we require from the user to specify how to carry out the annotations and save the results, apart from giving the path to the input SNP list file and gene range list file: 84 | 85 | **output file**: Both the *annotate()* and *getMaskMatrix()* functions require us to specify this. It should be the path to the .txt file where we want to save the results (either the annotation dataframe or the mask matrix). 86 | 87 | **intergenic:** For the *annotate()* function. This is a boolean parameter that expects either True or False. The default value is True. 88 | If *True*, the annotation script creates intergenic SNP-sets and considers them when annotating the SNPs. 
If *False*, it simply groups SNPs into genes defined in the gene range list and all SNPs that are unannotated (in intergenic regions) are grouped into one SNP-set called "UnAnnotated". This "UnAnnotated" SNP-set is the last entry of the annotation dataframe and would be the last column of the mask matrix to be created from the annotation dataframe. If the gene range list provided by the user has all chromosomes, there will be no "UnAnnotated" SNP-set when this parameter is set to *True*. However, if there are chromosomes missing (as in the case of this example, just for demonstrations), SNPs in the missing chromosomes will still be grouped into the "UnAnnotated" SNP-set. 89 | 90 | **buffer:** For the *annotate()* function. This is an integer and the default value is 0. 91 | It tells the annotation script how many base pairs to allow for as a buffer when considering a SNP-set neighborhood. If set to, let's say, 50000, then we would allow a 50kb window around SNP-sets when considering whether a SNP is in the SNP-set or not. 92 | 93 | **dropSingletons:** For the *annotate()* function. This is a boolean parameter that expects either True or False. The default value is False. 94 | When annotating, it is possible that some SNP-sets will only contain one SNP. We call these SNP-sets "singletons". In this case, whether they should still be considered a "SNP-set" is debatable. If you set this parameter value to *True*, then these SNP-sets will be dropped from the annotation, and SNPs will be re-annotated with the remaining SNP-sets. Otherwise (if set to *False*), singleton SNP-sets will be kept in the model. 
95 | 96 | #### Creating the annotation dataframe: 97 | 98 | 99 | ```python 100 | # Importing annotation.py from the src folder: 101 | import sys 102 | sys.path.insert(0, '../src/') #Need to do this for this notebook example since the code lives in a different directory 103 | 104 | from annotation import * # Import annotation code 105 | 106 | #Changing back to the original directory: 107 | sys.path.insert(0, '../examples_docs/') 108 | 109 | #Specifying the path to the SNP list file and gene range list file: 110 | path_to_SNPList="sampleData/TestSNPList.txt" 111 | path_to_geneList="sampleData/TestGeneList.txt" 112 | 113 | #Specifying the path to the file we want the save annotation dataframe into: 114 | file_toSave_annotationDF="sampleData/TestAnnotationDF.txt" 115 | 116 | #Carrying out the annotation: 117 | annotationDF=annotate(path_to_SNPList, path_to_geneList, outputFile=file_toSave_annotationDF, 118 | intergenic=True, buffer=0, dropSingletons=False) 119 | 120 | print(annotationDF) 121 | ``` 122 | 123 | 100%|██████████| 10/10 [00:00<00:00, 241.81it/s] 124 | 100%|██████████| 13/13 [00:00<00:00, 689.29it/s] 125 | 126 | You have chosen to annotate SNP-sets with intergenic regions and with a buffer of 0bp 127 | Creating Intergenic SNP-sets 128 | Annotating SNP-sets with the corresponding SNPs 129 | Saving annotation results to file sampleData/TestAnnotationDF.txt 130 | GeneID Chromosome Start End SNPindex \ 131 | 0 Intergenic_Gene3_Gene4 18 392.0 1022.0 [3] 132 | 1 Gene4 18 1023.0 1803.0 [4] 133 | 2 Gene6 19 587.0 791.0 [5] 134 | 3 Intergenic_Gene6_Gene7 19 792.0 9531.0 [6] 135 | 4 Gene7 19 9532.0 10187.0 [7] 136 | 5 Downstream_Gene7 19 10188.0 12148.0 [8] 137 | 6 Gene8 X 1.0 501.0 [9, 10] 138 | 7 Intergenic_Gene8_Gene9 X 502.0 2674.0 [11] 139 | 8 Gene9 X 2675.0 5092.0 [12] 140 | 9 Gene10 X 4061.0 9582.0 [12] 141 | 10 UnAnnotated NaN NaN NaN [1, 0, 2] 142 | 143 | VariantID 144 | 0 [rs4] 145 | 1 [rs5] 146 | 2 [rs5] 147 | 3 [rs6] 148 | 4 [rs7] 149 | 5 [rs8] 150 | 6 
[rs9, rs10] 151 | 7 [rs11] 152 | 8 [rs12] 153 | 9 [rs12] 154 | 10 [rs1, rs2, rs3] 155 | 156 | 157 | 158 | 159 | 160 | Showing annotation results with different parameters: 161 | 162 | 163 | ```python 164 | #Carrying out the annotation: 165 | annotationDF=annotate(path_to_SNPList, path_to_geneList, outputFile=file_toSave_annotationDF, 166 | intergenic=False, buffer=500, dropSingletons=True) 167 | 168 | print(annotationDF) 169 | ``` 170 | 171 | 100%|██████████| 13/13 [00:00<00:00, 859.38it/s] 172 | 100%|██████████| 13/13 [00:00<00:00, 838.38it/s] 173 | 174 | You have chosen to annotate SNP-sets without intergenic regions and with a buffer of 500bp 175 | Annotating SNP-sets with the corresponding SNPs 176 | Dropping SNP-sets that are singletons (containing only one SNP) and re-annotating SNPs without them 177 | Annotating SNP-sets with the corresponding SNPs 178 | Saving annotation results to file sampleData/TestAnnotationDF.txt 179 | GeneID Chromosome Start End SNPindex \ 180 | 0 Gene3 18 240.0 391.0 [2] 181 | 1 Gene4 18 1023.0 1803.0 [3, 4] 182 | 2 Gene6 19 587.0 791.0 [5] 183 | 3 Gene7 19 9532.0 10187.0 [7] 184 | 4 Gene8 X 1.0 501.0 [9, 10] 185 | 5 Gene9 X 2675.0 5092.0 [12] 186 | 6 Gene10 X 4061.0 9582.0 [12] 187 | 7 UnAnnotated NaN NaN NaN [1, 0, 6, 8, 11] 188 | 189 | VariantID 190 | 0 [rs3] 191 | 1 [rs4, rs5] 192 | 2 [rs5] 193 | 3 [rs7] 194 | 4 [rs9, rs10] 195 | 5 [rs12] 196 | 6 [rs12] 197 | 7 [rs1, rs2, rs6, rs8, rs11] 198 | 199 | 200 | 201 | 202 | 203 | #### Creating the mask matrix from the annotation dataframe: 204 | 205 | 206 | ```python 207 | mask_outputFile="sampleData/TestMask.txt" 208 | mask = getMaskMatrix(path_to_SNPList, annotationDF, mask_outputFile) 209 | print(mask) 210 | ``` 211 | 212 | 100%|██████████| 8/8 [00:00<00:00, 3834.79it/s] 213 | 214 | creating mask 215 | Saving annotation mask to file sampleData/TestMask.txt in tab-delimited format 216 | [[0. 0. 0. 0. 0. 0. 0. 1.] 217 | [0. 0. 0. 0. 0. 0. 0. 1.] 218 | [1. 0. 0. 0. 0. 0. 0. 0.] 
219 | [0. 1. 0. 0. 0. 0. 0. 0.] 220 | [0. 1. 0. 0. 0. 0. 0. 0.] 221 | [0. 0. 1. 0. 0. 0. 0. 0.] 222 | [0. 0. 0. 0. 0. 0. 0. 1.] 223 | [0. 0. 0. 1. 0. 0. 0. 0.] 224 | [0. 0. 0. 0. 0. 0. 0. 1.] 225 | [0. 0. 0. 0. 1. 0. 0. 0.] 226 | [0. 0. 0. 0. 1. 0. 0. 0.] 227 | [0. 0. 0. 0. 0. 0. 0. 1.] 228 | [0. 0. 0. 0. 0. 1. 1. 0.]] 229 | 230 | 231 | 232 | 233 | -------------------------------------------------------------------------------- /BANN/examples_docs/annotation_tutorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lcrawlab/BANNs/83f344744698fad00d985fbe1522016150155a79/BANN/examples_docs/annotation_tutorial.pdf -------------------------------------------------------------------------------- /BANN/examples_docs/sampleData/TestAnnotationDF.txt: -------------------------------------------------------------------------------- 1 | GeneID Chromosome Start End SNPindex VariantID 2 | 0 Gene3 18 240.0 391.0 [2] ['rs3'] 3 | 1 Gene4 18 1023.0 1803.0 [3, 4] ['rs4', 'rs5'] 4 | 2 Gene6 19 587.0 791.0 [5] ['rs5'] 5 | 3 Gene7 19 9532.0 10187.0 [7] ['rs7'] 6 | 4 Gene8 X 1.0 501.0 [9, 10] ['rs9', 'rs10'] 7 | 5 Gene9 X 2675.0 5092.0 [12] ['rs12'] 8 | 6 Gene10 X 4061.0 9582.0 [12] ['rs12'] 9 | 7 UnAnnotated [1, 0, 6, 8, 11] ['rs1', 'rs2', 'rs6', 'rs8', 'rs11'] 10 | -------------------------------------------------------------------------------- /BANN/examples_docs/sampleData/TestGeneList.txt: -------------------------------------------------------------------------------- 1 | 18 1023 1803 Gene4 2 | 7 1267 9569 Gene2 3 | 19 9532 10187 Gene7 4 | X 1 501 Gene8 5 | 19 587 791 Gene6 6 | 18 240 391 Gene3 7 | X 2675 5092 Gene9 8 | X 4061 9582 Gene10 9 | 18 21200 29080 Gene5 10 | 7 241 905 Gene1 -------------------------------------------------------------------------------- /BANN/examples_docs/sampleData/TestMask.txt: -------------------------------------------------------------------------------- 1 | 
0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 2 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 3 | 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 4 | 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 5 | 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 6 | 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 7 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 8 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 9 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 10 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 11 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 12 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 13 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 14 | -------------------------------------------------------------------------------- /BANN/examples_docs/sampleData/TestSNPList.txt: -------------------------------------------------------------------------------- 1 | 2 rs2 0 11 2 | 2 rs1 0 2 3 | 18 rs3 0 13 4 | 18 rs4 0 1021 5 | 18 rs5 0 1800 6 | 19 rs5 0 609 7 | 19 rs6 0 5227 8 | 19 rs7 0 10187 9 | 19 rs8 0 12148 10 | X rs9 0 1 11 | X rs10 0 392 12 | X rs11 0 1107 13 | X rs12 0 4331 -------------------------------------------------------------------------------- /BANN/examples_docs/sampleData/ytest2.txt: -------------------------------------------------------------------------------- 1 | 3.302699205051672915e-01 2 | -1.791984890697441157e-01 3 | 6.534160132734301296e-01 4 | 8.399233525907892073e-01 5 | 1.556026820368523911e+00 6 | 1.025304949774964447e+00 7 | -3.080791220336491221e-01 8 | -4.285236303089792198e-01 9 | -6.527605310973640940e-01 10 | 6.980532133250386684e-01 11 | 3.985507894520418093e-02 12 | -1.753735449002250313e+00 13 | 1.382586783476989378e+00 14 | -1.099305349796153575e-02 15 | 1.362610248589999573e+00 16 | 2.289272124736270619e+00 17 | -1.403363278400865166e+00 18 | 4.843749325270958472e-01 19 | 5.895913348087673533e-01 20 | 6.946495439929355475e-01 21 | 1.019753465749692944e+00 22 | 3.784893045511105170e-01 23 | -1.766571522047345022e+00 
24 | 6.846777773574661552e-01 25 | -8.718159737692904310e-01 26 | -5.211343559644757706e-01 27 | -2.160184290866042911e-01 28 | -6.520295807449520975e-01 29 | 6.478413851614432950e-01 30 | 2.206612141754243250e+00 31 | 3.170207805518278632e-01 32 | -7.049491511724172987e-01 33 | 1.803430131005862580e-01 34 | -4.575259667519573359e-01 35 | -1.423364691912865521e+00 36 | 7.801744550104140474e-01 37 | 8.360409460030592266e-01 38 | -1.117102467032716540e+00 39 | 7.247295035120905338e-01 40 | 1.676594019300048144e+00 41 | 9.196269303654385219e-01 42 | 9.686704592620403176e-01 43 | 1.115596511131896618e+00 44 | -9.334314032758084245e-03 45 | 6.981685455758932513e-01 46 | -3.797270869545408267e-01 47 | -7.534221065244818272e-01 48 | -1.800531131826058395e+00 49 | 2.171943208292770877e-01 50 | -6.542984041677886164e-01 51 | -1.491700666001091857e+00 52 | 2.135452199506928661e-01 53 | -3.447508784661402759e-01 54 | 8.003095390615716509e-01 55 | 2.016793407086660661e+00 56 | -1.655588738320000752e-01 57 | 1.027044295011209751e+00 58 | 5.334363046608782133e-01 59 | -3.493224970640189109e-01 60 | 1.900545571017221991e-01 61 | -1.229622448531636936e+00 62 | -1.506172779369323811e+00 63 | 1.320006065084640579e+00 64 | -9.630680277953825863e-01 65 | -3.043062790170845133e-01 66 | 8.468497909398284840e-01 67 | 1.922084811537885018e+00 68 | -9.381816277071117760e-01 69 | -1.974982080537978080e-01 70 | 8.455328433225925977e-01 71 | 4.250255505154675317e-01 72 | 1.249098202832171101e+00 73 | 8.758649347293225862e-01 74 | -8.375529354306010532e-02 75 | -9.770069523539730572e-01 76 | 1.055814094029889072e-01 77 | 1.308340258021908564e+00 78 | -1.999303384711923925e+00 79 | 8.739553656670000503e-01 80 | 1.406324318653469629e-01 81 | -2.671680197715869398e-01 82 | -1.495774760132349668e+00 83 | -1.445584642180541834e-01 84 | 1.469826447476022535e-01 85 | 4.425634618443843871e-01 86 | -9.876221234362744239e-01 87 | 1.190028945976621566e+00 88 | -1.184798412474796780e+00 89 | 
2.899431630975585206e-01 90 | 1.265783794550948915e+00 91 | 1.107429570908679550e-01 92 | -2.190802045709146217e-02 93 | 1.302115036146037408e+00 94 | -1.178316482683042565e+00 95 | -6.895670343658916179e-01 96 | -9.494457794014443675e-01 97 | 1.560173307672001020e+00 98 | -1.044634711364125224e+00 99 | -3.740665587069642029e-01 100 | 5.869856748827704251e-01 101 | 6.269161858492128259e-02 102 | -7.096323119315623984e-01 103 | 2.704066802379289092e-01 104 | 8.116797666295447744e-01 105 | -2.006206820842569627e+00 106 | -4.742903174365544205e-01 107 | 1.227956036213054158e+00 108 | 2.154245598607893442e-01 109 | -3.322799986372366288e+00 110 | -1.252950671786644321e+00 111 | -3.203686844539505030e-01 112 | 1.762558658020122238e+00 113 | -1.430541422892197545e+00 114 | -7.617989136794770522e-01 115 | 5.636815617162090897e-01 116 | 1.618436587508514268e+00 117 | -7.919195085301783399e-01 118 | -2.517298685851198092e-01 119 | 5.507613656569887439e-02 120 | -4.992757053681605717e-01 121 | -8.221149866297092768e-01 122 | 1.895749816120100295e+00 123 | 1.510308383422682921e+00 124 | 7.233246793227017246e-01 125 | 4.943094314296069269e-01 126 | 2.686502350557978813e-01 127 | 5.308018876675484332e-01 128 | -6.798275510232235641e-01 129 | 1.140319286863443082e+00 130 | -2.045016672449815776e+00 131 | 3.117321736189795356e-01 132 | 7.876613090899131642e-01 133 | 1.609279952342915099e+00 134 | 1.559968290609437425e+00 135 | 5.410704927976743850e-01 136 | 1.516001418141553092e+00 137 | -2.228175824538830785e+00 138 | -1.579474046270241683e+00 139 | -2.275329274549100500e-01 140 | 1.183163120394201684e+00 141 | 3.446215744590546137e-01 142 | 1.033767365247086012e+00 143 | 1.420314878538844194e+00 144 | -1.393746072716227857e+00 145 | -1.439926545864239626e-01 146 | 2.422811441704130764e-01 147 | 4.446395863105797597e-01 148 | 3.034300261449333624e-01 149 | -5.053956694676903227e-02 150 | 2.184778430901788138e+00 151 | 6.944937220671890676e-01 152 | 3.384820475454403121e-01 153 | 
-8.774790666622397928e-02 154 | 9.896778545524220760e-01 155 | 3.575939986059015752e-01 156 | 9.497971812138623626e-01 157 | 9.978045169824619753e-01 158 | -1.233011626421868678e+00 159 | 1.098487351485645913e+00 160 | -1.317877978770154002e+00 161 | 1.395502305082253480e-01 162 | -1.944080993175287264e-01 163 | 1.099411215307897649e+00 164 | -4.611919539759584685e-02 165 | 2.216552955127832902e+00 166 | 4.190177635240225351e-01 167 | 4.311313594386274595e-01 168 | -2.698460861864424576e-01 169 | -5.322635715914778576e-01 170 | 1.772695433388685959e+00 171 | -1.903595729378351475e-01 172 | 1.747510801497993871e+00 173 | 5.813394789381132188e-01 174 | -6.506725292236779090e-01 175 | 8.530311907245291536e-01 176 | 5.288088774853342278e-01 177 | 1.512390387872528108e-01 178 | -1.660965839141131939e-01 179 | -1.550442191327955133e+00 180 | 7.096436493683120350e-01 181 | -1.429718245609688854e+00 182 | -4.730766208938691486e-02 183 | -2.297177758859555219e+00 184 | 2.024016401865855741e+00 185 | -3.606247844054689677e-01 186 | -1.967038765681998203e-01 187 | -1.175693942394429925e+00 188 | -6.166935330764338197e-01 189 | -1.303433930423545606e+00 190 | 1.379278690403743957e+00 191 | -1.801051245716570159e+00 192 | -8.316153553236778073e-01 193 | 4.826987595325618607e-02 194 | -1.932693806269154102e+00 195 | 3.306055661712584692e-01 196 | 4.727729107510953432e-01 197 | -1.120119143660660566e-01 198 | 2.816907055033830432e-01 199 | -4.610002299112688218e-01 200 | -1.270749748384557121e+00 201 | 1.493590706040362481e-01 202 | 7.401776313208052738e-01 203 | -2.444622630139753339e-02 204 | 9.913889195519687281e-01 205 | 8.107809289488032700e-01 206 | -4.745841114208648737e-01 207 | -2.392574611421883546e+00 208 | 8.075211563140243243e-01 209 | -9.522761205749641356e-01 210 | -9.713129499948194301e-01 211 | -2.545517309204949008e+00 212 | -1.670618969757803329e-01 213 | 7.088031020529516057e-01 214 | -3.112600376545854086e+00 215 | 9.590913265531980647e-02 216 | 
2.123193805887545471e-01 217 | -1.775528289777660529e+00 218 | -1.355553262149652993e-01 219 | -1.149199182604072167e+00 220 | -2.060017922099165733e-01 221 | -7.045111705979376637e-01 222 | -1.264818150399887875e-02 223 | 7.804185324762276110e-01 224 | -9.597232452088446664e-01 225 | 4.859713986277078845e-01 226 | -1.459450838987433663e+00 227 | -4.494041872769107271e-01 228 | -1.843725114817440502e-01 229 | -3.046533637893111823e-02 230 | -3.429041117125692928e-01 231 | 4.600220021076839583e-01 232 | 7.992491519345262807e-01 233 | -1.761342248088056950e-01 234 | 4.909360080698917383e-01 235 | -1.463720530468466119e-01 236 | -4.949496582920932530e-01 237 | -2.608780495210504258e-01 238 | 5.073629847558664974e-01 239 | -1.444695043901086384e+00 240 | 4.564941050355350582e-01 241 | -9.455396696109724664e-02 242 | -2.264654950408834244e-01 243 | -3.119799646219164613e-01 244 | -9.475615184039243299e-01 245 | 4.346183447836082792e-01 246 | 1.318673203993327547e+00 247 | -1.731621220080324930e+00 248 | 4.002198102227904619e-01 249 | 2.236199501456228589e-01 250 | -1.456740868952281387e+00 251 | -7.483864260224803999e-01 252 | 2.781763277149327807e-01 253 | -5.083796355490433072e-02 254 | -1.543813588510090318e+00 255 | 1.471812939980061685e-01 256 | 3.524208114498960098e-01 257 | -1.881419019886425792e+00 258 | -1.226635937822054379e+00 259 | 7.655968936040297157e-01 260 | 1.980230156311981737e+00 261 | 5.870007826811707297e-01 262 | -7.357067979215945464e-01 263 | -2.225535058523311349e-01 264 | -1.104160125099278744e+00 265 | 1.090543579269310781e-01 266 | 7.900476036227139787e-01 267 | -8.324489215157986344e-01 268 | 1.364311684540918002e+00 269 | -6.475286877580811451e-01 270 | -1.607658888495256400e-01 271 | -5.324114855091475196e-01 272 | -1.056277326454064941e-01 273 | -1.317775575116790998e+00 274 | -7.704801706849553344e-01 275 | -5.841301690243778655e-01 276 | 9.130573963161674467e-01 277 | -8.443276306924524244e-01 278 | 3.543041778316148305e-01 279 | 
9.262284593721443748e-01 280 | 8.525185208585894792e-01 281 | 1.953273935511778969e-01 282 | 1.943694017097486126e+00 283 | -1.607479115498159361e-01 284 | 2.320012271627510669e-01 285 | -9.454027813008476500e-01 286 | 2.667150977478949803e-01 287 | -3.306103590721429231e-02 288 | 8.155900417416187542e-01 289 | -1.394741837350123026e-01 290 | 1.017352449320720886e+00 291 | -1.018111542047163365e+00 292 | 1.111243800627390588e-01 293 | 1.841064048722778623e-01 294 | -9.171631413442208336e-01 295 | 1.491576656685520419e-01 296 | 3.301750995973027902e-01 297 | 1.448524909798712734e+00 298 | -4.935477655192390123e-01 299 | -1.186272641171328024e+00 300 | 1.050138850185814654e+00 301 | 4.248341341800393467e-02 302 | 5.180760057640154681e-01 303 | -1.344060358949045186e+00 304 | 7.244629124260422159e-01 305 | -2.026055734686455168e+00 306 | 2.012491235535904366e+00 307 | 1.486562260112390121e-01 308 | 4.014833388812801829e-01 309 | 7.225190901601930005e-02 310 | -2.561319155583976936e+00 311 | -6.250889142245228403e-01 312 | -4.944307513511485563e-01 313 | 3.272974916623664909e-01 314 | 7.343959585017265645e-01 315 | -1.214884957727174974e+00 316 | -7.224660715240673881e-01 317 | 1.452550013810507135e-01 318 | -1.273948502733697197e+00 319 | -1.037466240061464928e+00 320 | -1.130173596249533841e+00 321 | -1.530078576471447516e-01 322 | 5.560240336847304121e-01 323 | -4.972650586162235542e-01 324 | -2.007911494147718301e-02 325 | -1.790683677974957977e-02 326 | -1.836955088990703810e+00 327 | 5.001971291784850226e-01 328 | 1.245839250626418204e+00 329 | -7.942036117934712092e-01 330 | -8.714522486293907466e-01 331 | 9.697799151466874878e-01 332 | -3.751074272876032678e-01 333 | -5.651428693620169641e-02 334 | 3.228794714923454157e-01 335 | 5.836968380215608043e-01 336 | 2.152401628070736983e+00 337 | 1.609800594935407503e+00 338 | 1.236738115299770069e+00 339 | -2.676404757271840795e-01 340 | -2.326979098973492199e+00 341 | -1.810138666883705172e+00 342 | 
9.581569623849922612e-01 343 | 1.317769781534302576e+00 344 | -4.251763433239606482e-01 345 | -1.472129980693753115e+00 346 | 6.118792844920761809e-01 347 | 1.242644047390697892e+00 348 | 3.739924511017619357e-01 349 | -5.794527856648523656e-01 350 | 6.483000013396200689e-01 351 | -2.615097463538281763e-01 352 | -6.436682202512409301e-01 353 | -2.156828973432358243e+00 354 | -2.577663363513666983e+00 355 | -9.743366278577381134e-01 356 | 1.762827482690854097e-03 357 | 1.087507838934223869e+00 358 | 1.354662420285113411e+00 359 | -4.073025296611457180e-01 360 | -1.699088043917302837e-01 361 | 1.260384840820860664e-03 362 | 7.067678784569473294e-01 363 | 6.415022405748986856e-01 364 | 4.495501862908073409e-01 365 | 9.926016685516636517e-02 366 | -1.408934677213084630e+00 367 | -7.011111197328112610e-02 368 | -1.169568267990124810e-01 369 | 5.858605328019879099e-01 370 | -6.099154526228208750e-01 371 | 4.891417822656463077e-01 372 | 4.683163836424524828e-01 373 | 1.211451808350694215e+00 374 | -5.888853750020022204e-01 375 | -1.033991898842569590e+00 376 | 9.364671869927003733e-01 377 | -1.552638764375567026e+00 378 | -9.876211136045585093e-01 379 | -1.185244686960529314e-01 380 | 1.192221781418800752e-01 381 | -6.143507827435117363e-01 382 | 2.183285402289837362e-01 383 | 3.461493829782240628e-01 384 | 5.612379125576638650e-02 385 | 7.983451718212446480e-01 386 | -1.493046515169039301e+00 387 | 4.496678923634360725e-01 388 | 6.900473365244478297e-01 389 | 1.624852756142459165e-01 390 | 1.428660710573243620e+00 391 | 6.783026127735215516e-01 392 | 8.156644857491099909e-01 393 | -8.176842130365291617e-01 394 | -4.078576143601339021e-01 395 | -4.107106390943476848e-02 396 | -4.087306268951235166e-01 397 | 1.635683152797111406e+00 398 | 1.721253131323347563e-01 399 | 2.035845643906021962e+00 400 | -1.443256265529180293e+00 401 | -1.410134319267015435e-01 402 | 1.924501489060005621e+00 403 | -7.770629210982746837e-01 404 | -1.075607962921119709e+00 405 | 
-1.623647148286957165e-01 406 | 5.112755813906124658e-01 407 | 5.922334444171963863e-01 408 | -5.807895817818100603e-01 409 | -6.458832589934148105e-01 410 | 2.057303748235189134e-01 411 | 4.635908317698239078e-01 412 | 8.797925131443877778e-01 413 | 2.396223455519140844e-01 414 | 9.194711527564206621e-01 415 | -1.312359079015942065e+00 416 | -5.311826171322308054e-01 417 | -5.590906496166934136e-01 418 | -9.539365962490577999e-01 419 | 1.772703144302811618e+00 420 | -8.330360235295695137e-01 421 | 1.277082377654755740e+00 422 | 1.537452930299619824e-01 423 | 6.122398177610360870e-01 424 | -5.111307843215766311e-02 425 | 1.723552690497105022e+00 426 | 3.505466337357528994e-01 427 | 6.823631461245818208e-01 428 | -1.708629924648899234e-01 429 | -4.101030717295326422e-01 430 | 1.047453204293033568e+00 431 | -1.724188359352027078e+00 432 | 2.142617291869070684e+00 433 | 1.638505918805012840e+00 434 | 1.635201537650587511e+00 435 | 1.200776299033293304e+00 436 | 2.445799042176360238e+00 437 | -3.031410780598277066e-01 438 | -1.275868197865901443e+00 439 | 7.558365578668313489e-01 440 | -6.046345975930491612e-01 441 | 3.570881947771707976e-01 442 | 6.391089869893251674e-01 443 | 1.567907000944543272e+00 444 | -1.696141785221946807e-01 445 | 3.356028081327624979e-01 446 | -1.471848134596646318e+00 447 | -1.301384548811318487e-03 448 | -2.381947017920932552e+00 449 | 2.378399207251564174e-01 450 | -4.754624704679308866e-01 451 | 8.113509342916469524e-01 452 | -1.075095137949004354e+00 453 | 1.995812806571243492e+00 454 | -1.148963283787803658e+00 455 | -4.799918226848595704e-01 456 | 1.224497147238111205e+00 457 | 1.320471353616479382e+00 458 | 2.037760555712982979e-01 459 | -3.182743407077354147e-01 460 | -1.313201148679705055e+00 461 | -1.366265813206189783e+00 462 | -1.364962447931120648e+00 463 | -3.189155495522324468e-01 464 | -6.919993006148310588e-01 465 | -7.504641815042043529e-01 466 | -1.397918515755918634e+00 467 | 6.689748370534475130e-01 468 | 
-2.647668388489154978e-01 469 | 4.150021210780954206e-01 470 | 2.844089191419686413e-01 471 | 1.057944442514640926e+00 472 | -3.173966075333767289e-01 473 | -5.931544952675916749e-01 474 | -4.421362550720017714e-01 475 | -3.193720897654495072e-01 476 | -1.385696231992794125e+00 477 | 6.215763437701995464e-01 478 | 1.103883064559392446e+00 479 | 2.909668492050688782e-01 480 | 6.578373883013386436e-01 481 | 5.309349137303962518e-01 482 | 5.467391920121882354e-01 483 | 1.053825587124523899e-05 484 | 3.993263397051232122e-01 485 | 2.837680225720167759e-01 486 | -9.951841140516257678e-01 487 | -2.550443160961216371e+00 488 | 2.129023209663577276e+00 489 | -2.205609549403549086e-01 490 | 5.850312829406834059e-01 491 | -1.164919179626649814e+00 492 | -1.858388126734852042e-01 493 | -6.984423646984803113e-01 494 | 5.481795713439259199e-01 495 | 1.104223338658099962e+00 496 | 6.321888095821297515e-01 497 | 3.075332037802628316e-01 498 | 1.218933105457229660e+00 499 | -5.547474015262293667e-01 500 | -8.203629501125871970e-01 501 | -------------------------------------------------------------------------------- /BANN/examples_docs/ytest2.txt: -------------------------------------------------------------------------------- 1 | 3.302699205051672915e-01 2 | -1.791984890697441157e-01 3 | 6.534160132734301296e-01 4 | 8.399233525907892073e-01 5 | 1.556026820368523911e+00 6 | 1.025304949774964447e+00 7 | -3.080791220336491221e-01 8 | -4.285236303089792198e-01 9 | -6.527605310973640940e-01 10 | 6.980532133250386684e-01 11 | 3.985507894520418093e-02 12 | -1.753735449002250313e+00 13 | 1.382586783476989378e+00 14 | -1.099305349796153575e-02 15 | 1.362610248589999573e+00 16 | 2.289272124736270619e+00 17 | -1.403363278400865166e+00 18 | 4.843749325270958472e-01 19 | 5.895913348087673533e-01 20 | 6.946495439929355475e-01 21 | 1.019753465749692944e+00 22 | 3.784893045511105170e-01 23 | -1.766571522047345022e+00 24 | 6.846777773574661552e-01 25 | -8.718159737692904310e-01 26 | 
-5.211343559644757706e-01 27 | -2.160184290866042911e-01 28 | -6.520295807449520975e-01 29 | 6.478413851614432950e-01 30 | 2.206612141754243250e+00 31 | 3.170207805518278632e-01 32 | -7.049491511724172987e-01 33 | 1.803430131005862580e-01 34 | -4.575259667519573359e-01 35 | -1.423364691912865521e+00 36 | 7.801744550104140474e-01 37 | 8.360409460030592266e-01 38 | -1.117102467032716540e+00 39 | 7.247295035120905338e-01 40 | 1.676594019300048144e+00 41 | 9.196269303654385219e-01 42 | 9.686704592620403176e-01 43 | 1.115596511131896618e+00 44 | -9.334314032758084245e-03 45 | 6.981685455758932513e-01 46 | -3.797270869545408267e-01 47 | -7.534221065244818272e-01 48 | -1.800531131826058395e+00 49 | 2.171943208292770877e-01 50 | -6.542984041677886164e-01 51 | -1.491700666001091857e+00 52 | 2.135452199506928661e-01 53 | -3.447508784661402759e-01 54 | 8.003095390615716509e-01 55 | 2.016793407086660661e+00 56 | -1.655588738320000752e-01 57 | 1.027044295011209751e+00 58 | 5.334363046608782133e-01 59 | -3.493224970640189109e-01 60 | 1.900545571017221991e-01 61 | -1.229622448531636936e+00 62 | -1.506172779369323811e+00 63 | 1.320006065084640579e+00 64 | -9.630680277953825863e-01 65 | -3.043062790170845133e-01 66 | 8.468497909398284840e-01 67 | 1.922084811537885018e+00 68 | -9.381816277071117760e-01 69 | -1.974982080537978080e-01 70 | 8.455328433225925977e-01 71 | 4.250255505154675317e-01 72 | 1.249098202832171101e+00 73 | 8.758649347293225862e-01 74 | -8.375529354306010532e-02 75 | -9.770069523539730572e-01 76 | 1.055814094029889072e-01 77 | 1.308340258021908564e+00 78 | -1.999303384711923925e+00 79 | 8.739553656670000503e-01 80 | 1.406324318653469629e-01 81 | -2.671680197715869398e-01 82 | -1.495774760132349668e+00 83 | -1.445584642180541834e-01 84 | 1.469826447476022535e-01 85 | 4.425634618443843871e-01 86 | -9.876221234362744239e-01 87 | 1.190028945976621566e+00 88 | -1.184798412474796780e+00 89 | 2.899431630975585206e-01 90 | 1.265783794550948915e+00 91 | 
1.107429570908679550e-01 92 | -2.190802045709146217e-02 93 | 1.302115036146037408e+00 94 | -1.178316482683042565e+00 95 | -6.895670343658916179e-01 96 | -9.494457794014443675e-01 97 | 1.560173307672001020e+00 98 | -1.044634711364125224e+00 99 | -3.740665587069642029e-01 100 | 5.869856748827704251e-01 101 | 6.269161858492128259e-02 102 | -7.096323119315623984e-01 103 | 2.704066802379289092e-01 104 | 8.116797666295447744e-01 105 | -2.006206820842569627e+00 106 | -4.742903174365544205e-01 107 | 1.227956036213054158e+00 108 | 2.154245598607893442e-01 109 | -3.322799986372366288e+00 110 | -1.252950671786644321e+00 111 | -3.203686844539505030e-01 112 | 1.762558658020122238e+00 113 | -1.430541422892197545e+00 114 | -7.617989136794770522e-01 115 | 5.636815617162090897e-01 116 | 1.618436587508514268e+00 117 | -7.919195085301783399e-01 118 | -2.517298685851198092e-01 119 | 5.507613656569887439e-02 120 | -4.992757053681605717e-01 121 | -8.221149866297092768e-01 122 | 1.895749816120100295e+00 123 | 1.510308383422682921e+00 124 | 7.233246793227017246e-01 125 | 4.943094314296069269e-01 126 | 2.686502350557978813e-01 127 | 5.308018876675484332e-01 128 | -6.798275510232235641e-01 129 | 1.140319286863443082e+00 130 | -2.045016672449815776e+00 131 | 3.117321736189795356e-01 132 | 7.876613090899131642e-01 133 | 1.609279952342915099e+00 134 | 1.559968290609437425e+00 135 | 5.410704927976743850e-01 136 | 1.516001418141553092e+00 137 | -2.228175824538830785e+00 138 | -1.579474046270241683e+00 139 | -2.275329274549100500e-01 140 | 1.183163120394201684e+00 141 | 3.446215744590546137e-01 142 | 1.033767365247086012e+00 143 | 1.420314878538844194e+00 144 | -1.393746072716227857e+00 145 | -1.439926545864239626e-01 146 | 2.422811441704130764e-01 147 | 4.446395863105797597e-01 148 | 3.034300261449333624e-01 149 | -5.053956694676903227e-02 150 | 2.184778430901788138e+00 151 | 6.944937220671890676e-01 152 | 3.384820475454403121e-01 153 | -8.774790666622397928e-02 154 | 9.896778545524220760e-01 
155 | 3.575939986059015752e-01 156 | 9.497971812138623626e-01 157 | 9.978045169824619753e-01 158 | -1.233011626421868678e+00 159 | 1.098487351485645913e+00 160 | -1.317877978770154002e+00 161 | 1.395502305082253480e-01 162 | -1.944080993175287264e-01 163 | 1.099411215307897649e+00 164 | -4.611919539759584685e-02 165 | 2.216552955127832902e+00 166 | 4.190177635240225351e-01 167 | 4.311313594386274595e-01 168 | -2.698460861864424576e-01 169 | -5.322635715914778576e-01 170 | 1.772695433388685959e+00 171 | -1.903595729378351475e-01 172 | 1.747510801497993871e+00 173 | 5.813394789381132188e-01 174 | -6.506725292236779090e-01 175 | 8.530311907245291536e-01 176 | 5.288088774853342278e-01 177 | 1.512390387872528108e-01 178 | -1.660965839141131939e-01 179 | -1.550442191327955133e+00 180 | 7.096436493683120350e-01 181 | -1.429718245609688854e+00 182 | -4.730766208938691486e-02 183 | -2.297177758859555219e+00 184 | 2.024016401865855741e+00 185 | -3.606247844054689677e-01 186 | -1.967038765681998203e-01 187 | -1.175693942394429925e+00 188 | -6.166935330764338197e-01 189 | -1.303433930423545606e+00 190 | 1.379278690403743957e+00 191 | -1.801051245716570159e+00 192 | -8.316153553236778073e-01 193 | 4.826987595325618607e-02 194 | -1.932693806269154102e+00 195 | 3.306055661712584692e-01 196 | 4.727729107510953432e-01 197 | -1.120119143660660566e-01 198 | 2.816907055033830432e-01 199 | -4.610002299112688218e-01 200 | -1.270749748384557121e+00 201 | 1.493590706040362481e-01 202 | 7.401776313208052738e-01 203 | -2.444622630139753339e-02 204 | 9.913889195519687281e-01 205 | 8.107809289488032700e-01 206 | -4.745841114208648737e-01 207 | -2.392574611421883546e+00 208 | 8.075211563140243243e-01 209 | -9.522761205749641356e-01 210 | -9.713129499948194301e-01 211 | -2.545517309204949008e+00 212 | -1.670618969757803329e-01 213 | 7.088031020529516057e-01 214 | -3.112600376545854086e+00 215 | 9.590913265531980647e-02 216 | 2.123193805887545471e-01 217 | -1.775528289777660529e+00 218 | 
-1.355553262149652993e-01 219 | -1.149199182604072167e+00 220 | -2.060017922099165733e-01 221 | -7.045111705979376637e-01 222 | -1.264818150399887875e-02 223 | 7.804185324762276110e-01 224 | -9.597232452088446664e-01 225 | 4.859713986277078845e-01 226 | -1.459450838987433663e+00 227 | -4.494041872769107271e-01 228 | -1.843725114817440502e-01 229 | -3.046533637893111823e-02 230 | -3.429041117125692928e-01 231 | 4.600220021076839583e-01 232 | 7.992491519345262807e-01 233 | -1.761342248088056950e-01 234 | 4.909360080698917383e-01 235 | -1.463720530468466119e-01 236 | -4.949496582920932530e-01 237 | -2.608780495210504258e-01 238 | 5.073629847558664974e-01 239 | -1.444695043901086384e+00 240 | 4.564941050355350582e-01 241 | -9.455396696109724664e-02 242 | -2.264654950408834244e-01 243 | -3.119799646219164613e-01 244 | -9.475615184039243299e-01 245 | 4.346183447836082792e-01 246 | 1.318673203993327547e+00 247 | -1.731621220080324930e+00 248 | 4.002198102227904619e-01 249 | 2.236199501456228589e-01 250 | -1.456740868952281387e+00 251 | -7.483864260224803999e-01 252 | 2.781763277149327807e-01 253 | -5.083796355490433072e-02 254 | -1.543813588510090318e+00 255 | 1.471812939980061685e-01 256 | 3.524208114498960098e-01 257 | -1.881419019886425792e+00 258 | -1.226635937822054379e+00 259 | 7.655968936040297157e-01 260 | 1.980230156311981737e+00 261 | 5.870007826811707297e-01 262 | -7.357067979215945464e-01 263 | -2.225535058523311349e-01 264 | -1.104160125099278744e+00 265 | 1.090543579269310781e-01 266 | 7.900476036227139787e-01 267 | -8.324489215157986344e-01 268 | 1.364311684540918002e+00 269 | -6.475286877580811451e-01 270 | -1.607658888495256400e-01 271 | -5.324114855091475196e-01 272 | -1.056277326454064941e-01 273 | -1.317775575116790998e+00 274 | -7.704801706849553344e-01 275 | -5.841301690243778655e-01 276 | 9.130573963161674467e-01 277 | -8.443276306924524244e-01 278 | 3.543041778316148305e-01 279 | 9.262284593721443748e-01 280 | 8.525185208585894792e-01 281 | 
1.953273935511778969e-01 282 | 1.943694017097486126e+00 283 | -1.607479115498159361e-01 284 | 2.320012271627510669e-01 285 | -9.454027813008476500e-01 286 | 2.667150977478949803e-01 287 | -3.306103590721429231e-02 288 | 8.155900417416187542e-01 289 | -1.394741837350123026e-01 290 | 1.017352449320720886e+00 291 | -1.018111542047163365e+00 292 | 1.111243800627390588e-01 293 | 1.841064048722778623e-01 294 | -9.171631413442208336e-01 295 | 1.491576656685520419e-01 296 | 3.301750995973027902e-01 297 | 1.448524909798712734e+00 298 | -4.935477655192390123e-01 299 | -1.186272641171328024e+00 300 | 1.050138850185814654e+00 301 | 4.248341341800393467e-02 302 | 5.180760057640154681e-01 303 | -1.344060358949045186e+00 304 | 7.244629124260422159e-01 305 | -2.026055734686455168e+00 306 | 2.012491235535904366e+00 307 | 1.486562260112390121e-01 308 | 4.014833388812801829e-01 309 | 7.225190901601930005e-02 310 | -2.561319155583976936e+00 311 | -6.250889142245228403e-01 312 | -4.944307513511485563e-01 313 | 3.272974916623664909e-01 314 | 7.343959585017265645e-01 315 | -1.214884957727174974e+00 316 | -7.224660715240673881e-01 317 | 1.452550013810507135e-01 318 | -1.273948502733697197e+00 319 | -1.037466240061464928e+00 320 | -1.130173596249533841e+00 321 | -1.530078576471447516e-01 322 | 5.560240336847304121e-01 323 | -4.972650586162235542e-01 324 | -2.007911494147718301e-02 325 | -1.790683677974957977e-02 326 | -1.836955088990703810e+00 327 | 5.001971291784850226e-01 328 | 1.245839250626418204e+00 329 | -7.942036117934712092e-01 330 | -8.714522486293907466e-01 331 | 9.697799151466874878e-01 332 | -3.751074272876032678e-01 333 | -5.651428693620169641e-02 334 | 3.228794714923454157e-01 335 | 5.836968380215608043e-01 336 | 2.152401628070736983e+00 337 | 1.609800594935407503e+00 338 | 1.236738115299770069e+00 339 | -2.676404757271840795e-01 340 | -2.326979098973492199e+00 341 | -1.810138666883705172e+00 342 | 9.581569623849922612e-01 343 | 1.317769781534302576e+00 344 | 
-4.251763433239606482e-01 345 | -1.472129980693753115e+00 346 | 6.118792844920761809e-01 347 | 1.242644047390697892e+00 348 | 3.739924511017619357e-01 349 | -5.794527856648523656e-01 350 | 6.483000013396200689e-01 351 | -2.615097463538281763e-01 352 | -6.436682202512409301e-01 353 | -2.156828973432358243e+00 354 | -2.577663363513666983e+00 355 | -9.743366278577381134e-01 356 | 1.762827482690854097e-03 357 | 1.087507838934223869e+00 358 | 1.354662420285113411e+00 359 | -4.073025296611457180e-01 360 | -1.699088043917302837e-01 361 | 1.260384840820860664e-03 362 | 7.067678784569473294e-01 363 | 6.415022405748986856e-01 364 | 4.495501862908073409e-01 365 | 9.926016685516636517e-02 366 | -1.408934677213084630e+00 367 | -7.011111197328112610e-02 368 | -1.169568267990124810e-01 369 | 5.858605328019879099e-01 370 | -6.099154526228208750e-01 371 | 4.891417822656463077e-01 372 | 4.683163836424524828e-01 373 | 1.211451808350694215e+00 374 | -5.888853750020022204e-01 375 | -1.033991898842569590e+00 376 | 9.364671869927003733e-01 377 | -1.552638764375567026e+00 378 | -9.876211136045585093e-01 379 | -1.185244686960529314e-01 380 | 1.192221781418800752e-01 381 | -6.143507827435117363e-01 382 | 2.183285402289837362e-01 383 | 3.461493829782240628e-01 384 | 5.612379125576638650e-02 385 | 7.983451718212446480e-01 386 | -1.493046515169039301e+00 387 | 4.496678923634360725e-01 388 | 6.900473365244478297e-01 389 | 1.624852756142459165e-01 390 | 1.428660710573243620e+00 391 | 6.783026127735215516e-01 392 | 8.156644857491099909e-01 393 | -8.176842130365291617e-01 394 | -4.078576143601339021e-01 395 | -4.107106390943476848e-02 396 | -4.087306268951235166e-01 397 | 1.635683152797111406e+00 398 | 1.721253131323347563e-01 399 | 2.035845643906021962e+00 400 | -1.443256265529180293e+00 401 | -1.410134319267015435e-01 402 | 1.924501489060005621e+00 403 | -7.770629210982746837e-01 404 | -1.075607962921119709e+00 405 | -1.623647148286957165e-01 406 | 5.112755813906124658e-01 407 | 
5.922334444171963863e-01 408 | -5.807895817818100603e-01 409 | -6.458832589934148105e-01 410 | 2.057303748235189134e-01 411 | 4.635908317698239078e-01 412 | 8.797925131443877778e-01 413 | 2.396223455519140844e-01 414 | 9.194711527564206621e-01 415 | -1.312359079015942065e+00 416 | -5.311826171322308054e-01 417 | -5.590906496166934136e-01 418 | -9.539365962490577999e-01 419 | 1.772703144302811618e+00 420 | -8.330360235295695137e-01 421 | 1.277082377654755740e+00 422 | 1.537452930299619824e-01 423 | 6.122398177610360870e-01 424 | -5.111307843215766311e-02 425 | 1.723552690497105022e+00 426 | 3.505466337357528994e-01 427 | 6.823631461245818208e-01 428 | -1.708629924648899234e-01 429 | -4.101030717295326422e-01 430 | 1.047453204293033568e+00 431 | -1.724188359352027078e+00 432 | 2.142617291869070684e+00 433 | 1.638505918805012840e+00 434 | 1.635201537650587511e+00 435 | 1.200776299033293304e+00 436 | 2.445799042176360238e+00 437 | -3.031410780598277066e-01 438 | -1.275868197865901443e+00 439 | 7.558365578668313489e-01 440 | -6.046345975930491612e-01 441 | 3.570881947771707976e-01 442 | 6.391089869893251674e-01 443 | 1.567907000944543272e+00 444 | -1.696141785221946807e-01 445 | 3.356028081327624979e-01 446 | -1.471848134596646318e+00 447 | -1.301384548811318487e-03 448 | -2.381947017920932552e+00 449 | 2.378399207251564174e-01 450 | -4.754624704679308866e-01 451 | 8.113509342916469524e-01 452 | -1.075095137949004354e+00 453 | 1.995812806571243492e+00 454 | -1.148963283787803658e+00 455 | -4.799918226848595704e-01 456 | 1.224497147238111205e+00 457 | 1.320471353616479382e+00 458 | 2.037760555712982979e-01 459 | -3.182743407077354147e-01 460 | -1.313201148679705055e+00 461 | -1.366265813206189783e+00 462 | -1.364962447931120648e+00 463 | -3.189155495522324468e-01 464 | -6.919993006148310588e-01 465 | -7.504641815042043529e-01 466 | -1.397918515755918634e+00 467 | 6.689748370534475130e-01 468 | -2.647668388489154978e-01 469 | 4.150021210780954206e-01 470 | 
2.844089191419686413e-01 471 | 1.057944442514640926e+00 472 | -3.173966075333767289e-01 473 | -5.931544952675916749e-01 474 | -4.421362550720017714e-01 475 | -3.193720897654495072e-01 476 | -1.385696231992794125e+00 477 | 6.215763437701995464e-01 478 | 1.103883064559392446e+00 479 | 2.909668492050688782e-01 480 | 6.578373883013386436e-01 481 | 5.309349137303962518e-01 482 | 5.467391920121882354e-01 483 | 1.053825587124523899e-05 484 | 3.993263397051232122e-01 485 | 2.837680225720167759e-01 486 | -9.951841140516257678e-01 487 | -2.550443160961216371e+00 488 | 2.129023209663577276e+00 489 | -2.205609549403549086e-01 490 | 5.850312829406834059e-01 491 | -1.164919179626649814e+00 492 | -1.858388126734852042e-01 493 | -6.984423646984803113e-01 494 | 5.481795713439259199e-01 495 | 1.104223338658099962e+00 496 | 6.321888095821297515e-01 497 | 3.075332037802628316e-01 498 | 1.218933105457229660e+00 499 | -5.547474015262293667e-01 500 | -8.203629501125871970e-01 501 | -------------------------------------------------------------------------------- /BANN/src/BANN.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import pandas as pd 4 | from utils import * 5 | from customModel import * 6 | import matplotlib.pyplot as plt 7 | 8 | class BANNs(object): 9 | def __init__(self,X, y, mask, centered=False, maf=None, nModelsSNP=30, nModelsSET=30, automated=False): 10 | print("Welcome to BANNs. Please make sure SNPs in the SNP List you provide are in the same order as in the genotype matrix. 
Results we return will be in the order of SNP annotations and SNP-set annotations.") 11 | self.X=X 12 | self.y=y 13 | self.mask=mask 14 | self.nModelsSNP=nModelsSNP 15 | self.nModelsSET=nModelsSET 16 | self.optimizer=tf.compat.v1.train.GradientDescentOptimizer(1e-4, use_locking=False, name='GradientDescent') 17 | self.checkInputs() 18 | 19 | if maf!=None: 20 | self.QC_SNPs() 21 | 22 | if centered==False: 23 | self.center_scale_inputs() 24 | 25 | def checkInputs(self): 26 | try: 27 | self.X=np.asarray(self.X) 28 | self.y=np.asarray(self.y) 29 | self.mask=np.asarray(self.mask) 30 | except: 31 | print("Please make sure to give numerical matrices and vectors for X, y, and annotation mask") 32 | 33 | if(np.isnan(self.X).any()): 34 | print("X genotype matrix contains NaN values. Please input a matrix with no NaN values") 35 | return 36 | if (np.isnan(self.y).any()): 37 | print("y phenotype vector contains NaN values. Please input a vector with no NaN values") 38 | return 39 | if (np.isnan(self.mask).any()): 40 | print("SNP-SNPset mask matrix contains NaN values. 
Please input a matrix with no NaN values") 41 | return 42 | if((isinstance(self.nModelsSNP, int) and (isinstance(self.nModelsSET,int)))==False): 43 | print("Both nModelsSNP and nModelsSET parameters, which determine the number of models to initialize for SNP and SNP-Set layers, respectively, should be integers") 44 | return 45 | 46 | #### Get input shapes: 47 | #Number of individuals (or SNPs if using summary statistics) from genotype matrix: 48 | N=self.X.shape[0] 49 | #Number of SNPs: 50 | p=self.X.shape[1] 51 | #Number of individuals from phenotype array: 52 | Ny=self.y.shape[0] 53 | #Number of SNPs from mask files: 54 | pm=self.mask.shape[0] 55 | #Number of genes from mask file: 56 | g=self.mask.shape[1] 57 | 58 | ### Check if shapes agree: 59 | if(N!=Ny): 60 | print("Number of samples do not match in X matrix and y vector") 61 | return 62 | if(p!=pm): 63 | print("Number of SNPs do not match in X matrix (number of columns) and annotation mask matrix (number of rows)") 64 | return 65 | 66 | def QC_SNPs(self, maf): 67 | currentMAF=np.mean(self.X, axis=0) 68 | self.X=self.X[:,currentMAF>maf] 69 | 70 | def center_scale_inputs(self): 71 | self.X=np.nan_to_num((self.X-np.mean(self.X, axis=0))/np.std(self.X,axis=0)) # Standardized genotype matrix based on means and standard deviations 72 | self.y=np.nan_to_num((self.y-np.mean(self.y))/np.std(self.y)) # Standardized phenotype array based on means and standard deviations 73 | 74 | def estimatePVE(self,layer,X): 75 | p=X.shape[1] 76 | pve=np.repeat(0.0,100) 77 | for i in range(0,100): 78 | j = np.random.choice(layer.models,1,p=layer.w) 79 | b=layer.kernel[:,j]+np.sqrt(layer.s[:,j])*np.random.normal(0,1,p) 80 | b=b*(np.random.uniform(0,1,p)2: 160 | regionID="Upstream_" 161 | regionID+=row["GeneID"] 162 | intergenicDF=intergenicDF.append({"Chromosome":gChr, "Start":1, "End":gStart-1, "GeneID":regionID}, ignore_index=True ) 163 | 164 | if gChr==nextChr: 165 | regionID="Intergenic_" 166 | regionID+=row["GeneID"] 167 | 
regionID+="_" 168 | regionID+=geneList.loc[index+1,"GeneID"] 169 | intergenicDF=intergenicDF.append({"Chromosome":gChr, "Start":gStop+1, "End":geneList.loc[index+1,"Start"]-1, "GeneID":regionID}, ignore_index=True ) 170 | 171 | if gChr