├── data ├── geno.rda ├── SNPinfo.rda └── genelist.rda ├── README_figs ├── README-LDheatmap1-1.png ├── README-LDheatmap3-1.png └── README-heatmap_all.png ├── NAMESPACE ├── inst └── extdata │ ├── genelist.txt │ └── SNPinfo.txt ├── BigLD.Rproj ├── DESCRIPTION ├── .gitignore ├── man ├── GPART.Rd ├── Big_LD.Rd ├── CLQD.Rd └── LDblockHeatmap.Rd ├── README.md ├── README.Rmd ├── BigLD_manual.Rmd ├── R ├── CLQD.R ├── LDblockHeatmap.R ├── GPART.R └── Big_LD.R └── LICENSE /data/geno.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunnyeesl/BigLD/HEAD/data/geno.rda -------------------------------------------------------------------------------- /data/SNPinfo.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunnyeesl/BigLD/HEAD/data/SNPinfo.rda -------------------------------------------------------------------------------- /data/genelist.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunnyeesl/BigLD/HEAD/data/genelist.rda -------------------------------------------------------------------------------- /README_figs/README-LDheatmap1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunnyeesl/BigLD/HEAD/README_figs/README-LDheatmap1-1.png -------------------------------------------------------------------------------- /README_figs/README-LDheatmap3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunnyeesl/BigLD/HEAD/README_figs/README-LDheatmap3-1.png -------------------------------------------------------------------------------- /README_figs/README-heatmap_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunnyeesl/BigLD/HEAD/README_figs/README-heatmap_all.png -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(Big_LD) 4 | export(CLQD) 5 | export(GPART) 6 | export(LDblockHeatmap) 7 | import(grid) 8 | import(igraph) 9 | importFrom(plyr,alply) 10 | -------------------------------------------------------------------------------- /inst/extdata/genelist.txt: -------------------------------------------------------------------------------- 1 | CECR2 22 17840837 18037850 2 | SLC25A18 22 18043139 18073760 3 | ATP6V1E1 22 18074902 18111584 4 | BCL2L13 22 18111621 18213388 5 | BID 22 18216906 18257536 6 | MIR3198-1 22 18246946 18247025 7 | LINC00528 22 18260088 18262247 8 | MICAL3 22 18270415 18507325 9 | MIR648 22 18463634 18463727 10 | -------------------------------------------------------------------------------- /BigLD.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace,vignette 19 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: BigLD 2 | Type: Package 3 | Title: Big-LD 4 | Version: 0.1.0 5 | Author@R: as.person(c( 6 | "Sun Ah Kim [aut, cre]", 7 | "Yun Joo Yoo [aut]" 8 | )) 9 | Maintainer: Sun Ah Kim 10 | Description: BigLD is a package containg function which relate to the Big_LD algorithm. 11 | Depends: R (>= 3.3.1) 12 | Imports: igraph, 13 | grid, 14 | Matrix, 15 | magrittr, 16 | NMF, 17 | irlba, 18 | plyr, 19 | Rcpp 20 | Encoding: UTF-8 21 | License: GPL-2 22 | LazyData: TRUE 23 | RoxygenNote: 5.0.1 24 | URL: http://github.com/sunnyeesl/BigLD 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | -------------------------------------------------------------------------------- /man/GPART.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GPART.R 3 | \name{GPART} 4 | \alias{GPART} 5 | \title{Partitioning genodata based on the result obtained by using Big-LD and gene region information.} 6 | \usage{ 7 | GPART(geno, SNPinfo, chrN, allgenelist, BigLDresult = NULL, minsize = 4, 8 | maxsize = 50) 9 | } 10 | \arguments{ 11 | \item{geno}{A data frame or matrix of additive genotype data, each column is additive genotype of each SNP.} 12 | 13 | \item{SNPinfo}{A data frame or matrix of SNPs information. 1st column is rsID and 2nd column is bp position.} 14 | 15 | \item{chrN}{A integer value to specify chromosome number of the given data.} 16 | 17 | \item{allgenelist}{A data frame or matrix of Gene info data. 18 | (1st col : Genename, 2nd col : chromosome, 3rd col : start bp, 4th col : end bp)} 19 | 20 | \item{BigLDresult}{A data frame obtained by \code{Big_LD} function. 21 | If \code{NULL}(default), the \code{GPART} function first excute \code{Big_LD} function to obtain LD blocks estimation result.} 22 | 23 | \item{minsize}{A integer value specifying the lower bound of number of SNPs in a partition.} 24 | 25 | \item{maxsize}{A integer value specifying the upper bound of number of SNPs in a partition.} 26 | } 27 | \value{ 28 | 29 | } 30 | \description{ 31 | \code{GPART} partition the given genodata using the result obtained by Big-LD and gene region information. 32 | The algorithm partition the whole sequence into sub sequences of which size do not exceed the given threshold. 33 | } 34 | \examples{ 35 | data(geno) 36 | data(SNPinfo) 37 | data(allgenelist) 38 | GPART(geno, SNPinfo, 22, genelist) 39 | 40 | } 41 | \author{ 42 | Sun Ah Kim , Yun Joo Yoo 43 | } 44 | \seealso{ 45 | \code{\link{Big_LD}} 46 | } 47 | 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Big-LD: A new haplotype block detection method 2 | ================ 3 | Sunah Kim () 4 | 5 | Big-LD 6 | ====== 7 | 8 | Big-LD is a block partition method based on interval graph modeling of LD bins which are clusters of strong pairwise LD SNPs, not necessarily physically consecutive. The detailed information about the Big-LD can be found in our paper published in [bioinformatics](https://academic.oup.com/bioinformatics/article/doi/10.1093/bioinformatics/btx609/4282661/A-new-haplotype-block-detection-method-for-dense). 9 | 10 | The Big-LD algorithm and visualization function has been updated and implemented in a new package "gpart". The package "gpart" can be downloaded from [bioconductor](https://bioconductor.org/packages/devel/bioc/html/gpart.html) 11 | 12 | The "gpart" has following features. 13 | 14 | ### the updated `BigLD` function 15 | 16 | - supports two types of heuristic algorithm, `near-nonhrst` and `fast`. 17 | - supports LD measures r2 and D' both. 18 | - supports PLINK formats (.ped, .map, .raw, .traw) and vcf format as input 19 | 20 | ### the updated `LDblockHeatmap` function 21 | 22 | - shows up to 20000 SNPs. 23 | - shows physical locations of LD blocks and SNPs 24 | - shows gene locations if the gene information is inputted (or by loading gene information from the Ensembl or UCSC database) . 25 | 26 | ### the new SNP sequence partition algorithm `GPART` 27 | 28 | - divides the entire SNP sequence using LD block information (BigLD result) and gene information 29 | - can limit the min/max size of a block 30 | 31 | ### A BigLD result plotted by the updated `LDblockHeatmap()` in "gpart" package.. 32 | 33 | - EAS population of 1000 Genomes phase 1 data 34 | - chr1:16Mb-17.9Mb 35 | 36 | -------------------------------------------------------------------------------- /man/Big_LD.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Big_LD.R 3 | \name{Big_LD} 4 | \alias{Big_LD} 5 | \title{Estimation of LD block regions} 6 | \usage{ 7 | Big_LD(geno, SNPinfo, CLQcut = 0.5, clstgap = 40000, leng = 200, 8 | subSegmSize = 1500) 9 | } 10 | \arguments{ 11 | \item{geno}{A data frame or matrix of additive genotype data, each column is additive genotype of each SNP.} 12 | 13 | \item{SNPinfo}{A data frame or matrix of SNPs information. 1st column is rsID and 2nd column is bp position.} 14 | 15 | \item{CLQcut}{A numeric value of threshold for the correlation value |r|, between 0 to 1.} 16 | 17 | \item{clstgap}{An integer value to specifing the threshold of physical distance (bp) between two consecutive SNPs 18 | which do not belong to the same clique, i.e., if a physical distance between two consecutive SNPs in a clique 19 | greater than \code{clstgap}, then the algorithm split the cliques satisfying each 20 | clique do not contain such consecutive SNPs} 21 | 22 | \item{leng}{An integer value to specify the number of SNPs in a preceding and a following region 23 | of each sub-region boundary, every SNP in a preceding and every SNP in a following region need to be in weak LD.} 24 | 25 | \item{subSegmSize}{An integer value to specify the upper bound of the number of SNPs in a one-take sub-region.} 26 | } 27 | \value{ 28 | A data frame of block estimation result. 29 | Each row of data frame shows the starting SNP and end SNP of each estimated LD block 30 | } 31 | \description{ 32 | \code{Big_LD} returns the estimation of LD block regions of given data. 33 | } 34 | \examples{ 35 | 36 | data(geno) 37 | data(SNPinfo) 38 | Big_LD(geno, SNPinfo) 39 | Big_LD(geno, SNPinfo, CLQcut = 0.5, clstgap = 40000, leng = 200, subSegmSize = 1500) 40 | 41 | } 42 | \author{ 43 | Sun-Ah Kim , Yun Joo Yoo 44 | } 45 | \seealso{ 46 | \code{\link{CLQD}}, \code{\link{LDblockHeatmap}} 47 | } 48 | 49 | -------------------------------------------------------------------------------- /man/CLQD.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CLQD.R 3 | \name{CLQD} 4 | \alias{CLQD} 5 | \title{partitioning into cliques} 6 | \usage{ 7 | CLQD(subgeno, subSNPinfo, CLQcut = 0.5, clstgap = 40000, 8 | CLQmode = c("Density", "Maximal"), codechange = FALSE) 9 | } 10 | \arguments{ 11 | \item{subgeno}{A data frame or matrix of additive genotype data, each column is additive genotype of each SNP.} 12 | 13 | \item{subSNPinfo}{A data frame or matrix of SNPs information. 1st column is rsID and 2nd column is bp position.} 14 | 15 | \item{CLQcut}{A numeric value of threshold for the correlation value |r|, between 0 to 1.} 16 | 17 | \item{clstgap}{An integer value to specifing the threshold of physical distance (bp) between two consecutive SNPs 18 | which do not belong to the same clique, i.e., if a physical distance between two consecutive SNPs in a clique 19 | greater than \code{clstgap}, then the algorithm split the cliques satisfying each 20 | clique do not contain such consecutive SNPs} 21 | 22 | \item{CLQmode}{A character string to specify the way to give priority among detected cliques. 23 | if \code{CLQmode = "Density"} then the algorithm gives priority to the clique of largest value of \eqn{(Number of SNPs)/(range of clique)}, 24 | else if \code{CLQmode = "Maximal"}, then the algorithm gives priority to the largest clique.} 25 | 26 | \item{codechange}{If \code{TRUE}, choose the cliques after code change procedure.} 27 | } 28 | \value{ 29 | A vector of cluster numbers of all SNPs (\code{NA} represents singleton cluster) 30 | } 31 | \description{ 32 | \code{CLQD} partitioning the given data into subgroups that contain SNPs which are highly correlated. 33 | } 34 | \examples{ 35 | 36 | data(geno) 37 | data(SNPinfo) 38 | CLQD(geno,SNPinfo,CLQcut = 0.5, clstgap= 40000, CLQmode = 'Maximal', codechange = FALSE) 39 | CLQD(geno,SNPinfo,CLQcut = 0.5, clstgap= 40000, CLQmode = 'Density', codechange = FALSE) 40 | 41 | } 42 | \author{ 43 | Sun-Ah Kim , Yun Joo Yoo 44 | } 45 | 46 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Big-LD: A new haplotype block detection method" 3 | author: "Sunah Kim (sunny03@snu.ac.kr)" 4 | output: rmarkdown::github_document 5 | --- 6 | 7 | ```{r setup, include=FALSE} 8 | knitr::opts_chunk$set(echo = TRUE) 9 | ``` 10 | ```{r, echo = FALSE} 11 | knitr::opts_chunk$set( 12 | fig.path = "README_figs/README-" 13 | ) 14 | ``` 15 | 16 | # Big-LD 17 | 18 | Big-LD is a block partition method based on interval graph modeling of LD bins which are clusters of strong pairwise LD SNPs, not necessarily physically consecutive. 19 | The detailed information about the Big-LD can be found in our paper published in [bioinformatics](https://academic.oup.com/bioinformatics/article/doi/10.1093/bioinformatics/btx609/4282661/A-new-haplotype-block-detection-method-for-dense). 20 | 21 | The Big-LD algorithm and visualization function has been updated and implemented in a new package "gpart". 22 | The package "gpart" can be downloaded from [bioconductor](https://bioconductor.org/packages/devel/bioc/html/gpart.html) 23 | 24 | The "gpart" has following features. 25 | 26 | ### the updated `BigLD` function 27 | * supports two types of heuristic algorithm, `near-nonhrst` and `fast`. 28 | * supports LD measures r2 and D' both. 29 | * supports PLINK formats (.ped, .map, .raw, .traw) and vcf format as input 30 | 31 | ### the updated `LDblockHeatmap` function 32 | * shows up to 20000 SNPs. 33 | * shows physical locations of LD blocks and SNPs 34 | * shows gene locations if the gene information is inputted (or by loading gene information from the Ensembl or UCSC database) . 35 | 36 | ### the new SNP sequence partition algorithm `GPART` 37 | * divides the entire SNP sequence using LD block information (BigLD result) and gene information 38 | * can limit the min/max size of a block 39 | 40 | 41 | ### A BigLD result plotted by the updated `LDblockHeatmap()` in "gpart" package. 42 | 43 | * EAS population of 1000 Genomes phase 1 data 44 | * chr1:16Mb-17.9Mb 45 | ```{r, out.width = "1200px", echo = FALSE} 46 | knitr::include_graphics("README_figs/README-heatmap_all.png") 47 | ``` 48 | -------------------------------------------------------------------------------- /man/LDblockHeatmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/LDblockHeatmap.R 3 | \name{LDblockHeatmap} 4 | \alias{LDblockHeatmap} 5 | \title{Draw LDblock results on LD heatmap.} 6 | \usage{ 7 | LDblockHeatmap(geno, SNPinfo, chrN, showSNPs = NULL, LDblockResult = NULL, 8 | tick = c("bp", "rsID"), st.bp = 0, ed.bp = Inf, showLDsize = 3, 9 | savefile = FALSE) 10 | } 11 | \arguments{ 12 | \item{geno}{A data frame or matrix of additive genotype data, each column is additive genotype of each SNP.} 13 | 14 | \item{SNPinfo}{A data frame or matrix of SNPs information. 1st column is rsID and 2nd column is bp position.} 15 | 16 | \item{chrN}{A integer value to specify chromosome number of the given data.} 17 | 18 | \item{showSNPs}{A data frame which is part of \code{SNPinfo} that you want to show in the result LDblock heatmap. 19 | The default is \code{NULL}} 20 | 21 | \item{LDblockResult}{A data frame obtained by \code{Big_LD} function. 22 | If \code{NULL}(default), the \code{GPART} function first excute \code{Big_LD} function to obtain LD blocks estimation result.} 23 | 24 | \item{tick}{A character string to specify how to show first SNPs and last SNPs of LD blocks, 25 | in \code{"bp"} or in \code{"rsID"}.} 26 | 27 | \item{st.bp}{A integer value to specify starting bp position of the region to draw.} 28 | 29 | \item{ed.bp}{A integer value to specify end bp position of the region to draw.} 30 | 31 | \item{showLDsize}{A integer value to specify the size (number of SNPs) of LDblocks to show ticks. 32 | if \code{showLDsize = \eqn{k}}, then blocks of size equal or greater than k is shown with boundaries and 33 | the "rsID" or "bp" of the first and last SNPs, however, 34 | the blocks whose size is less than \eqn{k} is shown with only boundaries.} 35 | 36 | \item{savefile}{logical. If \code{TRUE}, save tif file into work directory and 37 | the file is named after the chromosome and the physical range to draw. The default is \code{FALSE}} 38 | } 39 | \value{ 40 | A grid graphical object of LD block heatmap. 41 | The LD block heatmap will be presented on the screen after execution of the function. 42 | #' 43 | } 44 | \description{ 45 | \code{LDblockHeatmap} shows the LDblock regions obtained by Big-LD algorithm. 46 | } 47 | \examples{ 48 | 49 | data(geno) 50 | data(SNPinfo) 51 | LDblockHeatmap(geno, SNPinfo,chrN = 22, showSNPs = NULL) 52 | LDblockHeatmap(geno, SNPinfo, 22, showSNPs = SNPinfo[c(100, 200), ], showLDsize = 10, savefile = TRUE) 53 | } 54 | \author{ 55 | Sun-Ah Kim , Yun Joo Yoo 56 | } 57 | \seealso{ 58 | \code{\link{Big_LD}} 59 | } 60 | 61 | -------------------------------------------------------------------------------- /BigLD_manual.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Big-LD" 3 | author: "Sunah Kim (sunny03@snu.ac.kr)" 4 | output: rmarkdown::github_document 5 | --- 6 | 7 | ```{r setup, include=FALSE} 8 | knitr::opts_chunk$set(echo = TRUE) 9 | ``` 10 | ```{r, echo = FALSE} 11 | knitr::opts_chunk$set( 12 | fig.path = "README_figs/README-" 13 | ) 14 | ``` 15 | 16 | # Big-LD 17 | 18 | Big-LD is a block partition method based on interval graph modeling of LD bins which are clusters of strong pairwise LD SNPs, not necessarily physically consecutive. 19 | The detailed information about the Big-LD can be found in our paper published in [bioinformatics](https://academic.oup.com/bioinformatics/article/doi/10.1093/bioinformatics/btx609/4282661/A-new-haplotype-block-detection-method-for-dense). 20 | 21 | ## Installation 22 | ```{r, eval=FALSE} 23 | library("devtools") 24 | devtools::install_github("sunnyeesl/BigLD") 25 | ``` 26 | ```{r, message=FALSE} 27 | library(BigLD) 28 | ``` 29 | ## Data 30 | 31 | You need an additive genotype data (each SNP genotype is coded in terms of the number of minor alleles) and a SNP information data. 32 | The package include sample genotype data and SNPinfo data. 33 | 34 | Load the sample data (if you installed the BigLD packages). 35 | ```{r data} 36 | data(geno) 37 | data(SNPinfo) 38 | ``` 39 | Or simply you can download the sample data from `/inst/extdata` 40 | The sample data include 1000SNPs and 286 individuals. 41 | ```{r} 42 | geno[1:10, 1:7] 43 | head(SNPinfo) 44 | ``` 45 | 46 | ## CLQD 47 | 48 | `CLQD` partitioning the SNPs into subgroups such that each subgroup contains highly correlated SNPs. 49 | There are two CLQ methods, original CLQ(`ClQmode = 'Maximal'`) and CLQD (`ClQmode = 'Density'`). 50 | 51 | ```{r CLQD} 52 | CLQres = CLQD(geno, SNPinfo, CLQmode = 'Density') 53 | head(CLQres, n = 20) 54 | ``` 55 | ## Big_LD 56 | 57 | 'Big_LD` returns the estimation of LD block regions of given data. 58 | 59 | ```{r Big_LD} 60 | BigLDres = Big_LD(geno, SNPinfo) 61 | BigLDres 62 | ``` 63 | If you want to apply heuristic procedure, add option `checkLargest = TRUE`. 64 | 65 | ```{r Big_LDheuristic, eval=FALSE} 66 | Big_LD(geno, SNPinfo, MAFcut = 0.05, checkLargest = TRUE, appendrare = TRUE) 67 | ``` 68 | 69 | ## LDblockHeatmap 70 | 71 | `LDblockHeatmap` visualize the LDblock boundaries detected by Big_LD. 72 | 73 | You can input the results obtained using Big-LD (`LDblockResult= BigLDres`). 74 | If you do not input a Big-LD results, the `LDblockHeatmap` function first excute `Big_LD` function to obtain an LD block estimation result. 75 | 76 | ```{r LDheatmap1, results='hide'} 77 | LDblockHeatmap(geno, SNPinfo, 22, LDblockResult= BigLDres) 78 | ``` 79 | 80 | You can show the location of the specific SNPs (`showSNPs = SNPinfo[c(100, 200), ]` shows the 100th and 200th SNPs), 81 | or give the threshold for LD block sizes to show SNP information (`showLDsize = 50`). 82 | If you want to save the LD heatmap results as tif file, add options such as `savefile = TRUE, filename = "LDheatmap2.tif"`. 83 | 84 | ```{r LDheatmap2, eval=FALSE} 85 | LDblockHeatmap(geno, SNPinfo, 22, showSNPs = SNPinfo[c(100, 200), ], showLDsize = 50, savefile = TRUE, filename = "LDheatmap2.tif") 86 | ``` 87 | ```{r LDheatmap3, results='hide', echo=FALSE} 88 | LDblockres = LDblockHeatmap(geno, SNPinfo, 22, showSNPs = SNPinfo[c(100, 200), ], showLDsize = 50, savefile = TRUE, filename = "LDheatmap2.tif") 89 | ``` 90 | 91 | If you have any suggestion or question, please contact us (sunny03@snu.ac.kr). 92 | -------------------------------------------------------------------------------- /R/CLQD.R: -------------------------------------------------------------------------------- 1 | ################################################################################################################# 2 | # CLQD 3 | #' @title partitioning into cliques 4 | #' @name CLQD 5 | #' 6 | #' @description \code{CLQD} partitioning the given data into subgroups that contain SNPs which are highly correlated. 7 | #' 8 | #' @param subgeno A data frame or matrix of additive genotype data, each column is additive genotype of each SNP. (Use data of non-monomorphic SNPs) 9 | #' @param subSNPinfo A data frame or matrix of SNPs information. 1st column is rsID and 2nd column is bp position. 10 | #' @param CLQcut A numeric value of threshold for the correlation value |r|, between 0 to 1. 11 | #' @param clstgap An integer value to specifing the threshold of physical distance (bp) between two consecutive SNPs 12 | #' which do not belong to the same clique, i.e., if a physical distance between two consecutive SNPs in a clique 13 | #' greater than \code{clstgap}, then the algorithm split the cliques satisfying each 14 | #' clique do not contain such consecutive SNPs 15 | #' @param CLQmode A character string to specify the way to give priority among detected cliques. 16 | #' if \code{CLQmode = "Density"} then the algorithm gives priority to the clique of largest value of \eqn{(Number of SNPs)/(range of clique)}, 17 | #' else if \code{CLQmode = "Maximal"}, then the algorithm gives priority to the largest clique. 18 | #' @param codechange If \code{TRUE}, choose the cliques after code change procedure. 19 | #' @param checkLargest If \code{checkLargest = TRUE}, the algorithm use heuristic procedure to reduce runtime 20 | # 21 | #' @return A vector of cluster numbers of all SNPs (\code{NA} represents singleton cluster). 22 | #' 23 | #' 24 | #' @examples 25 | #' 26 | #' data(geno) 27 | #' data(SNPinfo) 28 | #' CLQD(geno,SNPinfo,CLQcut = 0.5, clstgap= 40000, CLQmode = 'Maximal', codechange = FALSE) 29 | #' CLQD(geno,SNPinfo,CLQcut = 0.5, clstgap= 40000, CLQmode = 'Density', codechange = FALSE) 30 | #' 31 | #' @author Sun-Ah Kim , Yun Joo Yoo 32 | #' 33 | #' 34 | 35 | #' @import igraph 36 | #' 37 | # [1] 12 9 13 9 13 6 7 NA 7 7 6 4 8 4 NA 6 13 9 7 7 6 8 12 9 12 13 8 8 12 13 38 | # [31] 7 9 13 8 12 13 7 7 12 NA 10 10 NA 13 10 10 NA 10 18 NA 18 18 NA 18 18 18 18 NA 15 15 39 | # [61] NA 15 16 15 15 15 16 16 16 16 19 16 16 19 NA 17 17 NA NA NA NA 5 5 NA 2 2 1 1 NA 11 40 | # [91] 11 NA NA NA 3 3 NA NA 14 14 subfunctions 41 | # < built-in > 1.CliqueDecision, 2.ChooseMaximal, 3.CodeChangeV, 4.new.split.cliques 42 | #' @export 43 | #' 44 | CLQD <- function(subgeno, subSNPinfo, CLQcut = 0.5, clstgap = 40000, CLQmode = c("Density", "Maximal"), 45 | codechange = FALSE, checkLargest = FALSE) { 46 | # packages 47 | # library(igraph) 48 | ####################################################################################################### 49 | # subfunctions : 1.CliqueDecision, 2.ChooseMaximal, 3.CodeChangeV, 4.new.split.cliques 1 50 | CliqueDecision = function(x, CLQmode) { 51 | if (length(CLQmode) == 2) { 52 | CLQmode <<- "Maximal" 53 | return(length(x)) 54 | } else if (CLQmode == "Maximal") { 55 | return(length(x)) 56 | } else if (CLQmode == "Density") { 57 | return(length(x)/(diff(range(x))/1000)) 58 | } 59 | } 60 | # 2 61 | ChooseMaximal = function(vt, cut, OCM) { 62 | 63 | codeW <- CodeChangeV(vt, OCM)[[1]] #use CodeChangeV function 64 | codeW[codeW < cut] <- 0 65 | subg <- graph.adjacency(codeW, mode = "undirected", weighted = TRUE, diag = FALSE, add.colnames = NULL) 66 | lgstcliq <- largest.cliques(subg) 67 | if (length(lgstcliq) == 1) { 68 | FC <- unlist(largest.cliques(subg)) 69 | } else { 70 | sumvt <- sapply(lgstcliq, function(x) { 71 | sum(codeW[x, x]) 72 | }) 73 | cliqno <- which(sumvt == max(sumvt))[1] 74 | FC <- lgstcliq[[cliqno]] 75 | } 76 | return(vt[FC]) 77 | } 78 | # 3 79 | CodeChangeV = function(vt, OCM) { 80 | rin <- OCM[vt, vt] 81 | rin <- as.matrix(rin) 82 | nr <- dim(rin)[1] 83 | code = rep(1, nr) 84 | change = 1 85 | iter = 0 86 | while (change == 1) { 87 | change = 0 88 | if (iter > 2^nr) { 89 | print("maximum iteration") 90 | break 91 | } 92 | # count number of negative r for each SNP & find the SNP with max neg r 93 | cneg = apply(rin, 1, function(x) { 94 | sum(x < 0) 95 | }) 96 | maxneg = which.max(cneg) 97 | 98 | if (cneg[maxneg] > (nr - 1)/2) { 99 | # change signs of r for that row and column 100 | rin[maxneg, ] = -1 * rin[maxneg, ] 101 | rin[, maxneg] = -1 * rin[, maxneg] 102 | code[maxneg] = 1 - code[maxneg] 103 | change = 1 104 | } 105 | iter = iter + 1 106 | 107 | } 108 | return(list(rin, code)) 109 | } 110 | # 4 111 | new.split.cliques <- function(cliques.bp, gapdist) { 112 | nowlist <- lapply(cliques.bp, sort) 113 | fixlist <- NULL 114 | repeat { 115 | need.split = which(sapply(nowlist, function(x) max(diff(x)) > gapdist) == TRUE) 116 | need.fix <- which(sapply(nowlist, function(x) max(diff(x)) > gapdist) == FALSE) 117 | addlist <- nowlist[need.fix] 118 | fixlist <- c(fixlist, addlist) 119 | if (length(need.split) == 0) { 120 | break 121 | } 122 | nowlist <- nowlist[need.split] 123 | nowlength <- length(nowlist) 124 | newlist <- as.list(rep(NA, nowlength)) 125 | for (i in 1:nowlength) { 126 | gap = diff(nowlist[[i]]) 127 | frontpart <- nowlist[[i]][1:min(which(gap > gapdist))] 128 | restpart <- nowlist[[i]][-(1:min(which(gap > gapdist)))] 129 | nowlist[[i]] <- frontpart 130 | newlist[[i]] <- restpart 131 | } 132 | addlist <- nowlist[sapply(nowlist, function(x) length(x) > 1)] 133 | fixlist <- c(fixlist, addlist) 134 | nowlist <- newlist[sapply(newlist, function(x) length(x) > 1)] 135 | } 136 | return(fixlist) 137 | } 138 | ######################################################################################################## 139 | if (length(CLQmode) == 2) { 140 | print(" You do not choose CLQ mode! Defalt mode is 'Density'.") 141 | CLQmode <- "Density" 142 | } 143 | # Main Function 144 | SNPbps = subSNPinfo[, 2] 145 | OCM <- suppressWarnings(cor(subgeno, use="pairwise.complete.obs")) 146 | diag(OCM) <- 0 147 | OCM[abs(OCM) < CLQcut] <- 0 148 | OCM[is.na(OCM)]<-0 149 | r2Mat <- OCM^2 150 | r2Mat[r2Mat < CLQcut^2] <- 0 151 | r2Mat[r2Mat >= CLQcut^2] <- 1 152 | # Nr2Mat <- r2Mat 153 | # Nr2Mat[abs(Nr2Mat)0]<-1 155 | binvector = rep(NA, dim(r2Mat)[2]) 156 | binnum = 1 157 | re.SNPbps <- SNPbps 158 | if(all(OCM==0)) return(1:length(binvector)) 159 | ## 160 | # take Toooo Big block First! 161 | # g <- graph_from_adjacency_matrix(r2Mat, mode = "undirected", weighted = TRUE, diag = NULL, add.colnames = NA) 162 | subregionLeng = dim(OCM)[1] 163 | if(subregionLeng<500) checkLargest = FALSE 164 | while(checkLargest == TRUE){ 165 | if(checkLargest == TRUE){ 166 | g <- graph_from_adjacency_matrix(r2Mat, mode = "undirected", weighted = TRUE, diag = FALSE, add.colnames = NA) 167 | compo = components(g) 168 | componum = which(compo$csize==max(compo$csize))[1] 169 | compov = which(compo$membership==componum) 170 | compadjM = OCM[compov, compov] 171 | cg = graph_from_adjacency_matrix(compadjM, mode = "undirected", weighted = TRUE, diag = FALSE, add.colnames = NA) 172 | if((median(coreness(cg))>80 & max(coreness(cg))>100)| (quantile(coreness(cg), 0.75)>100 & max(coreness(cg))>100)){ 173 | print("use heuristic procedure!") 174 | degrees = apply(r2Mat, 1, sum) 175 | maxdegv = which(degrees >=(quantile(degrees, 0.7))) 176 | # if(length(maxdegv)>=1){ 177 | maxdegvs = maxdegv 178 | edgeDens = NULL 179 | for(maxdegv in maxdegvs){ 180 | Bignbds = which(r2Mat[maxdegv,, drop = FALSE]>0, arr.ind = TRUE) 181 | Bignbds.c = unique(Bignbds[,2]) 182 | newr2Mat = r2Mat[Bignbds.c,Bignbds.c] 183 | EdgeDen = sum(newr2Mat)/((dim(newr2Mat)[1])*(dim(newr2Mat)[1]-1)) 184 | edgeDens = c(edgeDens, EdgeDen) 185 | } 186 | maxdegvs = maxdegvs[order(edgeDens, decreasing = TRUE)] 187 | edgeDens = edgeDens[order(edgeDens, decreasing = TRUE)] 188 | degv = maxdegvs[1] 189 | edgeD = edgeDens[1] 190 | Bignbds = which(r2Mat[degv,, drop = FALSE]>0, arr.ind = TRUE) 191 | Bignbds.c = unique(Bignbds[,2]) 192 | # maxiC = maximal.cliques(g, min = dim(OCM)[1]*0.9) 193 | # largestOneRange = range(Bignbds.c) 194 | # largestSNPn = diff(largestOneRange) 195 | # largestCsize = length(Bignbds.c) 196 | nowSNPsbp = re.SNPbps[Bignbds.c] 197 | nowSNPsbploca = match(nowSNPsbp, SNPbps) 198 | binvector[nowSNPsbploca] <- binnum 199 | binnum = binnum + 1 200 | r2Mat <- r2Mat[-Bignbds.c, -Bignbds.c, drop = FALSE] 201 | OCM <- OCM[-Bignbds.c, -Bignbds.c, drop = FALSE] 202 | re.SNPbps <- re.SNPbps[-Bignbds.c] 203 | # print("case2") 204 | checkLargest = TRUE 205 | if(length(re.SNPbps)<500) checkLargest = FALSE 206 | 207 | }else{ 208 | checkLargest = FALSE 209 | } 210 | } 211 | } 212 | print("end pre-steps") 213 | g <- graph_from_adjacency_matrix(r2Mat, mode = "undirected", weighted = TRUE, diag = FALSE, add.colnames = NA) 214 | max.cliques <- max_cliques(g, min = 2) 215 | bp.cliques <- lapply(max.cliques, function(x) re.SNPbps[x]) 216 | split.bp.cliques <- new.split.cliques(bp.cliques, clstgap) 217 | repeat { 218 | if (all(is.na(binvector) == FALSE)) { 219 | break 220 | } 221 | if(length(split.bp.cliques)==0) break 222 | density.v <- sapply(split.bp.cliques, function(x) CliqueDecision(x, CLQmode), simplify = TRUE) 223 | max.d <- which(density.v == max(density.v)) 224 | max.cluster <- split.bp.cliques[max.d] 225 | if (length(max.cluster) > 1) { 226 | # if there are two bins of same density, then we choose the bigger one. 227 | max.cluster <- max.cluster[order(sapply(max.cluster, length), decreasing = TRUE)] 228 | } 229 | max.cluster <- max.cluster[[1]] 230 | max.cluster.od <- match(max.cluster, re.SNPbps) 231 | if (codechange == TRUE) { 232 | max.cluster.od <- ChooseMaximal(max.cluster.od, CLQcut, OCM) 233 | max.cluster <- re.SNPbps[max.cluster.od] 234 | } 235 | ## excluding all SNPs in max.cluster from re.SNPbps 236 | split.bp.cliques <- lapply(split.bp.cliques, function(x) setdiff(x, max.cluster)) 237 | split.bp.cliques <- split.bp.cliques[which(sapply(split.bp.cliques, length) > 1)] 238 | binvector[match(max.cluster, SNPbps)] <- binnum 239 | binnum = binnum + 1 240 | # r2Mat <- r2Mat[-max.cluster.od, -max.cluster.od] 241 | # OCM <- OCM[-max.cluster.od, -max.cluster.od] 242 | # re.SNPbps <- setdiff(re.SNPbps, max.cluster) 243 | if (length(re.SNPbps) < 2) { 244 | break 245 | } 246 | if(length(split.bp.cliques)==0) break 247 | # print(sum(is.na(binvector))) 248 | } ##end repeat 249 | 250 | if (all(is.na(binvector) == TRUE)) { 251 | binvector <- c(1:length(binvector)) 252 | } 253 | return(binvector) 254 | } 255 | -------------------------------------------------------------------------------- /R/LDblockHeatmap.R: -------------------------------------------------------------------------------- 1 | ################################################################################################################## 2 | # LDheatmap 3 | ################################################################################################################## 4 | # < input > 5 | #' @title Draw LDblock results on LD heatmap. 6 | #' @name LDblockHeatmap 7 | #' @description 8 | #' \code{LDblockHeatmap} shows the LDblock regions obtained by Big-LD algorithm. 9 | #' 10 | #' @param geno A data frame or matrix of additive genotype data, each column is additive genotype of each SNP. 11 | #' @param SNPinfo A data frame or matrix of SNPs information. 1st column is rsID and 2nd column is bp position. 12 | #' @param chrN A character value to specify chromosome number of the given data. 13 | #' @param showSNPs A data frame which is part of \code{SNPinfo} that you want to show in the result LDblock heatmap. 14 | #' The default is \code{NULL} 15 | #' @param LDblockResult A data frame obtained by \code{Big_LD} function. 16 | #' If \code{NULL}(default), the \code{LDblockHeatmap} function first excute \code{Big_LD} function to obtain LD blocks estimation result. 17 | #' 18 | #' @param tick A character string to specify how to show first SNPs and last SNPs of LD blocks, 19 | #' in \code{"bp"} or in \code{"rsID"}. 20 | #' @param st.bp A integer value to specify starting bp position of the region to draw. 21 | #' @param ed.bp A integer value to specify end bp position of the region to draw. 22 | #' @param showLDsize A integer value to specify the size (number of SNPs) of LDblocks to show ticks. 23 | #' if \code{showLDsize = \eqn{k}}, then blocks of size equal or greater than k is shown with boundaries and 24 | #' the "rsID" or "bp" of the first and last SNPs, however, 25 | #' the blocks whose size is less than \eqn{k} is shown with only boundaries. 26 | #' @param savefile logical. If \code{TRUE}, save tif file into work directory and 27 | #' the file is named after the chromosome and the physical range to draw. The default is \code{FALSE} 28 | #' @param filename A charactervalue. If \code{savefile = TRUE}, the tif file is saved in work directory with the given name. 29 | # < output > 30 | #' @return A grid graphical object of LD block heatmap. 31 | #' The LD block heatmap will be presented on the screen after execution of the function. 32 | #' #' 33 | #' @author Sun-Ah Kim , Yun Joo Yoo 34 | #' @seealso \code{\link{Big_LD}} 35 | #' 36 | #' @examples 37 | #' 38 | #' data(geno) 39 | #' data(SNPinfo) 40 | #' LDblockHeatmap(geno, SNPinfo, 22, showSNPs = NULL) 41 | #' LDblockHeatmap(geno, SNPinfo, 22, showSNPs = SNPinfo[c(100, 200), ], showLDsize = 10, savefile = TRUE) 42 | #' @import grid 43 | # TotalMap : object of heatmap. 44 | # If savefile TRUE, "LDblock_heatmap_chr[chrN]-[start bp]-[end bp].tif" file will be made. 45 | # The function will draw a plot on your screen. 46 | ################################################################################################################## 47 | # sub-Functions 48 | # 1. Big-LD 2. CLQD 49 | # < built - in > 50 | # 2. makeRect (built-in) 3. LDheatmap.Legend.add (built-in) 51 | ################################################################################################################## 52 | 53 | #' @export 54 | LDblockHeatmap <- function(geno, SNPinfo, chrN, showSNPs = NULL, LDblockResult=NULL, tick = c("bp", "rsID"), st.bp=0 , ed.bp = Inf, 55 | showLDsize = 3, savefile = FALSE, filename = "LDheatmap.tif"){ 56 | # packagese 57 | # library(grid) 58 | ######################################################################################################## 59 | # sub-Functions 60 | # 1. Big-LD 2. makeRect (built-in) 3. LDheatmap.Legend.add (built-in) 61 | ######################################################################################################## 62 | # make rectangles 63 | makeRect<-function(nrow,ncol,cols,name,byrow=TRUE){ 64 | xx<-(1:ncol)/ncol 65 | yy<-(1:nrow)/nrow 66 | right<-rep(xx,nrow) 67 | top<-rep(yy,each=ncol) 68 | rectGrob(x=right,y=top,width=1/ncol,height=1/nrow,just=c("right","top"),gp=gpar(col=NA,fill=cols),name=name) 69 | } 70 | # make Legend 71 | LDheatmap.Legend.add <- function(color, vp=VPheatmap){ 72 | ImageRect <- makeRect(2, length(color), col = c(rep(NA, length(color)), color[length(color):1]), "colorKey") 73 | keyVP <- viewport(x = 0.5, y = 0.2, height = 0.4, width =0.5, just = c("centre", "bottom"), name = "keyVP") 74 | ttt <- expression(paste("r"^{2}, " Color Key")) 75 | title <- textGrob(ttt, x = 0.5, y = 1.25, name = "title", gp = gpar(cex = 0.6)) 76 | labels <- textGrob(paste(0.2 * 0:5), x = 0.2 * 0:5, y = 0.25, gp = gpar(cex = 0.6), name = "labels") 77 | ticks <- segmentsGrob(x0 = c(0:5) * 0.2, y0 = rep(0.4,6), x1 = c(0:5) * 0.2, y1 = rep(0.5, 6), name = "ticks") 78 | box <- linesGrob(x = c(0, 0, 1, 1, 0), y = c(0.5, 1, 1, 0.5, 0.5), name = "box") 79 | key <- gTree(children = gList(ImageRect, title, labels, ticks, box), name = "Key", vp = keyVP) 80 | return(key) 81 | } 82 | ######################################################################################################## 83 | if(length(tick) >1 ){ 84 | tick = "rsID" 85 | } 86 | subSNPinfo = SNPinfo[which(SNPinfo[,2]>=st.bp & SNPinfo[,2]<=ed.bp),] 87 | if(dim(subSNPinfo)[1]>1000){ 88 | print("There are Too many SNPs! We will draw only first 1000 SNPs") 89 | subSNPinfo<- subSNPinfo[1:1000,] 90 | }else if(dim(subSNPinfo)[1]<10){ 91 | stop("Too short Region!") 92 | } 93 | if(!all(colnames(geno)==SNPinfo[,1])){ 94 | stop("column names of geno data do not agree with 1st column of SNPinfo ") 95 | } 96 | 97 | chosencol = as.vector(match(as.character(subSNPinfo[,1]), colnames(geno))) 98 | subgeno = geno[,chosencol] 99 | if(is.null(LDblockResult)){ 100 | subLDblockRes = Big_LD(subgeno, subSNPinfo) 101 | print("Big_LD, done!") 102 | }else{ 103 | subLDblockRes = LDblockResult[which(LDblockResult$end.bp >= min(subSNPinfo[,2]) & LDblockResult$start.bp <= max(subSNPinfo[,2])),] 104 | subLDblockRes$start[1] = which(SNPinfo[,1]==subSNPinfo[1,1]) 105 | levels(subLDblockRes$start.rsID) <- c(levels(subLDblockRes$start.rsID), as.character(subSNPinfo[1,1])) 106 | subLDblockRes$start.rsID[1] <- subSNPinfo[1,1] 107 | subLDblockRes$start.bp[1] = subSNPinfo[1,2] 108 | subsize = dim(subLDblockRes)[1] 109 | subSNPinfosize = dim(subSNPinfo)[1] 110 | subLDblockRes$end[subsize] = max(chosencol) 111 | levels(subLDblockRes$end.rsID) <- c(levels(subLDblockRes$end.rsID), as.character(subSNPinfo[subSNPinfosize,1])) 112 | subLDblockRes$end.rsID[subsize] <- as.character(subSNPinfo[subSNPinfosize,1]) 113 | subLDblockRes$end.bp[subsize] = subSNPinfo[subSNPinfosize,2] 114 | } 115 | 116 | s = subLDblockRes$start 117 | e = subLDblockRes$end 118 | s.rsID = as.character(subLDblockRes$start.rsID) 119 | e.rsID = as.character(subLDblockRes$end.rsID) 120 | s.bp = subLDblockRes$start.bp 121 | e.bp = subLDblockRes$end.bp 122 | 123 | if(s.bp[1]max(subSNPinfo[,2])){ 128 | e.bp[length(e.bp)]<-max(subSNPinfo[,2]) 129 | e.rsID[length(e.bp)]<-as.character(subSNPinfo[dim(subSNPinfo)[1],1]) 130 | } 131 | 132 | subLDblockRes = data.frame(s, e, s.rsID, e.rsID, s.bp, e.bp) 133 | colnames(subLDblockRes)=c("start", "end", "start.rsID", "end.rsID", "start.bp", "end.bp") 134 | 135 | BlockstP = sapply(subLDblockRes$start.bp, function(x) which(min(abs(x-subSNPinfo[,2])) ==abs(x-subSNPinfo[,2]))[1])-1 136 | BlockedP = sapply(subLDblockRes$end.bp, function(x) which(min(abs(x-subSNPinfo[,2])) ==abs(x-subSNPinfo[,2]))[1]) 137 | if(!is.null(showSNPs)){ 138 | showSNPsP = sapply(showSNPs[,2], function(x) which(abs(subSNPinfo[,2] - x) == min(abs(subSNPinfo[,2] - x)))[1]) 139 | } else { 140 | showSNPsP = NULL 141 | } 142 | 143 | 144 | if(tick == "rsID"){ 145 | tickname.st = as.character(subLDblockRes$start.rsID) 146 | tickname.ed = as.character(subLDblockRes$end.rsID) 147 | } else if(tick =="bp"){ 148 | tickname.st = (subLDblockRes$start.bp) 149 | tickname.ed = (subLDblockRes$end.bp) 150 | } 151 | ticksizeNsatisfy = ((BlockedP-BlockstP+1) 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /R/GPART.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | ################################################################################################################# 4 | # GPART < input > 5 | #' @title Partitioning genodata based on the result obtained by using Big-LD and gene region information. 6 | #' @name GPART 7 | #' @description 8 | #' \code{GPART} partition the given genodata using the result obtained by Big-LD and gene region information. 9 | #' The algorithm partition the whole sequence into sub sequences of which size do not exceed the given threshold. 10 | #' 11 | #' @param geno A data frame or matrix of additive genotype data, each column is additive genotype of each SNP. 12 | #' @param SNPinfo A data frame or matrix of SNPs information. 1st column is rsID and 2nd column is bp position. 13 | #' @param BigLDresult A data frame obtained by \code{Big_LD} function. 14 | #' If \code{NULL}(default), the \code{GPART} function first excute \code{Big_LD} function to obtain LD blocks estimation result. 15 | #' 16 | #' @param chrN A integer value to specify chromosome number of the given data. 17 | #' @param allgenelist A data frame or matrix of Gene info data. 18 | #' (1st col : Genename, 2nd col : chromosome, 3rd col : start bp, 4th col : end bp) 19 | #' @param minsize A integer value specifying the lower bound of number of SNPs in a partition. 20 | #' @param maxsize A integer value specifying the upper bound of number of SNPs in a partition. 21 | # < output > 22 | #' @return 23 | #' 24 | #' @author Sun Ah Kim , Yun Joo Yoo 25 | #' @seealso \code{\link{Big_LD}} 26 | #' 27 | #' @examples 28 | #' data(geno) 29 | #' data(SNPinfo) 30 | #' data(allgenelist) 31 | #' GPART(geno, SNPinfo, 22, genelist) 32 | #' 33 | 34 | #' @importFrom plyr alply 35 | 36 | #' @export 37 | GPART <- function(geno, SNPinfo, chrN, allgenelist, BigLDresult = NULL, minsize = 4, maxsize = 50) { 38 | ####################################################################################################### 39 | # sub-Functions 1. Big_LD 2. LDblockSplit 3. Merge_overlap_Gene 4.LDblock_Gene_Merge 5. split_Big_LD 40 | # 6. Merge_small_region 7.Naming_Region split LD 41 | ####################################################################################################### 42 | # blocks with Big-LD result 43 | # library(plyr) 44 | LDblockSplit = function(geno, LDblocks, maxsize) { 45 | LDblockSizes = apply(LDblocks, 1, diff) + 1 46 | LargeBlocksN = which(LDblockSizes > maxsize) 47 | LargeBlocks = LDblocks[LargeBlocksN, , drop = FALSE] 48 | Newblocks = NULL 49 | if (length(LargeBlocksN) != 0) { 50 | while (dim(LargeBlocks)[1] > 0) { 51 | nowblocks = LargeBlocks[1, ] 52 | nowgeno = geno[, nowblocks[1]:nowblocks[2]] 53 | nowr2 = cor(nowgeno)^2 54 | btwr2 = sapply(1:(dim(nowr2)[1] - 1), function(x) mean(nowr2[1:x, (x + 1):(dim(nowr2)[1] - 1)])) 55 | if (dim(nowr2)[1] <= ceiling(maxsize * 0.8 * 2)) { 56 | subbtwr2 = btwr2[(ceiling(dim(nowr2)[1]/2) - (maxsize * 0.2)):(ceiling(dim(nowr2)[1]/2) + (maxsize * 0.2))] 57 | addN = which(subbtwr2 == min(subbtwr2)) + (ceiling(dim(nowr2)[1]/2) - (maxsize * 0.2)) - 1 58 | Newblocks = rbind(Newblocks, c(nowblocks[1], (nowblocks[1] + addN))) 59 | Newblocks = rbind(Newblocks, c(nowblocks[1] + addN + 1, nowblocks[2])) 60 | LargeBlocks <- LargeBlocks[-1, , drop = FALSE] 61 | } else { 62 | subbtwr2 = btwr2[(maxsize * 0.4):maxsize] 63 | addN = which(subbtwr2 == min(subbtwr2)) + (maxsize * 0.4) - 1 64 | Newblocks = rbind(Newblocks, c(nowblocks[1], (nowblocks[1] + addN - 1))) 65 | LargeBlocks[1, 1] <- (nowblocks[1] + addN) 66 | if (diff(LargeBlocks[1, ] < maxsize)) { 67 | Newblocks = rbind(Newblocks, LargeBlocks[1, ]) 68 | } 69 | } 70 | } # end while 71 | FinalLDblocks = rbind(LDblocks[-LargeBlocksN, ], Newblocks) 72 | FinalLDblocks = FinalLDblocks[order(FinalLDblocks[, 1]), ] 73 | return(FinalLDblocks) 74 | } else { 75 | return(LDblocks) 76 | } 77 | } 78 | # merge overlapped gene regions 79 | Merge_overlap_Gene = function(Geneblocks) { 80 | overlaplist = NULL 81 | for (i in 1:(dim(Geneblocks)[1] - 1)) { 82 | for (j in (i + 1):dim(Geneblocks)[1]) { 83 | gene1 = Geneblocks[i, 1]:Geneblocks[i, 2] 84 | gene2 = Geneblocks[j, 1]:Geneblocks[j, 2] 85 | if (length(intersect(gene1, gene2)) > 0) { 86 | overlaplist = rbind(overlaplist, c(i, j)) 87 | } 88 | } 89 | } 90 | if(is.null(overlaplist)){ 91 | return(Geneblocks) 92 | }else{ 93 | overlaplist = as.list(data.frame(t(overlaplist))) 94 | newlist = NULL 95 | while (length(overlaplist) > 0) { 96 | now = overlaplist[[1]] 97 | N = sapply(overlaplist, function(x) length(intersect(x, now)) != 0) 98 | if (sum(N) == 1) { 99 | newlist <- c(newlist, list(now)) 100 | overlaplist = overlaplist[-which(N == TRUE)] 101 | } else { 102 | newlist <- c(newlist, list(unique(unlist(overlaplist[N])))) 103 | overlaplist = overlaplist[-which(N == TRUE)] 104 | } 105 | } 106 | 107 | overlapN = unlist(newlist) 108 | RemainGeneblocks = Geneblocks[-overlapN, ] 109 | AddGeneblocks = NULL 110 | for (i in 1:length(newlist)) { 111 | nowoverlap = newlist[[i]] 112 | overlapgenes = Geneblocks[nowoverlap, ] 113 | Totalrange = range(as.vector(overlapgenes)) 114 | newGeneblocks = matrix(Totalrange, ncol = 2) 115 | rownames(newGeneblocks) = paste(rownames(overlapgenes), collapse = "/") 116 | AddGeneblocks = rbind(AddGeneblocks, newGeneblocks) 117 | } 118 | Geneblocks = rbind(RemainGeneblocks, AddGeneblocks) 119 | Geneblocks = Geneblocks[order(Geneblocks[, 1]), ] 120 | return(Geneblocks) 121 | } 122 | } 123 | # LDblock construction and split Large regions 124 | LDblock_Gene_Merge = function(LDblocks.T, Geneblocks) { 125 | LDblocks.T = data.frame(LDblocks.T, "No", 0) 126 | Geneblocks = data.frame(Geneblocks, rownames(Geneblocks), 1) 127 | colnames(LDblocks.T) = c("st", "ed", "gname", "gnameN") 128 | colnames(Geneblocks) = c("st", "ed", "gname", "gnameN") 129 | remainedLDPart = NULL 130 | nowGeneN = 1 131 | while (dim(LDblocks.T)[1] > 0) { 132 | nowst = LDblocks.T[1, 1] 133 | nowed = LDblocks.T[1, 2] 134 | nowLD = LDblocks.T[1, ] 135 | # nowGst = as.numeric(nowGene[1]) nowGed = as.numeric(nowGene[2]) 136 | nowGene = Geneblocks[nowGeneN, , drop = FALSE] 137 | minGeneSNPid = Geneblocks[1, 1] 138 | maxGeneSNPid = max(Geneblocks[, 2]) 139 | intersectTrue = (length(intersect(c(nowst:nowed), c(nowGene[1, 1]:nowGene[1, 2]))) > 0) 140 | if (intersectTrue == FALSE) { 141 | if (nowed < nowGene[1, 1]) { 142 | remainedLDPart = rbind(remainedLDPart, nowLD) 143 | LDblocks.T = LDblocks.T[-1, , drop = FALSE] 144 | } else if (nowst > maxGeneSNPid) { 145 | remainedLDPart = rbind(remainedLDPart, LDblocks.T) 146 | LDblocks.T = LDblocks.T[-(dim(LDblocks.T)[1]), , drop = FALSE] 147 | } else if (nowst > nowGene[1, 2]) { 148 | nowGeneN = nowGeneN + 1 149 | } 150 | } else { 151 | geneset = c(nowGene[1, 1]:nowGene[1, 2]) 152 | LDset = c(nowLD[1, 1]:nowLD[1, 2]) 153 | # print(paste('small!!',length(unique(c(geneset,LDset))))) 154 | if (nowGene[1, 1] < nowLD[1, 1]) { 155 | g1 = as.character(nowGene[1, 3]) 156 | g2 = as.character(nowLD[1, 3]) 157 | } else { 158 | g2 = as.character(nowGene[1, 3]) 159 | g1 = as.character(nowLD[1, 3]) 160 | } 161 | 162 | if (g1 == "No" & g2 == "No") { 163 | gname = "No" 164 | } else if (g1 == "No" & g2 != "No") { 165 | gname <- g2 166 | } else if (g1 != "No" & g2 == "No") { 167 | gname <- g1 168 | } else if (g1 != "No" & g2 != "No") { 169 | g1 = unlist(strsplit(g1, split = "-")) 170 | g2 = unlist(strsplit(g2, split = "-")) 171 | gname = paste(unique(c(g1, g2)), collapse = "-") 172 | } 173 | gnameN = nowLD[1, 4] + nowGene[1, 4] 174 | st = min(c(nowGene[1, 1], nowGene[1, 2], nowLD[1, 1], nowLD[1, 2])) 175 | ed = max(c(nowGene[1, 1], nowGene[1, 2], nowLD[1, 1], nowLD[1, 2])) 176 | if (nowed > nowGene[2]) { 177 | levels(LDblocks.T$gname) <- c(levels(LDblocks.T$gname), as.character(gname)) 178 | levels(LDblocks.T$gnameN) <- c(levels(LDblocks.T$gname), gnameN) 179 | LDblocks.T[1, ] <- data.frame(st, ed, as.character(as.factor(gname)), gnameN) 180 | Geneblocks <- Geneblocks[-nowGeneN, ] 181 | # print(LDblocks.T[1,]) 182 | } else { 183 | levels(Geneblocks$gname) <- c(levels(Geneblocks$gname), gname) 184 | levels(Geneblocks$gnameN) <- c(levels(Geneblocks$gname), gnameN) 185 | Geneblocks[nowGeneN, ] <- data.frame(st, ed, gname, gnameN) 186 | LDblocks.T = LDblocks.T[-1, , drop = FALSE] 187 | # print(Geneblocks[nowGeneN,]) 188 | } 189 | } 190 | if (nowGeneN > dim(Geneblocks)[1]) { 191 | remainedLDPart = rbind(remainedLDPart, LDblocks.T) 192 | break 193 | } 194 | } 195 | return(list(remainedLDPart, Geneblocks)) 196 | } 197 | 198 | # split big Gene region 199 | split_Big_LD = function(GeneLDblocks, LDblocks.T, Geneblocks, maxsize) { 200 | BigN = which(GeneLDblocks[, 5] > maxsize) 201 | BigGeneblocks = GeneLDblocks[BigN, ] 202 | GeneLDblocks = GeneLDblocks[-BigN, ] 203 | newblocks = NULL 204 | for (i in 1:dim(BigGeneblocks)[1]) { 205 | nowBlock = BigGeneblocks[i, ] 206 | nowSNPs = nowBlock[1, 1]:nowBlock[1, 2] 207 | intersectLD = apply(LDblocks.T, 1, function(x) length(intersect(c(x[1]:x[2]), nowSNPs)) > 0) 208 | intersectLD = LDblocks.T[intersectLD, ] 209 | intersectLD[1, 1] <- min(nowSNPs) 210 | intersectLD[dim(intersectLD)[1], 2] <- max(nowSNPs) 211 | NintersectLD = NULL 212 | while (dim(intersectLD)[1] > 0) { 213 | st = intersectLD[1, 1] 214 | edposi = max(which(intersectLD[, 2] < (st + maxsize))) 215 | ed = intersectLD[edposi, 2] 216 | NintersectLD = rbind(NintersectLD, c(st, ed)) 217 | intersectLD = intersectLD[-(1:edposi), , drop = FALSE] 218 | } 219 | intersectLD = NintersectLD 220 | Genenames = alply(intersectLD, 1, function(x) { 221 | GeneR = apply(Geneblocks, 1, function(y) length(intersect(x[1]:x[2], y[1]:y[2])) > 0) 222 | list(rownames(Geneblocks[GeneR, , drop = FALSE])) 223 | }) 224 | gname = sapply(Genenames, function(x) paste(x[[1]], collapse = "/")) 225 | gnameN = sapply(Genenames, length) 226 | Addblocks = data.frame(intersectLD, gname, gnameN) 227 | blockL = apply(intersectLD, 1, diff) + 1 228 | Addblocks = cbind(Addblocks, blockL) 229 | newblocks = rbind(newblocks, Addblocks) 230 | } 231 | GeneLDblocks = rbind(GeneLDblocks, newblocks) 232 | GeneLDblocks = GeneLDblocks[order(GeneLDblocks[, 1]), ] 233 | return(GeneLDblocks) 234 | 235 | } 236 | # small region merging 237 | Merge_small_region = function(GeneLDblocks, maxsize, minsize) { 238 | smallbin = NULL 239 | completeBin = NULL 240 | while (dim(GeneLDblocks)[1] > 0) { 241 | # print(dim(GeneLDblocks)) 242 | nowbin = GeneLDblocks[1, ] 243 | if (is.null(smallbin) & nowbin[1, 5] >= minsize) { 244 | completeBin = rbind(completeBin, nowbin) 245 | GeneLDblocks = GeneLDblocks[-1, ] 246 | } else if (is.null(smallbin) & nowbin[1, 5] < minsize) { 247 | smallbin <- nowbin 248 | GeneLDblocks = GeneLDblocks[-1, ] 249 | } else if (smallbin[1, 5] + nowbin[1, 5] < minsize) { 250 | st = min(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2]) 251 | ed = max(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2]) 252 | g1 = as.character(smallbin[1, 3]) 253 | g2 = as.character(nowbin[1, 3]) 254 | if (g1 == "No" & g2 == "No") { 255 | gname = "No" 256 | } else if (g1 == "No" & g2 != "No") { 257 | gname <- g2 258 | } else if (g1 != "No" & g2 == "No") { 259 | gname <- g1 260 | } else if (g1 != "No" & g2 != "No") { 261 | g1 = unlist(strsplit(g1, split = "-")) 262 | g2 = unlist(strsplit(g2, split = "-")) 263 | gname = paste(unique(c(g1, g2)), collapse = "-") 264 | } 265 | gnameN = smallbin[1, 4] + nowbin[1, 4] 266 | blockL = smallbin[1, 5] + nowbin[1, 5] 267 | smallbin = data.frame(st, ed, gname, gnameN, blockL) 268 | GeneLDblocks = GeneLDblocks[-1, ] 269 | } else if (smallbin[1, 5] + nowbin[1, 5] >= minsize & smallbin[1, 5] + nowbin[1, 5] <= maxsize) { 270 | st = min(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2]) 271 | ed = max(smallbin[1, 1], smallbin[1, 2], nowbin[1, 1], nowbin[1, 2]) 272 | g1 = as.character(smallbin[1, 3]) 273 | g2 = as.character(nowbin[1, 3]) 274 | if (g1 == "No" & g2 == "No") { 275 | gname = "No" 276 | } else if (g1 == "No" & g2 != "No") { 277 | gname <- g2 278 | } else if (g1 != "No" & g2 == "No") { 279 | gname <- g1 280 | } else if (g1 != "No" & g2 != "No") { 281 | g1 = unlist(strsplit(g1, split = "-")) 282 | g2 = unlist(strsplit(g2, split = "-")) 283 | gname = paste(unique(c(g1, g2)), collapse = "-") 284 | } 285 | gnameN = smallbin[1, 4] + nowbin[1, 4] 286 | blockL = smallbin[1, 5] + nowbin[1, 5] 287 | completeBin = rbind(completeBin, data.frame(st, ed, gname, gnameN, blockL)) 288 | smallbin = NULL 289 | GeneLDblocks = GeneLDblocks[-1, ] 290 | } else if (smallbin[1, 5] + nowbin[1, 5] > maxsize) { 291 | lastbin = completeBin[dim(completeBin)[1], ] 292 | if (lastbin[1, 5] + smallbin[1, 5] <= maxsize) { 293 | st = min(smallbin[1, 1], smallbin[1, 2], lastbin[1, 1], lastbin[1, 2]) 294 | ed = max(smallbin[1, 1], smallbin[1, 2], lastbin[1, 1], lastbin[1, 2]) 295 | g2 = as.character(smallbin[1, 3]) 296 | g1 = as.character(lastbin[1, 3]) 297 | if (g1 == "No" & g2 == "No") { 298 | gname = "No" 299 | } else if (g1 == "No" & g2 != "No") { 300 | gname <- g2 301 | } else if (g1 != "No" & g2 == "No") { 302 | gname <- g1 303 | } else if (g1 != "No" & g2 != "No") { 304 | g1 = unlist(strsplit(g1, split = "-")) 305 | g2 = unlist(strsplit(g2, split = "-")) 306 | gname = paste(unique(c(g1, g2)), collapse = "-") 307 | } 308 | gnameN = smallbin[1, 4] + lastbin[1, 4] 309 | blockL = smallbin[1, 5] + lastbin[1, 5] 310 | completeBin = completeBin[-dim(completeBin)[1], ] 311 | completeBin = rbind(completeBin, data.frame(st, ed, gname, gnameN, blockL)) 312 | smallbin = NULL 313 | } else { 314 | print(c("Large-small-Large")) 315 | print(rbind(lastbin, smallbin, nowbin)) 316 | completeBin = rbind(completeBin, smallbin, nowbin) 317 | smallbin = NULL 318 | GeneLDblocks = GeneLDblocks[-1, ] 319 | } 320 | } 321 | } 322 | return(completeBin) 323 | } 324 | # naming each region 325 | Naming_Region = function(GeneLDblocks) { 326 | FinalGeneLDblocks = NULL 327 | PartN = 1 328 | Pgene = NULL 329 | gene = NULL 330 | Ngene = NULL 331 | for (i in 1:dim(GeneLDblocks)[1]) { 332 | if (i%%10 == 0) 333 | print(i) 334 | nowBlock = GeneLDblocks[i, ] 335 | if (is.null(Pgene) & nowBlock$gname == "No") { 336 | # before First Gene region 337 | if (is.null(Ngene)) { 338 | Ngene = GeneLDblocks$gname[min(which(GeneLDblocks$gname != "No"))] 339 | Ngene = unlist(strsplit(as.character(Ngene), "-"))[1] 340 | Ngene = unlist(strsplit(as.character(Ngene), "/"))[1] 341 | } 342 | Rname = paste("before-", Ngene, sep = "") 343 | nowFinal = data.frame(nowBlock, Rname) 344 | FinalGeneLDblocks = rbind(FinalGeneLDblocks, nowFinal) 345 | } else if (nowBlock$gname != "No") { 346 | gene = nowBlock$gname 347 | Rname = gene 348 | nowFinal = data.frame(nowBlock, Rname) 349 | FinalGeneLDblocks = rbind(FinalGeneLDblocks, nowFinal) 350 | Pgene = tail(unlist(strsplit(as.character(gene), "-")), n = 1) 351 | Pgene = tail(unlist(strsplit(as.character(Pgene), "/")), n = 1) 352 | } else if (nowBlock$gname == "No") { 353 | geneNames = GeneLDblocks$gname[(i:dim(GeneLDblocks)[1])] 354 | if (all(geneNames == "No")) { 355 | # after Last Gene region 356 | LastPart = GeneLDblocks[i:(dim(GeneLDblocks)[1]), ] 357 | Rname = paste("after-", Pgene, sep = "") 358 | nowFinal = data.frame(LastPart, Rname) 359 | FinalGeneLDblocks = rbind(FinalGeneLDblocks, nowFinal) 360 | break 361 | } else { 362 | Ngene = geneNames[min(which(geneNames != "No"))] 363 | Ngene = unlist(strsplit(as.character(Ngene), "-"))[1] 364 | Ngene = unlist(strsplit(as.character(Ngene), "/"))[1] 365 | Rname = paste("inter-", Pgene, "-", Ngene, sep = "") 366 | nowFinal = data.frame(nowBlock, Rname) 367 | FinalGeneLDblocks = rbind(FinalGeneLDblocks, nowFinal) 368 | } 369 | } 370 | } 371 | # part numbering 372 | FinalGeneLDblocks = data.frame(FinalGeneLDblocks, 0) 373 | FinalGeneLDblocks[1, 7] <- 1 374 | for (i in 2:dim(FinalGeneLDblocks)[1]) { 375 | ifelse(FinalGeneLDblocks[i - 1, 6] == FinalGeneLDblocks[i, 6], 376 | FinalGeneLDblocks[i, 7] <- FinalGeneLDblocks[(i - 1), 7] + 1, FinalGeneLDblocks[i, 7] <- 1) 377 | # if(i%%10==0) print(i) 378 | } 379 | 380 | 381 | Finalnames = apply(FinalGeneLDblocks, 1, function(x) { 382 | # paste(as.character(x[6]), '-part', as.character(x[7]), sep='') 383 | paste(c(x[6], "-part", as.numeric(x[7])), collapse = "") 384 | }) 385 | FinalGeneLDblocks = cbind(FinalGeneLDblocks[, 1:5], Finalnames) 386 | return(FinalGeneLDblocks) 387 | } 388 | 389 | ####################################################################################################### 390 | # Main part 391 | if(is.null(BigLDresult)){ 392 | print("Start to execute Big_LD!") 393 | BigLDblocks = Big_LD(geno, SNPinfo) 394 | print("Big-LD, done!") 395 | }else{ 396 | BigLDblocks = BigLDresult 397 | } 398 | LDblocks = cbind(as.integer(as.character(BigLDblocks[, 1])), as.integer(as.character(BigLDblocks[, 2]))) 399 | # Split Large LDblocks 400 | FinalLDblocks = LDblockSplit(geno, LDblocks, maxsize) 401 | # gene-base block partitioning 402 | genelist <- allgenelist[which(allgenelist[,2] == chrN), ] 403 | GeneRegionSNPs1 = NULL 404 | for (i in 1:dim(genelist)[1]) { 405 | test = (which(SNPinfo[, 2] >= genelist[i, 3] & SNPinfo[, 2] <= genelist[i, 4])) 406 | ifelse(length(test) > 0, test <- range(test), test <- c(0, 0)) 407 | GeneRegionSNPs1 = rbind(GeneRegionSNPs1, test) 408 | # if(i%%10) print(i) 409 | } 410 | 411 | rownames(GeneRegionSNPs1) <- genelist[, 1] 412 | SNPexist = apply(GeneRegionSNPs1, 1, function(x) (x[1] != 0 & x[2] != 0)) 413 | SNPexist = unlist(SNPexist) 414 | Geneblocks = GeneRegionSNPs1[SNPexist, ] 415 | Geneblocks.M = Merge_overlap_Gene(Geneblocks) 416 | LDblocks.sgt = setdiff(1:dim(SNPinfo)[1], unlist(apply(FinalLDblocks, 1, function(x) min(x):max(x)))) 417 | LDblocks.sgt = cbind(LDblocks.sgt, LDblocks.sgt) 418 | LDblocks.T = rbind(FinalLDblocks, LDblocks.sgt) 419 | LDblocks.T = LDblocks.T[order(LDblocks.T[, 1]), ] 420 | colnames(LDblocks.T) = c("st", "ed") 421 | GeneLDblocks = LDblock_Gene_Merge(LDblocks.T, Geneblocks.M) 422 | GeneLDblocks = rbind(GeneLDblocks[[1]], GeneLDblocks[[2]]) 423 | GeneLDblocks = GeneLDblocks[order(GeneLDblocks[, 1]), ] 424 | blockL = GeneLDblocks[, 2] - GeneLDblocks[, 1] + 1 425 | GeneLDblocks = cbind(GeneLDblocks, blockL) 426 | # split big block 427 | GeneLDblocks = split_Big_LD(GeneLDblocks, LDblocks.T, Geneblocks, maxsize) 428 | GeneLDblocks = Merge_small_region(GeneLDblocks, maxsize, minsize) 429 | GeneLDblocks = Naming_Region(GeneLDblocks) 430 | st.rsID = SNPinfo[GeneLDblocks[, 1], 1] 431 | ed.rsID = SNPinfo[GeneLDblocks[, 2], 1] 432 | st.bp = SNPinfo[GeneLDblocks[, 1], 2] 433 | ed.bp = SNPinfo[GeneLDblocks[, 2], 2] 434 | Finalresult = data.frame(GeneLDblocks$st, GeneLDblocks$ed, st.rsID, ed.rsID, st.bp, ed.bp, GeneLDblocks$blockL, GeneLDblocks$Finalnames) 435 | colnames(Finalresult) = c("st", "ed", "st.rsID", "ed.rsID", "st.bp", "ed.bp", "blocksize", "Name") 436 | return(Finalresult) 437 | } 438 | -------------------------------------------------------------------------------- /inst/extdata/SNPinfo.txt: -------------------------------------------------------------------------------- 1 | rsID bp 2 | rs174309 18000090 3 | rs174310 18000280 4 | rs5747216 18000829 5 | rs174312 18001109 6 | rs174313 18001375 7 | rs5747217 18001894 8 | rs174314 18002105 9 | rs174315 18002277 10 | rs174317 18003584 11 | rs7410429 18003597 12 | rs11704765 18003601 13 | rs174323 18004101 14 | rs10775686 18004248 15 | rs5992729 18004287 16 | rs174326 18004289 17 | rs11089184 18004489 18 | rs73389192 18004617 19 | rs4494853 18004756 20 | rs4470418 18004927 21 | rs174327 18004928 22 | rs13056605 18004969 23 | rs174328 18005227 24 | rs9605316 18006528 25 | rs174330 18009909 26 | rs8135585 18010308 27 | rs9605317 18010706 28 | rs174332 18011380 29 | rs9605320 18012158 30 | rs28397634 18012775 31 | rs8138198 18013427 32 | rs1974713 18013723 33 | rs174334 18014921 34 | rs144558224 18015735 35 | rs174335 18016508 36 | rs1296791 18017560 37 | rs2300686 18018509 38 | rs2300687 18018604 39 | rs174336 18019260 40 | rs5992731 18019539 41 | rs1003861 18020773 42 | rs1985405 18021071 43 | rs2268776 18021505 44 | rs1296795 18021760 45 | rs2268777 18022858 46 | rs2268778 18022919 47 | rs174338 18024474 48 | rs174339 18024652 49 | rs174340 18024919 50 | rs55834601 18025899 51 | rs5992063 18025947 52 | rs1296798 18026033 53 | rs9605323 18027334 54 | rs17809189 18030667 55 | rs2268780 18031530 56 | rs113837 18031544 57 | rs174344 18032226 58 | rs174345 18033199 59 | rs174346 18036253 60 | rs174347 18036388 61 | rs174348 18036431 62 | rs174349 18036785 63 | rs5992734 18037113 64 | rs174350 18038176 65 | rs174351 18038786 66 | rs62238768 18039514 67 | rs174352 18039650 68 | rs174355 18041093 69 | rs174357 18042093 70 | rs174358 18043090 71 | rs174360 18044257 72 | rs174361 18044375 73 | rs174362 18044588 74 | rs174363 18044722 75 | rs5992740 18044735 76 | rs62238770 18044866 77 | rs174364 18044936 78 | rs174365 18045084 79 | rs174366 18046680 80 | rs174367 18046830 81 | rs12483850 18046944 82 | rs174368 18047028 83 | rs174369 18047163 84 | rs184083931 18047417 85 | rs5992741 18048673 86 | rs390819 18048690 87 | rs440661 18048751 88 | rs404602 18048911 89 | rs412830 18049681 90 | rs409842 18050636 91 | rs9618051 18050728 92 | rs411358 18051188 93 | rs695291 18052063 94 | rs77829105 18052568 95 | rs113857607 18052736 96 | rs112329750 18052740 97 | rs111635106 18052828 98 | rs111257429 18052854 99 | rs111952667 18052857 100 | rs695715 18052906 101 | rs438858 18053139 102 | rs423158 18053496 103 | rs1296810 18054369 104 | rs1296811 18054821 105 | rs5747236 18055062 106 | rs443694 18055234 107 | rs443708 18055271 108 | rs416422 18055357 109 | rs1296812 18055605 110 | rs394210 18056156 111 | rs1296813 18056267 112 | rs9604766 18056275 113 | rs1296814 18056332 114 | rs148048073 18057141 115 | rs75599514 18057200 116 | rs695817 18057252 117 | rs77113684 18057362 118 | rs145571580 18057808 119 | rs56145577 18057838 120 | rs60773453 18057926 121 | rs74276474 18057936 122 | rs74196725 18059204 123 | rs188665012 18059283 124 | rs12484668 18060356 125 | rs185617591 18060385 126 | rs190309298 18060388 127 | rs4819600 18060393 128 | rs174370 18060414 129 | rs4819602 18060423 130 | rs4819603 18060428 131 | rs189252714 18060451 132 | rs141399040 18060454 133 | rs4819604 18060457 134 | rs174371 18061337 135 | rs1003494 18062987 136 | rs78134013 18065844 137 | rs2074343 18065981 138 | rs60322904 18067777 139 | rs5746439 18068785 140 | rs5747240 18068881 141 | rs5747241 18069412 142 | rs1296816 18071444 143 | rs1296817 18071941 144 | rs71328218 18072015 145 | rs5747244 18072623 146 | rs9180 18073568 147 | rs1044497 18073592 148 | rs7414 18075053 149 | rs3532 18075263 150 | rs1296819 18076546 151 | rs5747247 18077640 152 | rs1296820 18077720 153 | rs5747248 18078712 154 | rs1296822 18079177 155 | rs1296823 18079203 156 | rs1296824 18079241 157 | rs1296825 18079339 158 | rs1296826 18079518 159 | rs1296827 18079783 160 | rs1296828 18079824 161 | rs1296829 18080035 162 | rs5992751 18080154 163 | rs73391480 18080431 164 | rs113018017 18080521 165 | rs73391485 18080723 166 | rs74276486 18080732 167 | rs57754481 18080830 168 | rs2287228 18081136 169 | rs2072555 18081156 170 | rs2111348 18082079 171 | rs144686405 18082292 172 | rs5992075 18082669 173 | rs5747252 18083260 174 | rs3747023 18084033 175 | rs73391495 18084272 176 | rs2080549 18084303 177 | rs5992754 18084331 178 | rs5992755 18084663 179 | rs5747255 18084836 180 | rs5747256 18084977 181 | rs5992756 18085115 182 | rs5747257 18085145 183 | rs5992757 18085646 184 | rs5992077 18085872 185 | rs5992758 18085873 186 | rs5747258 18086312 187 | rs4819609 18086475 188 | rs75934578 18086980 189 | rs5747260 18086994 190 | rs9605328 18087458 191 | rs12165542 18087491 192 | rs8135195 18087509 193 | rs77245105 18087703 194 | rs12171096 18087704 195 | rs12172178 18087730 196 | rs12172179 18087747 197 | rs11089191 18087990 198 | rs112155592 18088035 199 | rs2895943 18088250 200 | rs12169641 18088268 201 | rs4819610 18088461 202 | rs4819611 18088795 203 | rs2895944 18088846 204 | rs4239846 18088903 205 | rs4263214 18088945 206 | rs2895945 18089058 207 | rs5747261 18089601 208 | rs28469161 18089615 209 | rs9618060 18089828 210 | rs9604771 18089854 211 | rs9605331 18090006 212 | rs4819456 18090105 213 | rs74780212 18090664 214 | rs5747263 18091087 215 | rs5747264 18091363 216 | rs5747265 18091367 217 | rs58146058 18091430 218 | rs9605332 18092085 219 | rs9605333 18092304 220 | rs9605334 18092320 221 | rs9605335 18092355 222 | rs9605336 18092406 223 | rs5992078 18092649 224 | rs5992079 18092776 225 | rs5747266 18092809 226 | rs1034470 18093102 227 | rs1034471 18093310 228 | rs9605337 18093592 229 | rs2300688 18094292 230 | rs1034472 18094581 231 | rs2300689 18094893 232 | rs2300690 18094915 233 | rs2300691 18095170 234 | rs5747268 18095312 235 | rs3747024 18095838 236 | rs3747025 18095901 237 | rs17809319 18096199 238 | rs5746441 18096590 239 | rs5747269 18096617 240 | rs5746443 18096665 241 | rs5747270 18096815 242 | rs5747271 18097057 243 | rs5747273 18097196 244 | rs113253532 18097260 245 | rs55649606 18097735 246 | rs5747274 18097927 247 | rs5747275 18098050 248 | rs5747276 18098148 249 | rs5747277 18098266 250 | rs5747278 18098366 251 | rs5747279 18098432 252 | rs5747280 18098527 253 | rs9605344 18098739 254 | rs9604773 18098743 255 | rs144442008 18098792 256 | rs7291233 18098888 257 | rs12163144 18099028 258 | rs12162848 18099048 259 | rs7289867 18099055 260 | rs4996242 18099172 261 | rs2401159 18099314 262 | rs2401160 18099345 263 | rs2895946 18099370 264 | rs2401161 18099393 265 | rs5747281 18100104 266 | rs5992759 18100147 267 | rs12159924 18100435 268 | rs12160074 18100472 269 | rs4819457 18100818 270 | rs5746446 18101834 271 | rs5747283 18102837 272 | rs5992764 18103556 273 | rs5992765 18103710 274 | rs7288016 18103878 275 | rs5992766 18103973 276 | rs5747285 18104619 277 | rs41472044 18104792 278 | rs56704471 18105031 279 | rs2160628 18105457 280 | rs118092051 18106064 281 | rs5747286 18106264 282 | rs5747288 18106309 283 | rs41345445 18106662 284 | rs5747290 18108334 285 | rs5747292 18108810 286 | rs62238857 18108992 287 | rs9605352 18109130 288 | rs41277568 18109857 289 | rs5992769 18109948 290 | rs4449236 18109963 291 | rs5992771 18110185 292 | rs73376727 18110607 293 | rs7751 18111431 294 | rs111286801 18112845 295 | rs2895947 18114036 296 | rs2895948 18114042 297 | rs5747295 18115251 298 | rs5747296 18115277 299 | rs5747297 18115392 300 | rs5747298 18115612 301 | rs5747299 18116033 302 | rs75810250 18117434 303 | rs5747302 18118204 304 | rs9604777 18118636 305 | rs113398832 18118759 306 | rs113376187 18119456 307 | rs9605356 18119476 308 | rs12157642 18119777 309 | rs1080199 18120850 310 | rs12628818 18121246 311 | rs11089197 18121334 312 | rs9605359 18122486 313 | rs12167771 18124332 314 | rs12167823 18124362 315 | rs12169740 18124567 316 | rs73376758 18125797 317 | rs5746451 18126020 318 | rs5992776 18126162 319 | rs77573264 18126188 320 | rs75038815 18127324 321 | rs9605364 18127437 322 | rs5747306 18128960 323 | rs5747307 18129265 324 | rs5992086 18129770 325 | rs73376763 18130621 326 | rs73376764 18130997 327 | rs2401163 18131078 328 | rs2401164 18131226 329 | rs713701 18131546 330 | rs77259934 18132782 331 | rs2024229 18132812 332 | rs2024230 18132930 333 | rs5747308 18133500 334 | rs4819618 18133808 335 | rs2895949 18134265 336 | rs12627845 18134712 337 | rs5747309 18134850 338 | rs5747310 18134976 339 | rs73376775 18135001 340 | rs5992088 18135912 341 | rs2920520 18137144 342 | rs5746453 18137274 343 | rs5992779 18138998 344 | rs73376779 18139017 345 | rs113464802 18139348 346 | rs2587075 18139402 347 | rs2587079 18140423 348 | rs2587084 18140644 349 | rs13054674 18140771 350 | rs113539433 18140816 351 | rs5747313 18141481 352 | rs5992780 18141551 353 | rs147580196 18142254 354 | rs8141347 18143835 355 | rs8140916 18144164 356 | rs8137591 18144315 357 | rs2587087 18144824 358 | rs2587101 18146388 359 | rs147799616 18146971 360 | rs113244890 18146989 361 | rs2401165 18147071 362 | rs73376798 18147211 363 | rs5747315 18147423 364 | rs2587103 18148454 365 | rs73376802 18150665 366 | rs2587066 18154147 367 | rs2587068 18154285 368 | rs117796392 18154398 369 | rs73378703 18155411 370 | rs144169813 18156100 371 | rs5746458 18156395 372 | rs35945753 18156644 373 | rs2535692 18157282 374 | rs2535694 18159407 375 | rs715532 18159946 376 | rs13053393 18160881 377 | rs1978967 18161979 378 | rs9605381 18163136 379 | rs34891900 18163158 380 | rs5747323 18163539 381 | rs5747325 18164864 382 | rs73378723 18165638 383 | rs5747326 18167478 384 | rs8138394 18167626 385 | rs111298155 18168776 386 | rs5747327 18169212 387 | rs5747328 18169221 388 | rs2401166 18169423 389 | rs73378731 18169900 390 | rs5747329 18170207 391 | rs73378734 18171111 392 | rs5746459 18171567 393 | rs5013026 18171650 394 | rs2587076 18172236 395 | rs5747330 18172534 396 | rs117329503 18173509 397 | rs73378738 18174138 398 | rs2535704 18174658 399 | rs2587078 18174781 400 | rs2535705 18175550 401 | rs2254520 18175558 402 | rs79077571 18177283 403 | rs5746460 18178393 404 | rs2257083 18178859 405 | rs5992095 18179399 406 | rs73378746 18180397 407 | rs5747333 18180567 408 | rs7291752 18180911 409 | rs2535707 18181869 410 | rs17207051 18181984 411 | rs2587081 18183623 412 | rs2587082 18184011 413 | rs2587083 18184290 414 | rs73378760 18188924 415 | rs5747336 18188989 416 | rs2535711 18191951 417 | rs2535712 18192136 418 | rs58863768 18193240 419 | rs4819462 18193560 420 | rs2535714 18194188 421 | rs112850451 18196831 422 | rs5992802 18196875 423 | rs147412303 18196923 424 | rs5992098 18197172 425 | rs1470341 18198109 426 | rs2109659 18198327 427 | rs2535675 18199976 428 | rs3788278 18200049 429 | rs2587091 18200050 430 | rs3788279 18200386 431 | rs7290691 18201204 432 | rs2587092 18201526 433 | rs5747337 18201713 434 | rs2535676 18202129 435 | rs2535677 18202207 436 | rs35477714 18202530 437 | rs73378786 18202663 438 | rs2535679 18202850 439 | rs2535680 18203179 440 | rs75227197 18203870 441 | rs2098174 18204218 442 | rs2008085 18204316 443 | rs2079947 18204416 444 | rs59494194 18204587 445 | rs1807468 18204674 446 | rs2401169 18205430 447 | rs738133 18205557 448 | rs2907924 18206653 449 | rs1008378 18207251 450 | rs2535686 18207927 451 | rs4819623 18208641 452 | rs2535687 18208764 453 | rs73378797 18208876 454 | rs4488761 18209613 455 | rs2587100 18210704 456 | rs9967 18211205 457 | rs5746469 18211983 458 | rs181380 18212530 459 | rs8919 18213057 460 | rs424721 18214208 461 | rs78873428 18214383 462 | rs5747338 18214609 463 | rs17809603 18216211 464 | rs3747026 18216262 465 | rs2305001 18218210 466 | rs181382 18218464 467 | rs181383 18218475 468 | rs8190349 18219581 469 | rs181385 18219686 470 | rs4599222 18219727 471 | rs181386 18219731 472 | rs181387 18220324 473 | rs181388 18221167 474 | rs4819626 18221958 475 | rs181389 18221966 476 | rs181390 18222263 477 | rs181391 18222557 478 | rs181392 18222665 479 | rs181393 18223228 480 | rs181394 18223617 481 | rs181395 18223624 482 | rs181396 18224155 483 | rs1468925 18224237 484 | rs1966449 18224364 485 | rs1966450 18224417 486 | rs181397 18224731 487 | rs181399 18225318 488 | rs734297 18225684 489 | rs1468926 18226327 490 | rs2270201 18226476 491 | rs738094 18226521 492 | rs2072393 18226528 493 | rs2072392 18226612 494 | rs8190315 18226764 495 | rs8190313 18227077 496 | rs8190311 18227149 497 | rs8190307 18227564 498 | rs8190306 18227643 499 | rs78524802 18227957 500 | rs75172416 18227978 501 | rs78047295 18228127 502 | rs75087713 18228505 503 | rs76632669 18228687 504 | rs73876500 18229002 505 | rs5747339 18229187 506 | rs181401 18229556 507 | rs181402 18229774 508 | rs2268784 18230278 509 | rs1296683 18230757 510 | rs1296685 18230964 511 | rs8190303 18231014 512 | rs8190302 18231015 513 | rs1296687 18231046 514 | rs2895951 18232368 515 | rs181403 18232653 516 | rs8190291 18232672 517 | rs181404 18232723 518 | rs8190289 18232768 519 | rs181405 18233000 520 | rs2289713 18233154 521 | rs181406 18233525 522 | rs141480741 18233746 523 | rs60494266 18234062 524 | rs78387019 18234263 525 | rs372655 18234535 526 | rs3788282 18235108 527 | rs181407 18235288 528 | rs181408 18235305 529 | rs145349612 18235866 530 | rs5747340 18235867 531 | rs181409 18236035 532 | rs2011418 18236693 533 | rs188802234 18236956 534 | rs181606849 18236957 535 | rs181410 18237436 536 | rs181411 18238291 537 | rs181412 18238786 538 | rs61640818 18239276 539 | rs181413 18239312 540 | rs181414 18240212 541 | rs181415 18240260 542 | rs181416 18240424 543 | rs8137616 18240439 544 | rs415050 18242182 545 | rs4819628 18242356 546 | rs5992817 18245321 547 | rs5992818 18245322 548 | rs5747347 18245533 549 | rs5746473 18245577 550 | rs79383237 18245918 551 | rs5746474 18245932 552 | rs5747349 18245973 553 | rs5747350 18246210 554 | rs5746475 18246213 555 | rs5747351 18246375 556 | rs5747353 18249023 557 | rs2268785 18250882 558 | rs2268786 18250892 559 | rs2268787 18250968 560 | rs112896765 18251626 561 | rs3788284 18252103 562 | rs5992823 18254006 563 | rs67330875 18254058 564 | rs2098472 18254632 565 | rs738096 18255943 566 | rs738095 18255988 567 | rs443912 18257138 568 | rs8190256 18258344 569 | rs366542 18258382 570 | rs142798168 18259803 571 | rs5992102 18259810 572 | rs454566 18261028 573 | rs391085 18261587 574 | rs9605406 18262301 575 | rs372204 18262517 576 | rs5992103 18262767 577 | rs9618110 18262793 578 | rs401416 18262946 579 | rs5992837 18263018 580 | rs408656 18263268 581 | rs430438 18263400 582 | rs116984560 18263834 583 | rs141965389 18264238 584 | rs5992838 18264831 585 | rs1076489 18265172 586 | rs9617618 18265271 587 | rs741458 18265683 588 | rs12165723 18266989 589 | rs73380798 18267982 590 | rs34138371 18268130 591 | rs5992840 18268442 592 | rs5992841 18268450 593 | rs116054993 18268631 594 | rs443627 18268794 595 | rs141031649 18269140 596 | rs116946488 18269364 597 | rs79268089 18271491 598 | rs451548 18274092 599 | rs75492633 18274380 600 | rs382013 18276101 601 | rs429357 18277314 602 | rs426276 18278128 603 | rs117306911 18278320 604 | rs117602899 18279987 605 | rs5992846 18280602 606 | rs401224 18280799 607 | rs57777910 18281629 608 | rs390109 18281630 609 | rs9605408 18281729 610 | rs5992848 18282020 611 | rs117594779 18282127 612 | rs5992105 18283247 613 | rs7291975 18283876 614 | rs375603 18284420 615 | rs138588171 18284615 616 | rs378459 18285299 617 | rs2241252 18286342 618 | rs369081 18286672 619 | rs2075306 18286884 620 | rs11913358 18288040 621 | rs116071535 18288090 622 | rs390041 18288362 623 | rs390702 18288571 624 | rs143824336 18288605 625 | rs389496 18289204 626 | rs8140645 18289555 627 | rs9617619 18291198 628 | rs2289717 18291364 629 | rs454799 18291376 630 | rs423710 18291791 631 | rs115787807 18291926 632 | rs424607 18292198 633 | rs387641 18292242 634 | rs365746 18292337 635 | rs429940 18292466 636 | rs430321 18292633 637 | rs8138765 18292976 638 | rs5992108 18293051 639 | rs419352 18293072 640 | rs425011 18293192 641 | rs397877 18293197 642 | rs431538 18293255 643 | rs1296703 18293261 644 | rs8135229 18293316 645 | rs436590 18293650 646 | rs448184 18293967 647 | rs388415 18294211 648 | rs389436 18294522 649 | rs389347 18294697 650 | rs432775 18294704 651 | rs390745 18294919 652 | rs437633 18295130 653 | rs437773 18295208 654 | rs399757 18295575 655 | rs1550663 18296238 656 | rs1124070 18296438 657 | rs1124069 18296441 658 | rs1550664 18296484 659 | rs1124068 18296536 660 | rs11703172 18297597 661 | rs113201548 18297639 662 | rs36064820 18297694 663 | rs9618120 18297723 664 | rs56114181 18297990 665 | rs5992851 18298089 666 | rs4269007 18298527 667 | rs1003631 18298678 668 | rs1003630 18298723 669 | rs2277831 18299197 670 | rs741457 18299751 671 | rs741456 18299937 672 | rs5992854 18300240 673 | rs11704160 18300594 674 | rs61744842 18300775 675 | rs11704809 18300879 676 | rs78616323 18301600 677 | rs8135914 18301693 678 | rs5992855 18302122 679 | rs5992857 18302521 680 | rs35570448 18302566 681 | rs13054323 18302883 682 | rs5992110 18303070 683 | rs62238915 18303409 684 | rs9605412 18303678 685 | rs1296706 18303738 686 | rs1316044 18303768 687 | rs2305006 18304392 688 | rs5992111 18304583 689 | rs5992112 18304695 690 | rs67527329 18304753 691 | rs5992113 18304784 692 | rs45514595 18304821 693 | rs45544141 18304891 694 | rs45572336 18304978 695 | rs1076488 18305797 696 | rs436087 18306154 697 | rs8138398 18306485 698 | rs737831 18306812 699 | rs11704224 18307381 700 | rs5992861 18307634 701 | rs5992117 18308320 702 | rs5992862 18308601 703 | rs2193539 18308788 704 | rs5992863 18308819 705 | rs5992118 18308936 706 | rs5992864 18309411 707 | rs5992119 18309438 708 | rs5992120 18309546 709 | rs34837770 18309658 710 | rs11704495 18309666 711 | rs5992865 18309803 712 | rs5992866 18309826 713 | rs5992867 18309859 714 | rs5992868 18309951 715 | rs5992870 18310068 716 | rs5992871 18310110 717 | rs5992872 18310363 718 | rs5992121 18310367 719 | rs61744286 18310439 720 | rs7287465 18311845 721 | rs5992122 18312343 722 | rs8136428 18313018 723 | rs5992873 18313044 724 | rs424931 18313512 725 | rs440893 18313584 726 | rs442822 18313591 727 | rs441215 18313615 728 | rs3804050 18313672 729 | rs34401816 18314164 730 | rs367922 18314391 731 | rs5992123 18314989 732 | rs5992874 18315068 733 | rs35181588 18315983 734 | rs450796 18316091 735 | rs450960 18316304 736 | rs450975 18316333 737 | rs451840 18316526 738 | rs451740 18316620 739 | rs453005 18317095 740 | rs11705197 18317423 741 | rs405490 18317621 742 | rs453841 18317821 743 | rs415170 18318963 744 | rs439231 18319179 745 | rs117359382 18319875 746 | rs2542334 18320058 747 | rs2542335 18320258 748 | rs2587106 18320311 749 | rs2587107 18320312 750 | rs920817 18320343 751 | rs2587108 18320886 752 | rs9618135 18321040 753 | rs2587109 18323438 754 | rs2587110 18323482 755 | rs2542336 18323609 756 | rs750558 18324301 757 | rs114867357 18324517 758 | rs750559 18324564 759 | rs748779 18325067 760 | rs116236236 18326550 761 | rs2587111 18326754 762 | rs2542337 18326813 763 | rs2587112 18326823 764 | rs768563 18327590 765 | rs2587113 18328503 766 | rs2587114 18329146 767 | rs2111546 18329411 768 | rs9618143 18329571 769 | rs2587115 18331118 770 | rs9617625 18331149 771 | rs2587116 18331154 772 | rs2542338 18331871 773 | rs10427597 18332410 774 | rs2587117 18333208 775 | rs9617628 18333467 776 | rs2160760 18333902 777 | rs116470140 18333964 778 | rs116979929 18335211 779 | rs117463185 18336087 780 | rs1003361 18337880 781 | rs2016042 18338356 782 | rs873387 18339012 783 | rs76441392 18341004 784 | rs5992125 18342225 785 | rs11917 18343011 786 | rs12159919 18343523 787 | rs1057721 18343843 788 | rs997458 18343959 789 | rs5992879 18345864 790 | rs4819639 18347127 791 | rs1075596 18347224 792 | rs9605418 18347916 793 | rs4819640 18348439 794 | rs2277832 18348823 795 | rs1077146 18349379 796 | rs28700195 18349948 797 | rs5992126 18350514 798 | rs76613044 18351389 799 | rs5992880 18351575 800 | rs5747385 18352679 801 | rs5992881 18352887 802 | rs7284335 18352968 803 | rs9618150 18352986 804 | rs57831065 18353549 805 | rs4819641 18353630 806 | rs4819642 18353910 807 | rs5746486 18354272 808 | rs5746487 18354328 809 | rs9617630 18354766 810 | rs72490631 18357509 811 | rs61377180 18357549 812 | rs5992884 18358633 813 | rs1867353 18359243 814 | rs8136765 18360628 815 | rs4819643 18360810 816 | rs5992887 18361090 817 | rs1867354 18361183 818 | rs5992888 18361871 819 | rs9617633 18362573 820 | rs9605422 18362940 821 | rs9604798 18363387 822 | rs2165971 18364579 823 | rs5992889 18365072 824 | rs4819471 18365392 825 | rs2075445 18366821 826 | rs2075444 18367640 827 | rs12169892 18367988 828 | rs1076113 18368147 829 | rs2289718 18368548 830 | rs9618157 18369709 831 | rs5747390 18369710 832 | rs4819644 18369916 833 | rs5747394 18370997 834 | rs5746489 18371161 835 | rs5746490 18371293 836 | rs5747395 18371358 837 | rs3747031 18371637 838 | rs3747032 18372020 839 | rs4819645 18372709 840 | rs2075455 18373885 841 | rs2075454 18374014 842 | rs2075453 18374150 843 | rs2075452 18374554 844 | rs2075451 18374558 845 | rs1115124 18374875 846 | rs1115123 18374925 847 | rs2075450 18374927 848 | rs8141766 18377199 849 | rs8140413 18377329 850 | rs2034113 18378002 851 | rs3213927 18379160 852 | rs4819473 18380081 853 | rs56076143 18380917 854 | rs12483803 18381041 855 | rs5992895 18381686 856 | rs4819474 18383114 857 | rs4819475 18383236 858 | rs2075449 18385020 859 | rs2075448 18385166 860 | rs12484375 18385933 861 | rs12484267 18385944 862 | rs2896000 18386406 863 | rs2075447 18387015 864 | rs5747400 18388315 865 | rs2075446 18389082 866 | rs77924058 18389110 867 | rs4819646 18390280 868 | rs117911040 18390933 869 | rs4819647 18391061 870 | rs2401424 18393534 871 | rs4819648 18393564 872 | rs5746492 18393933 873 | rs5992900 18394484 874 | rs8137643 18394531 875 | rs2083881 18394968 876 | rs2083882 18395241 877 | rs5747405 18395877 878 | rs5747406 18395952 879 | rs4819649 18396329 880 | rs5747408 18397120 881 | rs5746493 18397572 882 | rs73151057 18397913 883 | rs9604802 18398018 884 | rs9604803 18398207 885 | rs5747409 18398447 886 | rs5747411 18398976 887 | rs5747412 18398999 888 | rs5992902 18399007 889 | rs5747413 18399017 890 | rs5746495 18400366 891 | rs5747416 18401104 892 | rs5747417 18401209 893 | rs34263881 18401597 894 | rs34740481 18401620 895 | rs28406897 18402012 896 | rs13057517 18402669 897 | rs13056749 18402924 898 | rs5746496 18403516 899 | rs5746497 18403629 900 | rs1072405 18406068 901 | rs5747420 18406473 902 | rs2401411 18406690 903 | rs1076111 18407249 904 | rs1076110 18407468 905 | rs74371650 18407802 906 | rs35645198 18408322 907 | rs1867355 18408705 908 | rs5747421 18411074 909 | rs5747422 18411162 910 | rs5747423 18411214 911 | rs5992132 18411621 912 | rs13058527 18412929 913 | rs2197300 18413750 914 | rs4484121 18413975 915 | rs9605435 18415576 916 | rs4819476 18416487 917 | rs78687292 18417495 918 | rs138031558 18418276 919 | rs142625807 18418315 920 | rs9605438 18418894 921 | rs9605439 18420055 922 | rs9605440 18420254 923 | rs7291294 18421784 924 | rs5992907 18421899 925 | rs5992908 18421926 926 | rs5747427 18422165 927 | rs9605441 18422843 928 | rs12628864 18423033 929 | rs400509 18426299 930 | rs415681 18426372 931 | rs407232 18426379 932 | rs416081 18427418 933 | rs423790 18428443 934 | rs378914 18429001 935 | rs425557 18429279 936 | rs431071 18429694 937 | rs1109052 18430193 938 | rs5747428 18430658 939 | rs385945 18431064 940 | rs442494 18431846 941 | rs396012 18432033 942 | rs396524 18433308 943 | rs5992134 18433994 944 | rs411682 18434088 945 | rs413494 18434889 946 | rs5746498 18435794 947 | rs393241 18436758 948 | rs415651 18436944 949 | rs374597 18437043 950 | rs439712 18438221 951 | rs384215 18438485 952 | rs1077543 18439047 953 | rs2401414 18439392 954 | rs406129 18440076 955 | rs413717 18440628 956 | rs5747435 18441563 957 | rs8143037 18441767 958 | rs382983 18442522 959 | rs117900521 18442592 960 | rs5992916 18443681 961 | rs1108372 18444180 962 | rs1867356 18444649 963 | rs1110663 18445031 964 | rs4819652 18445450 965 | rs11089210 18445692 966 | rs404140 18445952 967 | rs412238 18446691 968 | rs445583 18446831 969 | rs448680 18447496 970 | rs379667 18448129 971 | rs58952930 18448255 972 | rs421743 18448276 973 | rs433576 18448960 974 | rs4819477 18449208 975 | rs427669 18449576 976 | rs432660 18449758 977 | rs433545 18450224 978 | rs11704739 18450270 979 | rs443983 18450965 980 | rs5992928 18451438 981 | rs28434757 18451554 982 | rs444279 18451593 983 | rs5992930 18452843 984 | rs61451998 18452995 985 | rs5992136 18453103 986 | rs2593207 18454125 987 | rs447955 18454588 988 | rs448627 18455232 989 | rs438708 18455381 990 | rs453435 18455621 991 | rs433047 18455700 992 | rs453557 18455830 993 | rs2067071 18456192 994 | rs370241 18456874 995 | rs5992934 18457119 996 | rs454921 18457246 997 | rs5992137 18457986 998 | rs365219 18459127 999 | rs5992142 18459329 1000 | rs9605460 18459576 1001 | rs9605461 18459658 1002 | -------------------------------------------------------------------------------- /R/Big_LD.R: -------------------------------------------------------------------------------- 1 | 2 | ################################################################################################################# 3 | # Big-LD 4 | #' @title Estimation of LD block regions 5 | #' @name Big_LD 6 | #' 7 | #' @description \code{Big_LD} returns the estimation of LD block regions of given data. 8 | #' 9 | #' @param geno A data frame or matrix of additive genotype data, each column is additive genotype of each SNP. 10 | #' @param SNPinfo A data frame or matrix of SNPs information. 1st column is rsID and 2nd column is bp position. 11 | #' @param CLQcut A numeric value of threshold for the correlation value |r|, between 0 to 1. 12 | #' @param clstgap An integer value to specifying the threshold of physical distance (bp) between two consecutive SNPs 13 | #' which do not belong to the same clique, i.e., if a physical distance between two consecutive SNPs in a clique 14 | #' greater than \code{clstgap}, then the algorithm split the cliques satisfying each 15 | #' clique do not contain such consecutive SNPs 16 | #' @param leng An integer value to specify the number of SNPs in a preceding and a following region 17 | #' of each sub-region boundary, every SNP in a preceding and every SNP in a following region need to be in weak LD. 18 | #' @param MAFcut An numeric value to specifying the MAF threshold. 19 | #' @param subSegmSize An integer value to specify the upper bound of the number of SNPs in a one-take sub-region. 20 | #' @param appendRare If \code{appendRare = TRUE}, the algorithm append rare SNPs (MAF 24 | #' @return A data frame of block estimation result. 25 | #' Each row of data frame shows the starting SNP and end SNP of each estimated LD block. 26 | #' 27 | #' @author Sun-Ah Kim , Yun Joo Yoo 28 | #' @seealso \code{\link{CLQD}}, \code{\link{LDblockHeatmap}} 29 | #' 30 | #' @examples 31 | #' 32 | #' data(geno) 33 | #' data(SNPinfo) 34 | #' Big_LD(geno, SNPinfo) 35 | #' Big_LD(geno, SNPinfo, CLQcut = 0.5, clstgap = 40000, leng = 200, subSegmSize = 1500) 36 | #' 37 | # sub-Functions 1. CLQD < built - in > 2. cutsequence.modi, 3.intervalCliqueList, 4. find.maximum.indept, 5. constructLDblock, 38 | #' @export 39 | #' 40 | Big_LD <- function(geno, SNPinfo, CLQcut = 0.5, clstgap = 40000, leng = 200, subSegmSize = 1500, MAFcut = 0.05, 41 | appendrare = FALSE, checkLargest = FALSE, CLQmode="Density") { 42 | # packages 43 | # library(igraph) 44 | ####################################################################################################### 45 | # sub-Functions 1. cutsequence.modi, 2.intervalCliqueList, 3. find.maximum.indept, 4. constructLDblock, 5. CLQ 46 | 47 | cutsequence.modi <- function(geno, leng, subSegmSize) 48 | { 49 | print("split whole sequence into subsegments") 50 | modeNum <- 1 51 | lastnum <- 0 52 | # region length<=3000 53 | if (dim(geno)[2] <= subSegmSize) { 54 | print("there is only one sub-region!") 55 | return(list(dim(geno)[2], NULL)) 56 | } else { 57 | # sq = floor(log(leng, base = 10)) 58 | calterms = c(1, 10, leng) 59 | # calterms = unique(c(calterms, leng)) 60 | # calend = tail(calterms,1) 61 | cutpoints <- NULL 62 | i = leng # i :current candidate cutposition 63 | while (i <= (dim(geno)[2] - leng)) { 64 | if((i-lastnum) > 5*subSegmSize){ 65 | modeNum <- 2 66 | break; 67 | } 68 | # tick size = leng * (1/10) 69 | for(j in calterms){ 70 | # print(j) 71 | nowcm <- cor(geno[,(i-j+1):(i)], geno[,((i+1):(i+j))] 72 | ,use="pairwise.complete.obs") 73 | nowr2 <- nowcm^2 74 | nowr2[which(nowr2 < 0.5)] = 0 75 | if(sum(nowr2,na.rm = T)>0){ 76 | i<-i+1 77 | cutnow <- FALSE 78 | break 79 | } 80 | if(j==leng){ 81 | cutnow <-TRUE 82 | } 83 | } 84 | if(cutnow == TRUE){ 85 | cutpoints = c(cutpoints, i) 86 | lastnum = i 87 | print(i) 88 | i<-i+(leng/2) 89 | cutnow = FALSE 90 | } 91 | # 92 | }##end while 93 | if(modeNum == 1){ 94 | cutpoints <- c(0,cutpoints, dim(geno)[2]) 95 | # separate too big regions candi.cutpoints return(cutpoints,candi.cutpoints) 96 | atfcut <- NULL 97 | while (max(diff(cutpoints)) > subSegmSize) { 98 | diffseq = diff(cutpoints) 99 | recutpoint <- which(diffseq > subSegmSize) 100 | nowmaxsize = max(diff(cutpoints)) 101 | # print(nowmaxsize) print(recutpoint) 102 | tt <- cbind((cutpoints[recutpoint] + 1), cutpoints[recutpoint + 1]) 103 | numvec = NULL 104 | for (i in 1:dim(tt)[1]) { 105 | st <- tt[i, 1] 106 | ed <- tt[i, 2] 107 | if (ed > (dim(geno)[2] - leng)) { 108 | ed <- dim(geno)[2] - leng 109 | } 110 | weakcount <- sapply(c((st + leng):(ed - leng)), function(x) { 111 | tick <- as.integer(leng/5) 112 | nowCM <- cor(geno[, (x - tick + 1):(x)], geno[, (x+ 1):(x + tick)] 113 | ,use="pairwise.complete.obs") 114 | nowr2 <- nowCM^2 115 | diag(nowr2) <- 0 116 | length(which(nowr2>= 0.5)) 117 | }) 118 | weakcount.s <- sort(weakcount) 119 | weaks <- weakcount.s[10] 120 | weakpoint <- which(weakcount <= weaks) 121 | weakpoint <- weakpoint + st + leng - 1 122 | nearcenter = sapply(weakpoint, function(x) abs((ed - x) - (x - st)), simplify = TRUE) 123 | addcut <- weakpoint[which(nearcenter == min(nearcenter))][1] 124 | print(paste("add cutpoint", addcut)) 125 | numvec <- c(numvec, addcut) 126 | atfcut <- c(atfcut, addcut) 127 | } ##end for 128 | cutpoints <- sort(c(cutpoints, numvec)) 129 | newcandi = which(diff(cutpoints) > subSegmSize) 130 | # remaxsize = max(diff(cutpoints)) 131 | # print(remaxsize) print(newcandi) 132 | if (length(newcandi) == 0) { 133 | break 134 | } 135 | } 136 | } 137 | ##end while 138 | if(modeNum == 2){ 139 | cutpoints = seq(subSegmSize, dim(geno)[2], subSegmSize/2) 140 | atfcut = cutpoints 141 | if(max(cutpoints) == dim(geno)[2]){ 142 | atfcut = atfcut[-(length(atfcut))] 143 | }else{ 144 | cutpoints = c(cutpoints, dim(geno)[2]) 145 | } 146 | 147 | } 148 | } 149 | print("cutting sequence, done") 150 | return(list(cutpoints, atfcut)) 151 | } 152 | intervalCliqueList = function(clstlist, allsnps, onlybp) { 153 | bp.clstlist <- lapply(clstlist, function(x) onlybp[x]) ### 154 | bp.allsnps <- lapply(allsnps, function(x) onlybp[x]) 155 | 156 | IMsize <- length(bp.clstlist) ## adjacency matrix of intervals in interval graph 157 | adjacencyM <- matrix(0, IMsize, IMsize) 158 | for (i in 1:IMsize) { 159 | for (j in 1:IMsize) { 160 | adjacencyM[i, j] <- length(intersect(bp.allsnps[[i]], bp.allsnps[[j]])) 161 | } 162 | } 163 | diag(adjacencyM) <- 0 164 | interval.graph <- graph.adjacency(adjacencyM, mode = "undirected", weighted = TRUE, diag = FALSE, add.colnames = NULL) 165 | # print(paste("max coreness", max(coreness(interval.graph)))) 166 | # print(paste("ecount", ecount(interval.graph), "vertex*5 ", 5*IMsize)) 167 | if(max(coreness(interval.graph))>10){ #ecount(interval.graph)> 5*IMsize| 168 | interval.cliques <- maximal.cliques(interval.graph, min = 1) 169 | }else{ 170 | interval.cliques <- cliques(interval.graph, min = 1) 171 | } 172 | interval.cliques <- interval.cliques[order(sapply(interval.cliques, min))] 173 | 174 | intervals <- lapply(interval.cliques, function(x) unlist(bp.clstlist[x])) 175 | intervals <- lapply(intervals, sort) 176 | intervals <- lapply(intervals, unique) 177 | weight.itv <- sapply(intervals, length) 178 | 179 | intervals.range <- t(sapply(intervals, range)) 180 | unique.intervals.range <- unique(intervals.range) 181 | 182 | rangeinfo <- cbind(intervals.range, weight.itv) 183 | 184 | interval.info <- apply(unique.intervals.range, 1, function(x) { 185 | info1 = which(rangeinfo[, 1] == x[1]) 186 | info2 = which(rangeinfo[, 2] == x[2]) 187 | sameitv <- intersect(info1, info2) 188 | maxweight <- max(rangeinfo[sameitv, 3]) 189 | sameitv[which(maxweight == rangeinfo[sameitv, 3])][1] 190 | }) 191 | 192 | final.intervals <- intervals[interval.info] 193 | final.intervals.w <- rangeinfo[interval.info, 3] 194 | return(list(final.intervals, final.intervals.w)) 195 | } 196 | # find maximum weight independent set input: clique interval list, clique weights 197 | find.maximum.indept = function(sample.itv, sample.weight) { 198 | n.of.sample <- length(sample.itv) 199 | interval.range <- t(sapply(sample.itv, range)) 200 | pre.range <- as.list(rep(NA, n.of.sample)) #pre.range : range of predecessor 201 | ## pre.range : n by 2, i row (x,y) : possible predecessors of i interval are from x interval to y interval 202 | for (i in 1:n.of.sample) { 203 | nowstart <- interval.range[i, 1] 204 | if (length(which(interval.range[, 2] < nowstart)) > 0) { 205 | pre.range[[i]] <- which(interval.range[, 2] < nowstart) 206 | } 207 | } 208 | sources <- c(1:n.of.sample)[(sapply(pre.range, function(x) all(is.na(x)) == TRUE))] 209 | 210 | ## source of comparability graph of complement of Interval graph 211 | if (length(sources) < n.of.sample) { 212 | not.s <- setdiff(c(1:n.of.sample), sources) 213 | for (i in not.s) { 214 | pre.pre <- sort(unique(unlist(pre.range[pre.range[[i]]]))) 215 | pre.range[[i]] <- setdiff(pre.range[[i]], pre.pre) 216 | } 217 | names(pre.range) <- sample.weight 218 | n.interval <- c(1:n.of.sample) 219 | route.weights <- rep(0, n.of.sample) ##cumulative weights 220 | route.weights[sources] <- sample.weight[sources] 221 | pointers <- rep(0, n.of.sample) ## predecessor of current interval 222 | pointers[sources] <- NA 223 | explored <- rep(0, n.of.sample) 224 | explored[sources] <- 1 225 | info <- cbind(n.interval, route.weights, pointers, explored) 226 | 227 | for (i in not.s) { 228 | maybe.pred <- pre.range[[i]] 229 | now.info <- info[maybe.pred, , drop = FALSE] 230 | max.info <- now.info[which(now.info[, 2] == max(now.info[, 2])), , drop = FALSE] 231 | if (dim(max.info)[1] > 1) 232 | max.info <- max.info[1, , drop = FALSE] 233 | info[i, 2] <- sample.weight[i] + max.info[2] 234 | info[i, 3] <- max.info[1] 235 | info[i, 4] <- 1 236 | } 237 | 238 | #### trace maximum independent set 239 | start.itv <- which(info[, 2] == max(info[, 2]))[1] 240 | predecessor <- info[start.itv, 3] 241 | indept.set <- c(predecessor, start.itv) 242 | while (!is.na(predecessor)) { 243 | predecessor <- info[predecessor, 3] 244 | indept.set <- c(predecessor, indept.set) 245 | } 246 | 247 | indept.set <- as.vector(indept.set) 248 | indept.set <- indept.set[-which(is.na(indept.set))] 249 | indept.set.weight <- max(info[, 2]) 250 | } else { 251 | indept.set = which(sample.weight == max(sample.weight)) 252 | indept.set.weight = max(sample.weight) 253 | } 254 | 255 | final.result <- list(indept.set = indept.set, indept.set.weight = indept.set.weight) 256 | return(final.result) 257 | } 258 | constructLDblock = function(clstlist, subSNPinfo) { 259 | # subfunction: intervalCliqueList, find.maximum.indept 260 | Totalblocks = NULL 261 | while (length(clstlist) > 0) { 262 | allsnps <- lapply(clstlist, function(x) c(min(x):max(x))) 263 | onlybp <- subSNPinfo[, 2] 264 | candi.interval <- intervalCliqueList(clstlist, allsnps, onlybp) 265 | intervals <- candi.interval[[1]] ## list of SNPs in each cliques 266 | weight.itv <- candi.interval[[2]] ## weights of each cliques 267 | MWIS <- find.maximum.indept(intervals, weight.itv) ##find independent set 268 | indept.set <- intervals[MWIS[[1]]] 269 | LDintervals <- lapply(indept.set, function(x) match(x, subSNPinfo[, 2])) 270 | subLDblocks <- t(sapply(LDintervals, range)) 271 | Totalblocks <- rbind(Totalblocks, subLDblocks) 272 | takenSNPs <- apply(Totalblocks, 1, function(x) c(min(x):max(x))) 273 | takenSNPs <- as.vector(unlist(takenSNPs)) 274 | clstlist <- lapply(clstlist, function(x) setdiff(x, takenSNPs)) 275 | clstlist <- clstlist[sapply(clstlist, function(x) length(x) > 1)] 276 | if (length(clstlist) == 0) break 277 | addinglist <- NULL 278 | for (n in 1:length(clstlist)) { 279 | nowbin <- clstlist[[n]] 280 | intersection <- intersect(c(min(nowbin):max(nowbin)), takenSNPs) 281 | if (length(intersection) > 0) { 282 | part1 <- nowbin[which(nowbin < min(intersection))] 283 | part2 <- setdiff(nowbin, c(min(part1):max(intersection))) 284 | clstlist[[n]] <- part1 285 | addinglist <- c(addinglist, list(part2)) 286 | } 287 | } 288 | clstlist <- c(clstlist, addinglist) 289 | clstlist <- clstlist[sapply(clstlist, function(x) length(x) > 1)] 290 | } 291 | return(Totalblocks) 292 | } 293 | subBigLD = function(subgeno, subSNPinfo, CLQcut, clstgap, CLQmode, checkLargest){ 294 | subbinvec <- CLQD(subgeno, subSNPinfo, CLQcut, clstgap, CLQmode,codechange = FALSE, checkLargest) 295 | # print('CLQ done!') 296 | bins <- c(1:max(subbinvec[which(!is.na(subbinvec))])) 297 | clstlist <- sapply(bins, function(x) which(subbinvec == x), simplify = FALSE) 298 | clstlist <- lapply(clstlist, sort) ### 299 | clstlist <- clstlist[order(sapply(clstlist, min))] ### 300 | nowLDblocks <- constructLDblock(clstlist, subSNPinfo) 301 | # print('constructLDblock done!') 302 | nowLDblocks <- nowLDblocks[order(nowLDblocks[, 1]), , drop = FALSE] 303 | return(nowLDblocks) 304 | 305 | } 306 | appendSGTs = function(LDblocks, Ogeno, OSNPinfo, CLQcut, clstgap, checkLargest){ 307 | expandB = NULL 308 | # failB = NULL 309 | snp1 = which(OSNPinfo[,2]2){ 311 | OSNPs = 1:max(snp1) 312 | firstB = LDblocks[1,] 313 | secondSNPs = which(OSNPinfo[,2]>=firstB$start.bp & OSNPinfo[,2] <= firstB$end.bp) 314 | cor2 = suppressWarnings(cor(Ogeno[,c(secondSNPs, OSNPs),drop=FALSE], use="pairwise.complete.obs")^2) 315 | cor2 = cor2[1:length(secondSNPs), -(1:length(secondSNPs)), drop=FALSE] 316 | cor2num = apply(cor2, 2, function(x) { 317 | sum(x>CLQcut^2) 318 | }) 319 | cor2ratio = cor2num/(dim(cor2)[1]) 320 | # grid.draw(LDr2map(genoDp(Ogeno[,min(firstSNPs):max(secondSNPs)]), 321 | # c(0, length(firstSNPs), length(firstSNPs)+length(OSNPs),length(firstSNPs)+length(OSNPs)+length(secondSNPs)),1)) 322 | cor2numT = cor2ratio>0.6 323 | cor2numT = c(cor2numT, 1) 324 | points2 = min(which(cor2numT>0)) 325 | NsecondSNPs = points2:max(secondSNPs) 326 | reOSNPs = setdiff(c(1:max(NsecondSNPs)), NsecondSNPs) 327 | if(length(reOSNPs)>1){ 328 | subgeno = Ogeno[, reOSNPs] 329 | subSNPinfo = OSNPinfo[reOSNPs,] 330 | subBlocks = subBigLD(subgeno, subSNPinfo, CLQcut, CLQmode,clstgap, checkLargest) 331 | subBlocks = subBlocks+min(reOSNPs)-1 332 | expandB = rbind(expandB, subBlocks) 333 | } 334 | firstSNPs=NsecondSNPs 335 | }else{ 336 | firstB = LDblocks[1,] 337 | firstSNPs = which(OSNPinfo[,2]>=firstB$start.bp & OSNPinfo[,2] <= firstB$end.bp) 338 | } 339 | if(dim(LDblocks)[1]>1){ 340 | for(i in 1:(dim(LDblocks)[1]-1)){ 341 | secondB = LDblocks[(i+1),] 342 | secondSNPs = which(OSNPinfo[,2]>=secondB$start.bp & OSNPinfo[,2]<= secondB$end.bp) 343 | OSNPs = setdiff(max(firstSNPs):min(secondSNPs), c(max(firstSNPs), min(secondSNPs))) 344 | if(length(OSNPs)==0){ 345 | expandB = rbind(expandB, range(firstSNPs)) 346 | firstSNPs = secondSNPs 347 | }else{ 348 | cor1 = suppressWarnings(cor(Ogeno[,c(firstSNPs, OSNPs),drop=FALSE], use="pairwise.complete.obs")^2) 349 | cor1 = cor1[1:length(firstSNPs), -(1:length(firstSNPs)), drop=FALSE] 350 | cor1num = apply(cor1, 2, function(x) { 351 | sum(x>CLQcut^2) 352 | }) 353 | cor1ratio = cor1num/(dim(cor1)[1]) 354 | # cor1num = apply(cor1r2, 2, function(x) length(which(x>CLQcut^2))/length(firstSNPs)) 355 | cor2 = suppressWarnings(cor(Ogeno[,c(secondSNPs, OSNPs),drop=FALSE], use="pairwise.complete.obs")^2) 356 | cor2 = cor2[1:length(secondSNPs), -(1:length(secondSNPs)), drop=FALSE] 357 | cor2num = apply(cor2, 2, function(x) { 358 | sum(x>CLQcut^2) 359 | }) 360 | cor2ratio = cor2num/(dim(cor2)[1]) 361 | # grid.draw(LDr2map(cor(Ogeno[,min(firstSNPs):max(secondSNPs)]), 362 | # c(0, length(firstSNPs), length(firstSNPs)+length(OSNPs),length(firstSNPs)+length(OSNPs)+length(secondSNPs)),1)) 363 | cor1numT = cor1ratio>0.6 364 | cor2numT = cor2ratio>0.6 365 | cor1numT = c(1, cor1numT, 0) 366 | cor2numT = c(0, cor2numT, 1) 367 | points1 = max(firstSNPs)+max(which(cor1numT>0))-1 368 | NfirstSNPs = min(firstSNPs):points1 369 | points2 = max(firstSNPs)+max(which(cor2numT>0))-1 370 | NsecondSNPs = points2:max(secondSNPs) 371 | if(max(NfirstSNPs)1){ 375 | subgeno = Ogeno[, reOSNPs] 376 | subSNPinfo = OSNPinfo[reOSNPs,] 377 | subBlocks = subBigLD(subgeno, subSNPinfo, CLQcut, CLQmode, clstgap, checkLargest) 378 | subBlocks = subBlocks+min(reOSNPs)-1 379 | expandB = rbind(expandB, subBlocks) 380 | } 381 | firstSNPs=NsecondSNPs 382 | }else{ 383 | #merge two blocks 384 | subgeno = Ogeno[, c(min(firstSNPs):max(secondSNPs))] 385 | subSNPinfo = OSNPinfo[c(min(firstSNPs):max(secondSNPs)),] 386 | subBlocks = subBigLD(subgeno, subSNPinfo, CLQcut, CLQmode,clstgap, checkLargest) 387 | subBlocks = subBlocks+min(firstSNPs)-1 388 | if(dim(subBlocks)[1]==1) { 389 | firstSNPs = subBlocks[1,1]:subBlocks[1,2] 390 | }else{ 391 | expandB = rbind(expandB, subBlocks[-(dim(subBlocks)[1]),]) 392 | firstSNPs = subBlocks[(dim(subBlocks)[1]),1]:subBlocks[(dim(subBlocks)[1]),2] 393 | } 394 | } 395 | print(c(i, dim(LDblocks)[1])) 396 | # print(tail(expandB)) 397 | # if(i >= 30) break 398 | } 399 | } 400 | } 401 | # firstSNPs 402 | if(max(firstSNPs)<(dim(Ogeno)[2]-1)){ 403 | OSNPs = (max(firstSNPs)+1):(dim(Ogeno)[2]) 404 | cor1 = suppressWarnings(cor(Ogeno[,c(firstSNPs, OSNPs),drop=FALSE], use="pairwise.complete.obs")^2) 405 | cor1 = cor1[1:length(firstSNPs), -(1:length(firstSNPs)), drop=FALSE] 406 | cor1num = apply(cor1, 2, function(x) { 407 | sum(x>CLQcut^2) 408 | }) 409 | cor1ratio = cor1num/(dim(cor1)[1]) 410 | cor1numT = cor1ratio>0.6 411 | cor1numT = c(1, cor1numT, 0) 412 | points1 = max(firstSNPs)+max(which(cor1numT>0))-1 413 | NfirstSNPs = min(firstSNPs):points1 414 | expandB = rbind(expandB, range(NfirstSNPs)) 415 | reOSNPs = setdiff(c(min(NfirstSNPs):dim(Ogeno)[2]), c(NfirstSNPs)) 416 | if(length(reOSNPs)>1){ 417 | subgeno = Ogeno[, reOSNPs] 418 | subSNPinfo = OSNPinfo[reOSNPs,] 419 | subBlocks = subBigLD(subgeno, subSNPinfo, CLQcut, CLQmode,clstgap, checkLargest) 420 | subBlocks = subBlocks+min(reOSNPs)-1 421 | expandB = rbind(expandB, subBlocks) 422 | } 423 | }else{ 424 | expandB = rbind(expandB, range(firstSNPs)) 425 | } 426 | # LDblocks = expandB 427 | expandB = expandB[(expandB[,1]!=expandB[,2]),,drop=FALSE] 428 | start.bp <- OSNPinfo[, 2][expandB[, 1]] 429 | end.bp <- OSNPinfo[, 2][expandB[, 2]] 430 | start.rsID <- as.character(OSNPinfo[, 1][expandB[, 1]]) 431 | end.rsID <- as.character(OSNPinfo[, 1][expandB[, 2]]) 432 | TexpandB <- data.frame(expandB, start.rsID, end.rsID, start.bp, end.bp) 433 | colnames(TexpandB) <- c("start", "end", "start.rsID", "end.rsID", "start.bp", "end.bp") 434 | return(TexpandB) 435 | } 436 | ####################################################################################################### 437 | # Main part input data check!!!!!!!!!!!!!!!!! 438 | Ogeno = geno 439 | OSNPinfo = SNPinfo 440 | if (dim(Ogeno)[2] != dim(OSNPinfo)[1]) { 441 | stop("N of SNPs in geno data and N of SNPs in SNPinfo data Do Not Match!!") 442 | 443 | } else if (dim(OSNPinfo)[2] != 2) { 444 | stop("SNPinfo data Must Contain 2 columns!!") 445 | } 446 | Omono = apply(Ogeno, 2, function(x) { 447 | y<- x[!is.na(x)] 448 | length(unique(y))!=1 449 | }) 450 | Ogeno <- Ogeno[,Omono] 451 | monoSNPs = OSNPinfo[!Omono,] 452 | OSNPinfo <- OSNPinfo[Omono,] 453 | maf = apply(Ogeno, 2, function(x) mean(x,na.rm=TRUE)/2) 454 | maf_ok=ifelse(maf>=0.5,1-maf,maf) 455 | maf=maf_ok 456 | mafprun <- which(maf >= MAFcut) 457 | geno <- Ogeno[,mafprun] 458 | SNPinfo <- OSNPinfo[mafprun,] 459 | # print("split whole sequence into subsegments") 460 | cutpoints.all <- cutsequence.modi(geno, leng, subSegmSize) 461 | cutpoints <- cutpoints.all[[1]] 462 | atfcut <- (cutpoints.all[[2]]) 463 | if (!is.null(atfcut)){ 464 | atfcut <- sort(atfcut) 465 | } 466 | cutpoints = setdiff(cutpoints, 0) 467 | cutblock <- cbind(c(1, cutpoints + 1), c(cutpoints, dim(geno)[2])) 468 | cutblock <- cutblock[-(dim(cutblock)[1]), , drop = FALSE] 469 | LDblocks <- matrix(NA, dim(SNPinfo)[1], 2) 470 | # partition each segment into LD blocks 471 | for (i in 1:dim(cutblock)[1]) { 472 | nowst <- cutblock[i, 1] 473 | nowed <- cutblock[i, 2] 474 | subgeno <- geno[, nowst:nowed] 475 | subSNPinfo <- SNPinfo[nowst:nowed, ] 476 | # subbinvec <- CLQD(subgeno, subSNPinfo, CLQcut, clstgap, CLQmode = "Density", codechange = FALSE) 477 | subbinvec <- CLQD(subgeno, subSNPinfo, CLQcut, clstgap, CLQmode,codechange = FALSE, checkLargest) 478 | print('CLQ done!') 479 | bins <- c(1:max(subbinvec[which(!is.na(subbinvec))])) 480 | clstlist <- sapply(bins, function(x) which(subbinvec == x), simplify = FALSE) 481 | clstlist <- lapply(clstlist, sort) ### 482 | clstlist <- clstlist[order(sapply(clstlist, min))] ### 483 | nowLDblocks <- constructLDblock(clstlist, subSNPinfo) 484 | # print('constructLDblock done!') 485 | nowLDblocks <- nowLDblocks + (cutblock[i, 1] - 1) 486 | nowLDblocks <- nowLDblocks[order(nowLDblocks[, 1]), , drop = FALSE] 487 | preleng1 <- length(which(!is.na(LDblocks[, 1]))) 488 | LDblocks[(preleng1 + 1):(preleng1 + dim(nowLDblocks)[1]), ] <- nowLDblocks 489 | print(c(i, dim(cutblock)[1])) 490 | print(Sys.time()) 491 | } 492 | doneLDblocks <- LDblocks[which(!is.na(LDblocks[, 1])), , drop = FALSE] 493 | if (length(atfcut) != 0) { 494 | newLDblocks <- matrix(NA, dim(SNPinfo)[1], 2) 495 | consecutive.atf = 0 496 | for(i in 1:(dim(doneLDblocks)[1]-1)){ 497 | # if(i==1080) break; 498 | print(paste(i, dim(doneLDblocks)[1])) 499 | #if(i == 1){ 500 | endblock = doneLDblocks[i,] 501 | #} 502 | # if(nowblock[1]0){ 506 | consecutive.atf = consecutive.atf+1 507 | ## merge 508 | if(consecutive.atf>1){ 509 | addlinei = max(which(!is.na(newLDblocks[,1])==TRUE)) 510 | endblock = newLDblocks[addlinei,] 511 | } 512 | nowatfcut = intersect(gap, atfcut) 513 | newbigblock = range(c(endblock, nextblock)) 514 | newbigblocksize = diff(newbigblock)+1 515 | nowst = newbigblock[1] 516 | nowed = newbigblock[2] 517 | subgeno <- geno[, nowst:nowed] 518 | subSNPinfo <- SNPinfo[nowst:nowed, ] 519 | subbinvec <- CLQD(subgeno, subSNPinfo, CLQcut, clstgap, CLQmode, codechange = FALSE, checkLargest) 520 | # print('CLQ done!') 521 | bins <- c(1:max(subbinvec[which(!is.na(subbinvec))])) 522 | clstlist <- sapply(bins, function(x) which(subbinvec == x), simplify = FALSE) 523 | clstlist <- lapply(clstlist, sort) ### 524 | clstlist <- clstlist[order(sapply(clstlist, min))] ### 525 | nowLDblocks <- constructLDblock(clstlist, SNPinfo[nowst:nowed, ]) 526 | # print('constructLDblock done!') 527 | nowLDblocks <- nowLDblocks + (newbigblock[1] - 1) 528 | nowLDblocks <- nowLDblocks[order(nowLDblocks[, 1]), , drop = FALSE] 529 | preleng1 <- length(which(!is.na(newLDblocks[, 1]))) 530 | nowLDbleng = dim(nowLDblocks)[1] 531 | #if(nowLDbleng != 1){ 532 | newLDblocks[(preleng1 + 1):(preleng1 + dim(nowLDblocks)[1]), ] <- nowLDblocks 533 | #} 534 | #endblock <- nowLDblocks[nowLDbleng, ,drop = FALSE] 535 | # end <- max(nowLDblocks) 536 | # if(diff(newbigblock)+1 < subSegmSize) 537 | print(Sys.time()) 538 | }else{ 539 | consecutive.atf = 0 540 | addlinei = min(which(is.na(newLDblocks[,1])==TRUE)) 541 | newLDblocks[addlinei,] <-endblock 542 | #endblock <- nextblock 543 | if(i == (dim(doneLDblocks)[1]-1)){ 544 | addlinei = min(which(is.na(newLDblocks[,1])==TRUE)) 545 | newLDblocks[addlinei,] <-nextblock 546 | } 547 | } 548 | } 549 | LDblocks = newLDblocks 550 | }else{ 551 | LDblocks = doneLDblocks 552 | } 553 | LDblocks <- LDblocks[which(!is.na(LDblocks[, 1])), , drop = FALSE] 554 | LDblocks <- LDblocks[order(LDblocks[, 1]), , drop = FALSE] 555 | #overlapping LD block merging 556 | i = 1 557 | while(i