├── .github ├── .gitignore ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── R-CMD-check.yaml │ └── test-coverage.yaml ├── .gitignore ├── README.md ├── TODO ├── detectRUNS ├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R │ ├── RcppExports.R │ ├── Stats.R │ ├── funktionen.R │ ├── plots.R │ ├── run.R │ └── zzz.R ├── README.Rmd ├── README.md ├── cran-comments.md ├── inst │ └── extdata │ │ ├── Kijas2016_Sheep_subset.consecutive.csv │ │ ├── Kijas2016_Sheep_subset.map │ │ ├── Kijas2016_Sheep_subset.ped │ │ └── Kijas2016_Sheep_subset.sliding.csv ├── man │ ├── Froh_inbreeding.Rd │ ├── Froh_inbreedingClass.Rd │ ├── chromosomeLength.Rd │ ├── consecutiveRUNS.run.Rd │ ├── consecutiveRuns.Rd │ ├── consecutiveRunsCpp.Rd │ ├── createRUNdf.Rd │ ├── findOppositeAndMissing.Rd │ ├── genoConvert.Rd │ ├── genoConvertCpp.Rd │ ├── heteroZygotTest.Rd │ ├── heteroZygotTestCpp.Rd │ ├── homoZygotTest.Rd │ ├── homoZygotTestCpp.Rd │ ├── pedConvertCpp.Rd │ ├── plot_DistributionRuns.Rd │ ├── plot_InbreedingChr.Rd │ ├── plot_PatternRuns.Rd │ ├── plot_Runs.Rd │ ├── plot_SnpsInRuns.Rd │ ├── plot_StackedRuns.Rd │ ├── plot_ViolinRuns.Rd │ ├── plot_manhattanRuns.Rd │ ├── readExternalRuns.Rd │ ├── readMapFile.Rd │ ├── readPOPCpp.Rd │ ├── reorderDF.Rd │ ├── slidingRUNS.run.Rd │ ├── slidingRuns.Rd │ ├── slidingWindow.Rd │ ├── slidingWindowCpp.Rd │ ├── snpInRun.Rd │ ├── snpInRunCpp.Rd │ ├── snpInsideRuns.Rd │ ├── snpInsideRunsCpp.Rd │ ├── summaryRuns.Rd │ ├── tableRuns.Rd │ └── writeRUN.Rd ├── src │ ├── .gitignore │ ├── RcppExports.cpp │ └── functions.cpp ├── tests │ ├── testthat.R │ └── testthat │ │ ├── test.ROHet.consecutive.csv │ │ ├── test.ROHet.sliding.csv │ │ ├── test.map │ │ ├── test.ped │ │ ├── test.raw │ │ ├── test_functions.R │ │ ├── test_plots.R │ │ └── test_run.R └── vignettes │ └── detectRUNS.vignette.Rmd └── performance ├── helper.R └── test1.R /.github/.gitignore: 
-------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Include the steps to reproduce the bug mentioned above 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **detectRUNS version** 20 | Please indicate the version of `detectRUNS` you are using (with `packageVersion("detectRUNS")`) 21 | 22 | **platform** 23 | Identify the platform, architecture, and version of R where you found this bug. 24 | You can retrieve that from within R by typing `R.version`. 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Description 4 | 5 | 6 | ## Related Issue 7 | 8 | 9 | 10 | 11 | 12 | ## Motivation and Context 13 | 14 | 15 | ## How Has This Been Tested? 16 | 17 | 18 | 19 | 20 | ## Types of changes 21 | 22 | - [ ] Bug fix (non-breaking change which fixes an issue) 23 | - [ ] New feature (non-breaking change which adds functionality) 24 | - [ ] Breaking change (fix or feature that would cause existing functionality to change) 25 | 26 | ## Checklist: 27 | 28 | 29 | - [ ] My code follows the code style of this project. 30 | - [ ] My change requires a change to the documentation. 31 | - [ ] I have updated the documentation accordingly. 32 | - [ ] I have added tests to cover my changes. 33 | - [ ] All new and existing tests passed. 34 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [master, devel] 6 | pull_request: 7 | branches: [master, devel] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | 14 | runs-on: ${{ matrix.config.os }} 15 | 16 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | config: 22 | - {os: macOS-latest, r: 'release'} 23 | - {os: windows-latest, r: 'release'} 24 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 25 | - {os: ubuntu-latest, r: 'release'} 26 | - {os: ubuntu-latest, r: 'oldrel-1'} 27 | 28 | env: 29 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 30 | R_KEEP_PKG_SOURCE: yes 31 | 32 | steps: 33 | - uses: actions/checkout@v2 34 | 35 | - uses: r-lib/actions/setup-pandoc@v2 36 | 37 | - uses: r-lib/actions/setup-r@v2 38 | with: 39 | r-version: ${{ matrix.config.r }} 40 | http-user-agent: ${{ matrix.config.http-user-agent }} 41 | use-public-rspm: true 42 | 43 | - uses: r-lib/actions/setup-r-dependencies@v2 44 | with: 45 | extra-packages: rcmdcheck 46 | working-directory: ./detectRUNS 47 | 48 | - uses: r-lib/actions/check-r-package@v2 49 | with: 50 | working-directory: ./detectRUNS 51 | 52 | - name: Show testthat output 53 | if: always() 54 | run: find ./check -name '*check.log' -exec cat '{}' \; || true 55 | shell: bash 56 | working-directory: ./detectRUNS 57 | 58 | - name: Upload check results 59 | if: failure() 60 | uses: actions/upload-artifact@main 61 | with: 62 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 63 | path: check 64 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [master, devel] 6 | pull_request: 7 | branches: [master, devel] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | 14 | runs-on: ubuntu-latest 15 | 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | 22 | - uses: r-lib/actions/setup-r@v2 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: covr 29 | working-directory: ./detectRUNS 30 | 31 | - name: Test coverage 32 | run: covr::codecov() 33 | shell: Rscript {0} 34 | working-directory: ./detectRUNS 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # mac specific files 3 | .DS_Store 4 | 5 | # Temporary files 6 | *~ 7 | 8 | # History files 9 | .Rhistory 10 | .Rapp.history 11 | 12 | # Session Data files 13 | .RData 14 | 15 | # Example code in package build process 16 | *-Ex.R 17 | 18 | # Output files from R CMD build 19 | /*.tar.gz 20 | 21 | # Output files from R CMD check 22 | /*.Rcheck/ 23 | 24 | # RStudio files 25 | .Rproj.user/ 26 | *.Rproj 27 | 28 | # produced vignettes 29 | vignettes/*.html 30 | vignettes/*.pdf 31 | 32 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 33 | .httr-oauth 34 | 35 | # knitr and R markdown default cache directories 36 | /*_cache/ 37 | /cache/ 38 | 39 | # Temporary files created by R markdown 40 | *.utf8.md 41 | *.knit.md 42 | .Rproj.user 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Detect Runs of Homozygosity and Runs of Heterozygosity in diploid genomes 3 | ========================================================================= 4 | 5 | 6 | 
[![R-CMD-check](https://github.com/bioinformatics-ptp/detectRUNS/workflows/R-CMD-check/badge.svg)](https://github.com/bioinformatics-ptp/detectRUNS/actions) 7 | [![codecov.io](https://codecov.io/github/bioinformatics-ptp/detectRUNS/coverage.svg?branch=master)](https://codecov.io/github/bioinformatics-ptp/detectRUNS?branch=master) 8 | [![CRAN version](http://www.r-pkg.org/badges/version/detectRUNS)](https://cran.r-project.org/package=detectRUNS) 9 | 10 | 11 | This repository contains the source code for the R package `detectRUNS` and related 12 | `performance` tests. Here's the directory content: 13 | 14 | ``` 15 | ├── detectRUNS 16 | ├── performance 17 | ├── README.md 18 | └── TODO 19 | ``` 20 | 21 | `detectRUNS` implements two statistical approaches to runs' detection: 22 | - a sequential approach (consecutive runs, as described in Marras et al. 2015, and implemented in the software package Zanardi: https://github.com/bioinformatics-ptp/Zanardi); 23 | - an approach based on overlapping sliding windows (à la Plink: Purcell et al. 2007) 24 | 25 | More info in the Google Doc (https://goo.gl/yXR7iA) 26 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | 2 | * Calculate by chromosome (cut mapfile by chrom, then subset genotype) 3 | 4 | * Deal with compressed data? 
5 | 6 | * Update Google Docs documentation (https://goo.gl/yXR7iA) 7 | 8 | * Write WIKI on GitHub 9 | -------------------------------------------------------------------------------- /detectRUNS/.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | cran-comments.md 4 | ^README\.Rmd$ 5 | ^README-.*\.png$ 6 | ^\.github$ 7 | -------------------------------------------------------------------------------- /detectRUNS/.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # User-specific files 9 | .Ruserdata 10 | 11 | # Example code in package build process 12 | *-Ex.R 13 | 14 | # Output files from R CMD build 15 | /*.tar.gz 16 | 17 | # Output files from R CMD check 18 | /*.Rcheck/ 19 | 20 | # RStudio files 21 | .Rproj.user/ 22 | 23 | # produced vignettes 24 | vignettes/*.html 25 | vignettes/*.pdf 26 | vignettes/ 27 | 28 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 29 | .httr-oauth 30 | 31 | # knitr and R markdown default cache directories 32 | *_cache/ 33 | /cache/ 34 | 35 | # Temporary files created by R markdown 36 | *.utf8.md 37 | *.knit.md 38 | -------------------------------------------------------------------------------- /detectRUNS/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: detectRUNS 2 | Type: Package 3 | Title: Detect Runs of Homozygosity and Runs of Heterozygosity in 4 | Diploid Genomes 5 | Version: 0.9.6.9000 6 | Date: 2020-12-20 7 | Authors@R: c( 8 | person("Filippo","Biscarini", email="filippo.biscarini@gmail.com", role=c("aut", "cre")), 9 | person("Paolo","Cozzi", email="paolo.cozzi@ptp.it", role="aut"), 10 | person("Giustino","Gaspa", email="gigaspa@uniss.it", role="aut"), 11 | person("Gabriele","Marras", email="gmarras@uoguelph.ca", role="aut") 12 
| ) 13 | Description: Detection of runs of homozygosity and of heterozygosity 14 | in diploid genomes using two methods: sliding windows (Purcell et al (2007) 15 | ) and consecutive runs (Marras et al (2015) 16 | ). 17 | Depends: 18 | R (>= 3.0.0) 19 | License: GPL-3 20 | LazyData: TRUE 21 | Encoding: UTF-8 22 | Imports: 23 | plyr, 24 | iterators, 25 | itertools, 26 | ggplot2, 27 | reshape2, 28 | Rcpp, 29 | gridExtra, 30 | data.table 31 | RoxygenNote: 7.1.1 32 | Suggests: 33 | testthat, 34 | knitr, 35 | rmarkdown, 36 | prettydoc 37 | LinkingTo: Rcpp 38 | VignetteBuilder: knitr 39 | URL: https://github.com/bioinformatics-ptp/detectRUNS/tree/master/detectRUNS 40 | BugReports: https://github.com/bioinformatics-ptp/detectRUNS/issues 41 | -------------------------------------------------------------------------------- /detectRUNS/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(Froh_inbreeding) 4 | export(Froh_inbreedingClass) 5 | export(consecutiveRUNS.run) 6 | export(plot_DistributionRuns) 7 | export(plot_InbreedingChr) 8 | export(plot_PatternRuns) 9 | export(plot_Runs) 10 | export(plot_SnpsInRuns) 11 | export(plot_StackedRuns) 12 | export(plot_ViolinRuns) 13 | export(plot_manhattanRuns) 14 | export(readExternalRuns) 15 | export(reorderDF) 16 | export(slidingRUNS.run) 17 | export(summaryRuns) 18 | export(tableRuns) 19 | import(ggplot2) 20 | import(itertools) 21 | import(plyr) 22 | import(reshape2) 23 | import(utils) 24 | importFrom(Rcpp,sourceCpp) 25 | importFrom(grDevices,dev.off) 26 | importFrom(grDevices,pdf) 27 | importFrom(stats,na.omit) 28 | useDynLib(detectRUNS) 29 | -------------------------------------------------------------------------------- /detectRUNS/NEWS.md: -------------------------------------------------------------------------------- 1 | 2 | # detectRUNS 0.9.3 3 | 4 | ## Major changes 5 | 6 | * First submission to CRAN 7 | 8 | ## Bug fixes 9 | 
10 | * No bugs identified at the moment 11 | -------------------------------------------------------------------------------- /detectRUNS/R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | fast_factor <- function(x) { 5 | .Call('_detectRUNS_fast_factor', PACKAGE = 'detectRUNS', x) 6 | } 7 | 8 | #' Convert 0/1/2 genotypes to 0/1 9 | #' 10 | #' This is a utility function, that convert 0/1/2 genotypes (AA/AB/BB) into 0/1 11 | #' (either homozygous/heterozygous) 12 | #' 13 | #' @param genotype vector of 0/1/2 genotypes 14 | #' 15 | #' @return converted vector of genotypes (0/1) 16 | #' 17 | #' @useDynLib detectRUNS 18 | #' @importFrom Rcpp sourceCpp 19 | #' 20 | genoConvertCpp <- function(genotype) { 21 | .Call('_detectRUNS_genoConvertCpp', PACKAGE = 'detectRUNS', genotype) 22 | } 23 | 24 | #' Convert ped genotypes to 0/1 25 | #' 26 | #' This is a utility function, that convert ped genotypes (AA/AB/BB) into 0/1 27 | #' (either homozygous/heterozygous) 28 | #' 29 | #' @param genotype vector of pair of genotypes (01, AA, AG) 30 | #' 31 | #' @return converted vector of genotypes (0/1) 32 | #' 33 | #' @useDynLib detectRUNS 34 | #' @importFrom Rcpp sourceCpp 35 | #' 36 | pedConvertCpp <- function(genotype) { 37 | .Call('_detectRUNS_pedConvertCpp', PACKAGE = 'detectRUNS', genotype) 38 | } 39 | 40 | #' Function to check whether a window is (loosely) homozygous or not 41 | #' 42 | #' This is a core function. Parameters on how to consider a window homozygous are here (maxHet, maxMiss) 43 | #' 44 | #' @param x vector of 0/1 genotypes (from genoConvert()) 45 | #' @param gaps vector of differences between consecutive positions (gaps) in bps 46 | #' @param maxHet max n. of heterozygous SNP in a homozygous window 47 | #' @param maxMiss max n. 
of missing in a window 48 | #' @param maxGap max distance between consecutive SNP in a window to be still considered a potential run 49 | #' 50 | #' @return TRUE/FALSE (whether a window is homozygous or NOT) 51 | #' 52 | #' @useDynLib detectRUNS 53 | #' @importFrom Rcpp sourceCpp 54 | #' 55 | homoZygotTestCpp <- function(x, gaps, maxHet, maxMiss, maxGap) { 56 | .Call('_detectRUNS_homoZygotTestCpp', PACKAGE = 'detectRUNS', x, gaps, maxHet, maxMiss, maxGap) 57 | } 58 | 59 | #' Function to check whether a window is (loosely) heterozygous or not 60 | #' 61 | #' This is a core function. Parameters on how to consider a window heterozygous are here (maxHom, maxMiss) 62 | #' 63 | #' @param x vector of 0/1 genotypes (from genoConvert()) 64 | #' @param gaps vector of differences between consecutive positions (gaps) in bps 65 | #' @param maxHom max n. of homozygous SNP in a heterozygous window 66 | #' @param maxMiss max n. of missing in a window 67 | #' @param maxGap max distance between consecutive SNP in a window to be still considered a potential run 68 | #' 69 | #' @return TRUE/FALSE (whether a window is heterozygous or NOT) 70 | #' 71 | #' @useDynLib detectRUNS 72 | #' @importFrom Rcpp sourceCpp 73 | #' 74 | heteroZygotTestCpp <- function(x, gaps, maxHom, maxMiss, maxGap) { 75 | .Call('_detectRUNS_heteroZygotTestCpp', PACKAGE = 'detectRUNS', x, gaps, maxHom, maxMiss, maxGap) 76 | } 77 | 78 | #' Function to calculate oppositeAndMissingGenotypes array 79 | #' 80 | #' This is an helper function, this will be called by another function 81 | #' 82 | #' @param data vector of 0/1/2 genotypes 83 | #' @param ROHet TRUE in ROHet evaluation, FALSE for ROHom 84 | #' 85 | #' @return character array; names will be index in which opposite and missing 86 | #' snps are found in data array 87 | #' 88 | #' @useDynLib detectRUNS 89 | #' @importFrom Rcpp sourceCpp 90 | #' 91 | findOppositeAndMissing <- function(data, ROHet = TRUE) { 92 | .Call('_detectRUNS_findOppositeAndMissing', PACKAGE = 
'detectRUNS', data, ROHet) 93 | } 94 | 95 | #' Function to slide a window over a vector (individual's genotypes) 96 | #' 97 | #' This is a core function. The functions to detect RUNS are slid over the genome 98 | #' 99 | #' @param data vector of 0/1/2 genotypes 100 | #' @param gaps vector of differences between consecutive positions (gaps) in bps 101 | #' @param windowSize size of window (n. of SNP) 102 | #' @param step by which (how many SNP) is the window slid 103 | #' @param maxGap max distance between consecutive SNP in a window to be still considered a potential run 104 | #' @param ROHet shall we detect ROHet or ROHom? 105 | #' @param maxOppositeGenotype max n. of homozygous/heterozygous SNP 106 | #' @param maxMiss max. n. of missing SNP 107 | #' 108 | #' @return vector of TRUE/FALSE (whether a window is homozygous or NOT) 109 | #' 110 | #' @useDynLib detectRUNS 111 | #' @importFrom Rcpp sourceCpp 112 | #' 113 | slidingWindowCpp <- function(data, gaps, windowSize, step, maxGap, ROHet = TRUE, maxOppositeGenotype = 1L, maxMiss = 1L) { 114 | .Call('_detectRUNS_slidingWindowCpp', PACKAGE = 'detectRUNS', data, gaps, windowSize, step, maxGap, ROHet, maxOppositeGenotype, maxMiss) 115 | } 116 | 117 | #' Function to return a vector of T/F for whether a SNP is or not in a RUN 118 | #' 119 | #' This is a core function. The function to determine whether a SNP is or not in a RUN. 120 | #' The ratio between homozygous/heterozygous windows and total n. of windows is computed here 121 | #' 122 | #' @param RunVector vector of TRUE/FALSE (is a window homozygous/heterozygous?) 123 | #' @param windowSize size of window (n. 
of SNP) 124 | #' @param threshold threshold to call a SNP in a RUN 125 | #' 126 | #' @return vector of TRUE/FALSE (whether a SNP is in a RUN or NOT) 127 | #' 128 | #' @useDynLib detectRUNS 129 | #' @importFrom Rcpp sourceCpp 130 | #' 131 | snpInRunCpp <- function(RunVector, windowSize, threshold) { 132 | .Call('_detectRUNS_snpInRunCpp', PACKAGE = 'detectRUNS', RunVector, windowSize, threshold) 133 | } 134 | 135 | #' Function to return a dataframe of population (POP, ID) 136 | #' 137 | #' This is a core function. Read PED file and returns a data.frame with the first two 138 | #' columns 139 | #' 140 | #' @param genotypeFile genotype (.ped) file location 141 | #' 142 | #' @return a dataframe of POP, ID 143 | #' 144 | #' @useDynLib detectRUNS 145 | #' @importFrom Rcpp sourceCpp 146 | #' 147 | readPOPCpp <- function(genotypeFile) { 148 | .Call('_detectRUNS_readPOPCpp', PACKAGE = 'detectRUNS', genotypeFile) 149 | } 150 | 151 | #' Function to detect consecutive runs in a vector (individual's genotypes) 152 | #' 153 | #' This is a core function. It implements the consecutive method for detection of runs in diploid genomes 154 | #' (see Marras et al. 2015) 155 | #' 156 | #' @param indGeno vector of 0/1/NAs of individual genotypes (0: homozygote; 1: heterozygote) 157 | #' @param individual list of group (breed, population, case/control etc.) and ID of individual sample 158 | #' @param mapFile Plink map file (for SNP position) 159 | #' @param ROHet shall we detect ROHet or ROHom? 160 | #' @param minSNP minimum number of SNP in a run 161 | #' @param maxOppositeGenotype max n. of homozygous/heterozygous SNP 162 | #' @param maxMiss max. n. of missing SNP 163 | #' @param minLengthBps min length of a run in bps 164 | #' @param maxGap max distance between consecutive SNP in a window to be still considered a potential run 165 | #' 166 | #' @details 167 | #' The consecutive method detect runs by consecutively scanning SNP loci along the genome. 168 | #' No sliding windows are used. 
Checks on minimum n. of SNP, max n. of opposite and missing genotypes, 169 | #' max gap between adjacent loci and minimum length of the run are implemented (as in the sliding window method). 170 | #' Both runs of homozygosity (RoHom) and of heterozygosity (RoHet) can be search for (option ROHet: TRUE/FALSE) 171 | #' 172 | #' @return A data frame of runs per individual sample 173 | #' 174 | #' @useDynLib detectRUNS 175 | #' @importFrom Rcpp sourceCpp 176 | #' 177 | consecutiveRunsCpp <- function(indGeno, individual, mapFile, ROHet = TRUE, minSNP = 3L, maxOppositeGenotype = 1L, maxMiss = 1L, minLengthBps = 1000L, maxGap = 10e5L) { 178 | .Call('_detectRUNS_consecutiveRunsCpp', PACKAGE = 'detectRUNS', indGeno, individual, mapFile, ROHet, minSNP, maxOppositeGenotype, maxMiss, minLengthBps, maxGap) 179 | } 180 | 181 | #' Function to count number of times a SNP is in a RUN 182 | #' 183 | #' 184 | #' @param runsChrom R object (dataframe) with results per chromosome 185 | #' @param mapChrom R map object with SNP per chromosome 186 | #' @param genotypeFile genotype (.ped) file location 187 | #' 188 | #' @return dataframe with counts per SNP in runs (per population) 189 | #' 190 | #' @import utils 191 | #' 192 | #' @useDynLib detectRUNS 193 | #' @importFrom Rcpp sourceCpp 194 | #' 195 | snpInsideRunsCpp <- function(runsChrom, mapChrom, genotypeFile) { 196 | .Call('_detectRUNS_snpInsideRunsCpp', PACKAGE = 'detectRUNS', runsChrom, mapChrom, genotypeFile) 197 | } 198 | 199 | -------------------------------------------------------------------------------- /detectRUNS/R/Stats.R: -------------------------------------------------------------------------------- 1 | ##################### 2 | ## STATISTIC FOR RUNS 3 | ##################### 4 | 5 | #' Function to found max position for each chromosome 6 | #' 7 | #' 8 | #' @param mapFile Plink map file (for SNP position) 9 | #' 10 | #' @details 11 | #' Create a data frame with the max position in map file (plink format) 12 | #' 13 | #' 
#' @return A data frame with the max position for chromosome
#' @keywords internal
#'

chromosomeLength <- function(mapFile) {
  # load the Plink map and discard the markers whose POSITION is 0
  map_all <- readMapFile(mapFile)
  map_kept <- map_all[map_all$POSITION != 0, ]

  # NULL placeholders for the column names used inside ddply()
  # (avoids warning messages about undefined variables)
  CHR <- NULL
  POSITION <- NULL

  # the largest position found on a chromosome is taken as its length
  chrom_lengths <- ddply(map_kept, .(CHR), summarize, max(POSITION))
  colnames(chrom_lengths) <- c("CHROMOSOME", "CHR_LENGTH")
  chrom_lengths$CHR_LENGTH <- as.numeric(as.vector(chrom_lengths$CHR_LENGTH))

  # report the genome length, i.e. the sum of the per-chromosome maxima
  genome_length <- sum(chrom_lengths$CHR_LENGTH)
  message(paste("Total genome length:", genome_length, sep = " "))

  chrom_lengths
}


#' Function to calculate Froh genome-wide or chromosome-wide
#'
#' This function calculates the individual inbreeding coefficients based on runs of
#' homozygosity (ROH), either per-chromosome (chromosome-wide) or based on the
#' entire genome (genome-wide). See details of calculations below
#'
#' @param runs R object (dataframe) with results on runs
#' @param mapFile Plink map file (to retrieve SNP position)
#' @param genome_wide vector of TRUE/FALSE (genome-wide or chromosome-wide;
#' defaults to TRUE/genome-wide)
#'
#' @details
#' Froh is calculated as:
#'
#' \eqn{ F_{ROH} = \frac{\sum ROH_{length}}{Length_{genome}} }
#'
#' Depending on whether genome-wide or chromosome-wide calculations are required,
#' the terms in the numerator and denominator will refer to the entire genome
#' or will be restricted to specific chromosomes.
#'
#' @import reshape2
#'
#' @return A data frame with the inbreeding coefficients of each individual sample:
#' the \code{group} and \code{id} columns of \code{runs} plus, genome-wide, the
#' summed ROH length (\code{sum}) and \code{Froh_genome}; chromosome-wide, one
#' \code{Chr_<chromosome>} column for each chromosome present both in \code{runs}
#' and in the map file
#'
#' @export
#'
#' @examples
#' # getting map and ped paths
#' genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS")
#' mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS")
#'
#' # calculating runs of Homozygosity
#' \dontrun{
#' # skipping runs calculation
#' runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15,
#' ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000)
#' }
#' # loading pre-calculated data
#' runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS")
#' runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS')
#'
#' Froh_inbreeding(runs = runsDF, mapFile = mapFile)
#' Froh_inbreeding(runs = runsDF, mapFile = mapFile, genome_wide=FALSE)
#'

Froh_inbreeding <- function(runs, mapFile, genome_wide = TRUE) {
  # fail early, with a readable message, when a column used below is missing
  needed <- c("group", "id", "lengthBps", if (!genome_wide) "chrom")
  absent <- setdiff(needed, colnames(runs))
  if (length(absent) > 0) {
    stop("'runs' is missing column(s): ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  LengthGenome <- chromosomeLength(mapFile = mapFile)
  info_breed <- unique(runs[c("group", "id")])

  # NULL placeholders for the column names used inside ddply()
  # (avoids warning messages about undefined variables)
  id <- NULL
  lengthBps <- NULL
  chrom <- NULL

  # NOTE(review): ROH are summed by id only, so an id occurring in more than
  # one group would be pooled -- confirm ids are unique across groups
  if (genome_wide) {
    message("calculating Froh on all genome")

    # total ROH length of each individual over the total genome length
    Froh <- ddply(runs, .(id), summarize, sum = sum(lengthBps))
    Froh$Froh_genome <- Froh$sum / sum(LengthGenome$CHR_LENGTH)

  } else {
    message("calculating Froh chromosome by chromosome")

    # ROH length per individual and chromosome over that chromosome's length;
    # merge() keeps only the chromosomes that are also present in the map
    Froh_temp <- ddply(runs, .(id, chrom), summarize, sum = sum(lengthBps))
    Froh_temp <- merge(Froh_temp, LengthGenome, by.y = "CHROMOSOME", by.x = "chrom")
    Froh_temp$Froh <- Froh_temp$sum / Froh_temp$CHR_LENGTH

    # one row per individual, one column per chromosome
    Froh <- reshape2::dcast(Froh_temp, id ~ chrom, value.var = "Froh")

    # Column order: the conventional chromosome order first, then any label
    # not in that list (sorted alphabetically). Unlisted labels used to become
    # NA in the ordered factor and were silently dropped from the result.
    chr_order <- c(0:99, "X", "Y", "XY", "MT", "Z", "W")
    list_chr <- as.character(unique(Froh_temp$chrom))
    chr_levels <- c(chr_order, sort(setdiff(list_chr, chr_order)))
    new_list_chr <- chr_levels[chr_levels %in% list_chr]

    Froh <- Froh[c("id", new_list_chr)]
    colnames(Froh) <- c("id", paste("Chr_", new_list_chr, sep = ""))
  }

  # attach the group of each individual; all = TRUE keeps unmatched ids
  Froh <- merge(info_breed, Froh, by = "id", all = TRUE)

  return(Froh)
}


#' Function to calculate Froh using a ROH-class
#'
#' This function calculates the individual inbreeding coefficients based on runs of
#' homozygosity (ROH) using only ROH of specific size classes.
#' The parameter \code{Class} specifies the size interval to split up calculations.
#' For example, if \code{Class = 2} Froh based on ROH 0-2, 2-4, 4-8, 8-16, >16 Mbps long
#' will be calculated.
#'
#' @param runs R object (dataframe) with ROH results
#' @param mapFile Plink map file (for SNP position)
#' @param Class base ROH-length interval (in Mbps) (default: 0-2, 2-4, 4-8, 8-16, >16)
#'
#'
#' @return A data frame with individual inbreeding coefficients based on ROH-length of
#' specific size.
#' The sum of ROH-length of specific size in each individual is
#' reported alongside. Classes are cumulative lower bounds: for each threshold
#' (0, \code{Class}, 2*\code{Class}, 4*\code{Class}, 8*\code{Class} Mbps) all ROH
#' at least that long are summed; a threshold with no ROH adds no columns
#' @export
#'
#' @examples
#' # getting map and ped paths
#' genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS")
#' mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS")
#'
#' # calculating runs of Homozygosity
#' \dontrun{
#' # skipping runs calculation
#' runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15,
#' ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000)
#' }
#' # loading pre-calculated data
#' runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS")
#' runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS')
#'
#' Froh_inbreedingClass(runs = runsDF, mapFile = mapFile, Class = 2)
#'

Froh_inbreedingClass <- function(runs, mapFile, Class = 2) {
  # Class is doubled to build the thresholds, so it has to be one positive number
  if (!is.numeric(Class) || length(Class) != 1L || is.na(Class) || Class <= 0) {
    stop("'Class' must be a single positive number (Mbps)", call. = FALSE)
  }

  # lower bounds (Mbps) of the five classes: 0, Class, 2*Class, 4*Class, 8*Class
  class_bounds <- c(0, Class * 2^(0:3))

  # run length expressed in Mbps
  runs$MB <- runs$lengthBps / 1000000

  # denominator of Froh: sum of the per-chromosome lengths from the map
  LengthGenome <- chromosomeLength(mapFile)
  genome_length <- sum(LengthGenome$CHR_LENGTH)

  # sum of ROH for Sample
  message("calculating Froh by Class")

  # NULL placeholders for the column names used inside ddply()
  # (avoids warning messages about undefined variables)
  id <- NULL
  lengthBps <- NULL

  # one row per (group, id); two columns are merged in for every class
  Froh_Class <- unique(runs[c("group", "id")])
  for (bound in class_bounds) {
    print(paste("Class used: >", bound, sep = ""))

    # runs at least `bound` Mbps long (cumulative class)
    subset_roh <- runs[runs$MB >= bound, ]

    # no runs for this class: skip it, no columns are added
    if (nrow(subset_roh) < 1) next

    Froh_temp <- ddply(subset_roh, .(id), summarize, sum = sum(lengthBps))
    Froh_temp[[paste("Froh_Class_", bound, sep = "")]] <- Froh_temp$sum / genome_length
    # second column is the ddply() sum; rename it after the class
    colnames(Froh_temp)[2] <- paste("Sum_Class_", bound, sep = "")
    Froh_Class <- merge(Froh_Class, Froh_temp, by = "id", all = TRUE)
  }

  return(Froh_Class)

}


#' Summary statistics on detected runs
#'
#' This function processes the results from \code{slidingRUNS.run} and
#' \code{consecutiveRUNS.run} and produces a number of interesting descriptives
#' statistics on results.
#'
#' @param genotypeFile Plink ped file (for SNP position)
#' @param mapFile Plink map file (for SNP position)
#' @param runs R object (dataframe) with results on detected runs
#' @param Class group of length (in Mbps) by class (default: 0-2, 2-4, 4-8, 8-16, >16)
#' @param snpInRuns TRUE/FALSE (default): should the function \code{snpInsideRuns} be
#' called to compute the proportion of times each SNP falls inside a run in the
#' group/population?
#'
#' @details
#' \code{summaryRuns} calculates: i) the number of runs per chromosome and group/population;
#' ii) the percent distribution of runs per chromosome and group; iii) the number of
#' runs per size-class and group; iv) the percent distribution of runs per size-class
#' and group; v) the mean length of runs per chromosome and group; vi) the mean
#' length of runs per size-class and group; vii) individual inbreeding coefficient
#' estimated from ROH; viii) individual inbreeding coefficient estimated from ROH
#' per chromosome; ix) individual inbreeding coefficient estimated from ROH per
#' size-class
#'
#' @return A list of dataframes containing the most relevant descriptives
#' statistics on detected runs. The list conveniently contains 9 dataframes that can
#' be used for further processing and visualization, or can be written out to text files
#' @export
#'
#' @examples
#' # getting map and ped paths
#' genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS")
#' mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS")
#'
#' # calculating runs of Homozygosity
#' \dontrun{
#' # skipping runs calculation
#' runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15,
#' ROHet = FALSE, maxOppWindow = 1, maxMissWindow = 1, minLengthBps = 100000, minDensity = 1/10000)
#' }
#' # loading pre-calculated data
#' runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS")
#' runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS')
#'
#' summaryRuns(runs = runsDF, mapFile = mapFile, genotypeFile = genotypeFile, Class = 2,
#' snpInRuns = FALSE)
#'

summaryRuns <- function(runs, mapFile, genotypeFile, Class = 2, snpInRuns = FALSE) {

  # Fail early with a clear message instead of an obscure error from cut()
  if (!is.numeric(Class) || length(Class) != 1L || is.na(Class) || Class <= 0) {
    stop("Class must be a single positive number (length in Mbps)", call. = FALSE)
  }

  message("Checking files...")
  message(paste("Using class:", Class))

  # Avoid warnings: bind the names used unquoted inside ddply()/summarize
  group <- NULL
  CLASS <- NULL
  MB <- NULL
  chrom <- NULL

  # Inbreeding coefficients are computed on the untouched runs table
  result_Froh_genome_wide <- Froh_inbreeding(runs = runs,
                                             mapFile = mapFile,
                                             genome_wide = TRUE)
  result_Froh_chromosome_wide <- Froh_inbreeding(runs = runs,
                                                 mapFile = mapFile,
                                                 genome_wide = FALSE)
  result_Froh_class <- Froh_inbreedingClass(runs = runs,
                                            mapFile = mapFile,
                                            Class = Class)

  runs$MB <- runs$lengthBps / 1000000

  # Class breaks in Mbps: 0, Class, 2*Class, 4*Class, 8*Class and an upper cap
  range_mb <- c(0, Class * 2^(0:3), 99999)

  # One label per interval: "0-2", "2-4", "4-8", "8-16", ">16" for Class = 2
  name_CLASS <- c(paste0(range_mb[1:4], "-", range_mb[2:5]),
                  paste0(">", range_mb[5]))

  # Collapse the labels so they are printed as one readable line
  message("Class created: ", paste(name_CLASS, collapse = ", "))

  # Label every run with its size class; factor() drops classes with no runs
  runs$CLASS <- factor(cut(as.numeric(runs$MB), breaks = range_mb, labels = name_CLASS))

  # mean run length (Mbps) per size-class and group
  # NOTE: the class column already carries the right labels here. Re-assigning
  # all five labels (as the previous code did) shifted them whenever a class
  # had no runs, so that step has been removed.
  summary_ROH_mean1 <- ddply(runs, .(group, CLASS), summarize, sum = mean(MB))
  summary_ROH_mean_class <- dcast(summary_ROH_mean1, CLASS ~ group, value.var = "sum")

  # mean run length (Mbps) per chromosome and group
  summary_ROH_mean_chr1 <- ddply(runs, .(group, chrom), summarize, sum = mean(MB))
  summary_ROH_mean_chr <- reorderDF(dcast(summary_ROH_mean_chr1, chrom ~ group, value.var = "sum"))

  # number and proportion of runs per size-class and group
  count_by_class <- ddply(runs, .(CLASS, group), nrow)
  summary_ROH_count <- dcast(count_by_class, CLASS ~ group, value.var = "V1")
  rownames(summary_ROH_count) <- summary_ROH_count$CLASS
  summary_ROH_count$CLASS <- NULL
  # t() puts groups on rows, so dividing by the per-group totals recycles correctly
  summary_ROH_percentage <- as.data.frame(t(as.data.frame(
    t(summary_ROH_count) / colSums(summary_ROH_count, na.rm = TRUE))))
  summary_ROH_percentage$CLASS <- row.names(summary_ROH_percentage)

  # number and proportion of runs per chromosome and group
  count_by_chr <- ddply(runs, .(chrom, group), nrow)
  summary_ROH_count_chr <- dcast(count_by_chr, chrom ~ group, value.var = "V1")
  rownames(summary_ROH_count_chr) <- summary_ROH_count_chr$chrom
  summary_ROH_count_chr$chrom <- NULL
  summary_ROH_percentage_chr <- as.data.frame(t(as.data.frame(
    t(summary_ROH_count_chr) / colSums(summary_ROH_count_chr, na.rm = TRUE))))
  summary_ROH_percentage_chr$chrom <- row.names(summary_ROH_percentage_chr)

  result_summary <- list(summary_ROH_count_chr = summary_ROH_count_chr,
                         summary_ROH_percentage_chr = summary_ROH_percentage_chr,
                         summary_ROH_count = summary_ROH_count,
                         summary_ROH_percentage = summary_ROH_percentage,
                         summary_ROH_mean_chr = summary_ROH_mean_chr,
                         summary_ROH_mean_class = summary_ROH_mean_class,
                         result_Froh_genome_wide = result_Froh_genome_wide,
                         result_Froh_chromosome_wide = result_Froh_chromosome_wide,
                         result_Froh_class = result_Froh_class)

  if (snpInRuns) {

    message("Calculating SNPs inside ROH")
    # Rename only the seven standard run columns; MB and CLASS (added above)
    # keep their names instead of becoming NA
    names(runs)[seq_len(7)] <- c("POPULATION", "IND", "CHROMOSOME", "COUNT", "START", "END", "LENGTH")

    # read map file
    mappa <- readMapFile(mapFile)

    # Start calculation % SNP in ROH
    message("Calculation % SNP in ROH") #FILIPPO
    all_SNPinROH <- data.frame("SNP_NAME" = character(),
                               "CHR" = integer(),
                               "POSITION" = numeric(),
                               "COUNT" = integer(),
                               "BREED" = factor(),
                               "PERCENTAGE" = numeric(),
                               stringsAsFactors = FALSE)

    # create progress bar
    chromosomes <- sort(unique(runs$CHROMOSOME))
    total <- length(chromosomes)
    message(paste('Chromosome founds: ', total)) #FILIPPO
    pb <- txtProgressBar(min = 0, max = total, style = 3)

    # One result per chromosome, bound together once after the loop
    snp_by_chrom <- vector("list", total)
    for (k in seq_along(chromosomes)) {
      runsChrom <- runs[runs$CHROMOSOME == chromosomes[k], ]
      mapKrom <- mappa[mappa$CHR == chromosomes[k], ]
      snp_by_chrom[[k]] <- snpInsideRunsCpp(runsChrom, mapKrom, genotypeFile)
      setTxtProgressBar(pb, k)
    }
    close(pb)

    all_SNPinROH <- do.call(rbind.data.frame, c(list(all_SNPinROH), snp_by_chrom))

    result_summary <- append(result_summary, list(SNPinRun = all_SNPinROH))
    message("Calculation % SNP in ROH finish") #FILIPPO
  }

  return(result_summary)
}


#' Function to retrieve most common runs in the population
#'
#' This function takes in input either the run results or the output from
#' the function \code{snpInsideRuns} (proportion of times a SNP is inside a run)
#' in the population/group, and returns a subset of the runs most commonly
#' found in the group/population. The parameter \code{threshold} controls the definition
#' of most common (e.g. in at least 50\%, 70\% etc. of the sampled individuals)
#'
#' @param genotypeFile Plink ped file (for SNP position)
#' @param mapFile Plink map file (for SNP position)
#' @param runs R object (dataframe) with results on detected runs
#' @param threshold value from 0 to 1 (default 0.5) that controls the desired
#' proportion of individuals carrying that run (e.g.
#' 70\%)
#' @param SnpInRuns dataframe with the proportion of times each SNP falls inside a
#' run in the population (output from \code{snpInsideRuns})
#'
#' @return A dataframe with the most common runs detected in the sampled individuals
#' (the group/population, start and end position of the run, chromosome and number of SNP
#' included in the run are reported in the output dataframe)
#' @export
#'
#' @examples
#' # getting map and ped paths
#' genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS")
#' mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS")
#'
#' # calculating runs of Homozygosity
#' \dontrun{
#' # skipping runs calculation
#' runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15,
#' ROHet = FALSE, maxOppWindow = 1, maxMissWindow = 1, minLengthBps = 100000, minDensity = 1/10000)
#' }
#' # loading pre-calculated data
#' runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS")
#' runsDF = readExternalRuns(inputFile = runsFile, program = 'detectRUNS')
#'
#' tableRuns(runs = runsDF, genotypeFile = genotypeFile, mapFile = mapFile, threshold = 0.5)
#'

tableRuns <- function(runs = NULL, SnpInRuns = NULL, genotypeFile, mapFile, threshold = 0.5) {

  if (!is.numeric(threshold) || length(threshold) != 1L || is.na(threshold) ||
      threshold < 0 || threshold > 1) {
    stop("threshold must be a single number between 0 and 1", call. = FALSE)
  }

  # PERCENTAGE is compared on a 0-100 scale
  threshold_used <- threshold * 100
  message(paste('Threshold used:', threshold_used))

  has_runs <- !is.null(runs)
  has_snp <- !is.null(SnpInRuns)

  if (has_runs && !has_snp) {
    message('I found only Runs data frame. GOOD!')

    # the map file is only needed when SNP-in-run proportions must be computed
    mappa <- readMapFile(mapFile)

    # change colnames in runs file
    names(runs) <- c("POPULATION", "IND", "CHROMOSOME", "COUNT", "START", "END", "LENGTH")

    # Start calculation % SNP in ROH
    message("Calculation % SNP in ROH") #FILIPPO
    all_SNPinROH <- data.frame("SNP_NAME" = character(),
                               "CHR" = integer(),
                               "POSITION" = numeric(),
                               "COUNT" = integer(),
                               "BREED" = factor(),
                               "PERCENTAGE" = numeric(),
                               stringsAsFactors = FALSE)

    # create progress bar
    chromosomes <- sort(unique(runs$CHROMOSOME))
    total <- length(chromosomes)
    message(paste('Chromosome founds: ', total)) #FILIPPO
    pb <- txtProgressBar(min = 0, max = total, style = 3)

    # SNP in ROH: one result per chromosome, bound together once after the loop
    snp_by_chrom <- vector("list", total)
    for (k in seq_along(chromosomes)) {
      runsChrom <- runs[runs$CHROMOSOME == chromosomes[k], ]
      mapKrom <- mappa[mappa$CHR == chromosomes[k], ]
      snp_by_chrom[[k]] <- snpInsideRunsCpp(runsChrom, mapKrom, genotypeFile)
      setTxtProgressBar(pb, k)
    }
    close(pb)
    all_SNPinROH <- do.call(rbind.data.frame, c(list(all_SNPinROH), snp_by_chrom))
    message("Calculation % SNP in ROH finish") #FILIPPO
  } else if (!has_runs && has_snp) {
    message('I found only SNPinRuns data frame. GOOD!')
    all_SNPinROH <- SnpInRuns
  } else if (has_runs && has_snp) {
    stop('You gave me Runs and SNPinRuns! Please choose one!')
  } else {
    stop('You gave me neither Runs nor SNPinRuns! Please provide one!')
  }

  # consecutive row number: SNPs adjacent in the table differ by exactly 1
  all_SNPinROH$Number <- seq_len(nrow(all_SNPinROH))

  # returned as-is when no SNP passes the threshold in any group
  empty_table <- data.frame("Group" = character(0), "Start_SNP" = character(0),
                            "End_SNP" = character(0), "chrom" = character(0),
                            "nSNP" = numeric(0), "from" = integer(0), "to" = integer(0))

  # vector of breeds
  group_list <- as.vector(unique(all_SNPinROH$BREED))
  group_tables <- vector("list", length(group_list))

  for (g in seq_along(group_list)) {
    grp <- group_list[g]
    message(paste('checking: ', grp))

    # SNPs of this group found in a run in more than `threshold` of individuals
    keep <- which(all_SNPinROH$BREED %in% grp & all_SNPinROH$PERCENTAGE > threshold_used)
    group_subset <- as.data.frame(all_SNPinROH[keep, ])
    n_rows <- nrow(group_subset)
    if (n_rows == 0) next

    # Columns are addressed by position as before:
    # 1 = SNP name, 2 = chromosome, 3 = position, 5 = group
    snp_number <- group_subset$Number
    snp_chrom <- as.character(group_subset[[2]])

    # A new stretch starts at the first row, after a gap in the row numbers,
    # or when the chromosome changes
    is_start <- c(TRUE, diff(snp_number) > 1 | snp_chrom[-1] != snp_chrom[-n_rows])
    first <- which(is_start)
    last <- c(first[-1] - 1L, n_rows)

    group_tables[[g]] <- data.frame("Group" = group_subset[first, 5],
                                    "Start_SNP" = group_subset[first, 1],
                                    "End_SNP" = group_subset[last, 1],
                                    "chrom" = group_subset[first, 2],
                                    "nSNP" = as.numeric(last - first + 1L),
                                    "from" = group_subset[first, 3],
                                    "to" = group_subset[last, 3])
  }

  # groups without any SNP above threshold left a NULL slot: drop them
  group_tables <- Filter(Negate(is.null), group_tables)
  final_table <- do.call(rbind.data.frame, c(list(empty_table), group_tables))

  rownames(final_table) <- NULL
  return(final_table)
}


-------------------------------------------------------------------------------- /detectRUNS/R/run.R: -------------------------------------------------------------------------------- 1 | ########################################################### 2 | ### Compute Genomic Runs in R (homozygosity/heterozygosity) 3 | ########################################################### 4 | 5 | 6 | #' Main function to detect RUNS (ROHom/ROHet) using sliding windows (a la Plink) 7 | #' 8 | #' This is one of the main function of detectRUNS and is used to detect runs 9 | #' (of homozygosity or heterozygosity) 10 | #' in the genome (diploid) with the sliding-window method. 11 | #' All parameters to detect runs (e.g. minimum n. of SNP, max n. of missing genotypes, 12 | #' max n. of opposite genotypes etc.) are specified here. 13 | #' Input data are in the ped/map 14 | #' Plink format (https://www.cog-genomics.org/plink/1.9/input#ped) 15 | #' 16 | #' @param genotypeFile genotype (.ped) file path 17 | #' @param mapFile map file (.map) file path 18 | #' @param windowSize the size of sliding window (number of SNP loci) (default = 15) 19 | #' @param threshold the threshold of overlapping windows of the same state 20 | #' (homozygous/heterozygous) to call a SNP in a RUN (default = 0.05) 21 | #' @param minSNP minimum n. of SNP in a RUN (default = 3) 22 | #' @param ROHet should we look for ROHet or ROHom? (default = FALSE) 23 | #' @param maxOppWindow max n. of homozygous/heterozygous SNP in the 24 | #' sliding window (default = 1) 25 | #' @param maxMissWindow max. n. of missing SNP in the sliding window (default = 1) 26 | #' @param maxGap max distance between consecutive SNP to be still considered a 27 | #' potential run (default = 10^6 bps) 28 | #' @param minLengthBps minimum length of run in bps (defaults to 1000 bps = 1 kbps) 29 | #' @param minDensity minimum n. of SNP per kbps (defaults to 0.1 = 1 SNP every 10 kbps) 30 | #' @param maxOppRun max n. 
#' of opposite genotype SNPs in the run (optional)
#' @param maxMissRun max n. of missing SNPs in the run (optional)
#'
#' @details
#' This function scans the genome (diploid) for runs using the sliding-window method.
#' This is a wrapper function for many component functions that handle the input data
#' (ped/map files), perform internal conversions, accept parameters specifications,
#' select whether runs of homozygosity (RoHom) or of heterozygosity (RoHet)
#' are looked for.
#'
#' In the ped file, the groups samples belong to can be specified (first column).
#' This is important if comparisons between human ethnic groups or between animal breeds
#' or plant varieties or biological populations are to be performed.
#' Also, if cases and controls are to be compared, this is the place where this
#' information needs to be specified.
#'
#' This function returns a data frame with all runs detected in the dataset.
#' This data frame can then be written out to a csv file.
#' The data frame is, in turn, the input for other functions of the detectRUNS package
#' that create plots and produce statistics from the results
#' (see plots and statistics functions in this manual,
#' and/or refer to the detectRUNS vignette).
#'
#' @return A dataframe with RUNs of Homozygosity or Heterozygosity in the analysed dataset.
#' The returned dataframe contains the following seven columns: "group", "id", "chrom",
#' "nSNP", "from", "to", "lengthBps" (group: population, breed, case/control etc.;
#' id: individual identifier; chrom: chromosome on which the run is located;
#' nSNP: number of SNPs in the run; from: starting position of the run, in bps;
#' to: end position of the run, in bps; lengthBps: size of the run)
#' @export
#'
#' @import plyr
#' @import itertools
#' @import ggplot2
#' @import utils
#'
#' @examples
#' # getting map and ped paths
#' genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS")
#' mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS")
#' # calculating runs with sliding window approach
#' \dontrun{
#' # skipping runs calculation
#' runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1,
#' minSNP = 15, ROHet = FALSE, maxOppWindow = 1, maxMissWindow = 1, maxGap=10^6,
#' minLengthBps = 100000, minDensity = 1/10000)
#' }
#' # loading pre-calculated data
#' runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS")
#' colClasses <- c(rep("character", 3), rep("numeric", 4) )
#' runs <- read.csv2(runsFile, header = TRUE, stringsAsFactors = FALSE,
#' colClasses = colClasses)
#'

slidingRUNS.run <- function(genotypeFile, mapFile, windowSize = 15, threshold = 0.05,
                            minSNP = 3, ROHet = FALSE, maxOppWindow = 1, maxMissWindow = 1,
                            maxGap = 10^6, minLengthBps = 1000, minDensity = 1/1000,
                            maxOppRun = NULL, maxMissRun = NULL) {

  message(paste("You are using the method: SLIDING WINDOWS"))

  # Guard against NA / NULL / vector input, which would otherwise make the
  # `if` below fail before the intended error message is reached
  rohet_is_scalar <- length(ROHet) == 1L && !is.na(ROHet)
  if (rohet_is_scalar && ROHet == TRUE) {
    message("Analysing Runs of Heterozygosity (ROHet)")
  } else if (rohet_is_scalar && ROHet == FALSE) {
    message("Analysing Runs of Homozygosity (ROHom)")
  } else {
    stop(paste("Unknown ROHet value:", paste(ROHet, collapse = ","),
               ". It MUST be only TRUE/FALSE (see documentation)"))
  }

  # if genotype is file, open file
  if (!file.exists(genotypeFile)) {
    stop(paste("file", genotypeFile, "doesn't exists"))
  }
  conn <- file(genotypeFile, open = "r")
  # close the input stream on every exit path, including errors raised below
  on.exit(close(conn), add = TRUE)

  # read mapfile
  map_data <- readMapFile(mapFile)

  # override colnames
  colnames(map_data) <- c("Chrom", "SNP", "bps")

  # collect all parameters in a variable
  parameters <- list(windowSize = windowSize, threshold = threshold, minSNP = minSNP,
                     ROHet = ROHet, maxOppWindow = maxOppWindow,
                     maxMissWindow = maxMissWindow, maxGap = maxGap, minLengthBps = minLengthBps,
                     minDensity = minDensity, maxOppRun = maxOppRun, maxMissRun = maxMissRun)

  # calculate gaps between consecutive markers
  gaps <- diff(map_data$bps)

  message(paste("Window size:", windowSize))
  message(paste("Threshold for calling SNP in a Run:", threshold))

  # empty template: returned as-is when no run is detected
  RUNs <- data.frame(group = character(), id = character(), chrom = character(), nSNP = integer(),
                     from = integer(), to = integer(), lengthBps = integer())

  # every ped line must carry two alleles per marker after the six leading columns
  expected_alleles <- nrow(map_data) * 2

  # per-individual results, bound together once after the loop
  runs_per_individual <- list()

  # read file line by line (http://stackoverflow.com/questions/4106764/what-is-a-good-way-to-read-line-by-line-in-r)
  while (length(oneLine <- readLines(conn, n = 1, warn = FALSE)) > 0) {
    genotype <- as.character(strsplit(oneLine, " ")[[1]])

    # check that genotype columns and mapFile rows (+6) are identical
    if (length(genotype) - 6 != expected_alleles) {
      stop("Number of markers differ in mapFile and genotype: are those file the same dataset?")
    }

    # get animal
    animal <- list(FID = genotype[1], IID = genotype[2])

    # convert into genotype (use from 7th column to last column)
    genotype <- pedConvertCpp(genotype[7:length(genotype)])

    # find runs for this genotype
    runs_per_individual[[length(runs_per_individual) + 1L]] <-
      slidingRuns(genotype, animal, map_data, gaps, parameters)
  }

  # bind all runs (zero-row results are ignored by rbind)
  RUNs <- do.call(rbind, c(list(RUNs), runs_per_individual))

  # fix row names
  row.names(RUNs) <- NULL

  # return calculated runs (data.frame)
  return(RUNs)
}


#' Main function to detect genomic RUNS (ROHom/ROHet) using the consecutive method
#'
#' This is the main detectRUNS function to scan the genome for runs (of homozygosity or heterozygosity)
#' using the consecutive method (Marras et al. 2015, Animal Genetics 46(2):110-121).
#' All parameters to detect runs (e.g. minimum n. of SNP, max n. of missing genotypes,
#' max n. of opposite genotypes etc.) are specified here.
#' Input data are in the ped/map
#' Plink format (https://www.cog-genomics.org/plink/1.9/input#ped)
#'
#' @param genotypeFile genotype (.ped) file path
#' @param mapFile map file (.map) file path
#' @param ROHet should we look for ROHet or ROHom? (default = FALSE)
#' @param maxOppRun max n. of opposite genotype SNPs in the run (default = 0)
#' @param maxMissRun max n. of missing SNPs in the run (default = 0)
#' @param minSNP minimum n. of SNP in a RUN (default = 15)
#' @param minLengthBps minimum length of run in bps (defaults to 1000 bps = 1 kbps)
#' @param maxGap max distance between consecutive SNP in a window to be still considered a potential run (defaults to 10^6)
#'
#' @details
#' This function scans the genome (diploid) for runs using the consecutive method.
#' This is a wrapper function for many component functions that handle the input data (ped/map files), performs internal conversions,
#' accepts parameters specifications, selects the statistical method to detect runs (sliding windows, consecutive loci) and whether
#' runs of homozygosity (RoHom) or of heterozygosity (RoHet) are looked for.
#'
#' In the ped file, the groups samples belong to can be specified (first column). This is important if comparisons between
#' human ethnic groups or between animal breeds or plant varieties or biological populations are to be performed.
#' Also, if cases and controls are to be compared, this is the place where this information needs to be specified.
#'
#' This function returns a data frame with all runs detected in the dataset. This data frame can then be written out to a csv file.
#' The data frame is, in turn, the input for other functions of the detectRUNS package that create plots and produce statistics
#' of the results (see plot and statistic functions in this manual, and/or refer to the vignette of detectRUNS).
#'
#' @return A dataframe with RUNs of Homozygosity or Heterozygosity in the analysed dataset.
#' The returned dataframe contains the following seven columns: "group", "id", "chrom",
#' "nSNP", "from", "to", "lengthBps" (group: population, breed, case/control etc.;
#' id: individual identifier; chrom: chromosome on which the run is located;
#' nSNP: number of SNPs in the run; from: starting position of the run, in bps;
#' to: end position of the run, in bps; lengthBps: size of the run)
#' @export
#'
#' @import plyr
#' @import itertools
#' @import ggplot2
#' @import utils
#'
#' @examples
#' # getting map and ped paths
#' genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS")
#' mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS")
#' # calculating runs with consecutive run approach
#' \dontrun{
#' # skipping runs calculation
#' runs <- consecutiveRUNS.run(genotypeFile, mapFile, minSNP = 15, ROHet = FALSE,
#' maxOppRun = 0, maxMissRun = 0, maxGap=10^6,
#' minLengthBps = 100000)
#' }
#' # loading pre-calculated data
#' runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.consecutive.csv", package="detectRUNS")
#' colClasses <- c(rep("character", 3), rep("numeric", 4) )
#' runs <- read.csv2(runsFile, header = TRUE, stringsAsFactors = FALSE,
#' colClasses = colClasses)
#'

consecutiveRUNS.run <- function(genotypeFile, mapFile, ROHet = FALSE,
                                maxOppRun = 0, maxMissRun = 0,
                                minSNP = 15, minLengthBps = 1000,
                                maxGap = 10^6) {

  message(paste("You are using the method: CONSECUTIVE RUNS"))

  # Guard against NA / NULL / vector input, which would otherwise make the
  # `if` below fail before the intended error message is reached
  rohet_is_scalar <- length(ROHet) == 1L && !is.na(ROHet)
  if (rohet_is_scalar && ROHet == TRUE) {
    message("Analysing Runs of Heterozygosity (ROHet)")
  } else if (rohet_is_scalar && ROHet == FALSE) {
    message("Analysing Runs of Homozygosity (ROHom)")
  } else {
    stop(paste("Unknown ROHet value:", paste(ROHet, collapse = ","),
               ". It MUST be only TRUE/FALSE (see documentation)"))
  }

  # if genotype is file, open file
  if (!file.exists(genotypeFile)) {
    stop(paste("file", genotypeFile, "doesn't exists"))
  }
  conn <- file(genotypeFile, open = "r")
  # close the input stream on every exit path, including errors raised below
  on.exit(close(conn), add = TRUE)

  # read mapfile
  map_data <- readMapFile(mapFile)

  # override colnames
  colnames(map_data) <- c("Chrom", "SNP", "bps")

  # empty template: returned as-is when no run is detected
  RUNs <- data.frame(group = character(), id = character(), chrom = character(), nSNP = integer(),
                     from = integer(), to = integer(), lengthBps = integer())

  # every ped line must carry two alleles per marker after the six leading columns
  expected_alleles <- nrow(map_data) * 2

  # per-individual results, bound together once after the loop
  runs_per_individual <- list()

  # read file line by line (http://stackoverflow.com/questions/4106764/what-is-a-good-way-to-read-line-by-line-in-r)
  while (length(oneLine <- readLines(conn, n = 1, warn = FALSE)) > 0) {
    genotype <- as.character(strsplit(oneLine, " ")[[1]])

    # check that genotype columns and mapFile rows (+6) are identical
    if (length(genotype) - 6 != expected_alleles) {
      stop("Number of markers differ in mapFile and genotype: are those file the same dataset?")
    }

    # get animal
    animal <- list(FID = genotype[1], IID = genotype[2])

    # convert into genotype (use from 7th column to last column)
    genotype <- pedConvertCpp(genotype[7:length(genotype)])

    # find runs for this genotype
    runs_per_individual[[length(runs_per_individual) + 1L]] <-
      consecutiveRunsCpp(genotype, animal, mapFile = map_data, ROHet = ROHet, minSNP = minSNP,
                         maxOppositeGenotype = maxOppRun, maxMiss = maxMissRun,
                         minLengthBps = minLengthBps, maxGap = maxGap)
  }

  # bind all runs (zero-row results are ignored by rbind)
  RUNs <- do.call(rbind, c(list(RUNs), runs_per_individual))

  # fix row names
  row.names(RUNs) <- NULL

  # return calculated runs (data.frame)
  return(RUNs)
}

--------------------------------------------------------------------------------
/detectRUNS/R/zzz.R:
-------------------------------------------------------------------------------- 1 | 2 | .onAttach <- function(libname, pkgname) { 3 | version = packageVersion("detectRUNS") 4 | packageStartupMessage(paste("Using detectRUNS", version)) 5 | } 6 | 7 | .onUnload <- function (libpath) { 8 | library.dynam.unload("detectRUNS", libpath) 9 | } 10 | -------------------------------------------------------------------------------- /detectRUNS/README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | md_document: 4 | variant: markdown_github 5 | --- 6 | 7 | 8 | 9 | ```{r, echo = FALSE} 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | comment = "#>", 13 | fig.path = "README-" 14 | ) 15 | ``` 16 | 17 | # detectRUNS 18 | 19 | detectRUNS is an R package for the detection of runs of homozygosity (ROH/ROHom) and of heterozygosity (ROHet, a.k.a. "heterozygosity-rich regions") in diploid genomes. Besides runs detection, it implements several functions to summarize and plot results. 20 | 21 | ## Installation 22 | 23 | detectRUNS is installed as a standard R package. Some core functions are written in C++ to increase efficiency of calculations: this makes use of the R library Rcpp. detectRUNS uses other R packages for data manipulation and plots. These packages are set as _Imports_, and detectRUNS will try to install any missing packages upon installation. 24 | 25 | ## Dependencies 26 | 27 | detectRUNS imports: plyr, iterators, itertools, ggplot2, reshape2, Rcpp, gridExtra, data.table 28 | detectRUNS suggests: testthat, knitr, rmarkdown, prettydoc 29 | 30 | ## Documentation 31 | Please see the package vignette for a complete tutorial. What follows is a minimal working example to give the gist of the tool. 
32 | 33 | ## Example 34 | 35 | This is a basic example which shows you how to detect runs of homozygosity (ROH): 36 | 37 | ```{r example, eval=FALSE, include=TRUE} 38 | #1) detectRUNS (sliding-windows method) 39 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 40 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 41 | # calculating runs with sliding window approach 42 | \dontrun{ 43 | # skipping runs calculation 44 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, 45 | minSNP = 15, ROHet = FALSE, maxOppWindow = 1, maxMissWindow = 1, maxGap=10^6, 46 | minLengthBps = 100000, minDensity = 1/10000) 47 | } 48 | # loading pre-calculated data 49 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 50 | colClasses <- c(rep("character", 3), rep("numeric", 4) ) 51 | runs <- read.csv2(runsFile, header = TRUE, stringsAsFactors = FALSE, colClasses = colClasses) 52 | 53 | #2) summarise results 54 | summaryList <- summaryRuns(runs = runs, mapFile = mapFile, genotypeFile = genotypeFile, Class = 6, snpInRuns = TRUE) 55 | 56 | #3) plot results 57 | plot_Runs(runs = runs) 58 | ``` 59 | 60 | -------------------------------------------------------------------------------- /detectRUNS/README.md: -------------------------------------------------------------------------------- 1 | 2 | detectRUNS 3 | ========== 4 | 5 | detectRUNS is an R package for the detection of runs of homozygosity (ROH/ROHom) and of heterozygosity (ROHet, a.k.a. "heterozygosity-rich regions") in diploid genomes. Besides runs detection, it implements several functions to summarize and plot results. 6 | 7 | Installation 8 | ------------ 9 | 10 | detectRUNS is installed as a standard R package. Some core functions are written in C++ to increase efficiency of calculations: this makes use of the R library Rcpp. 
detectRUNS uses other R packages for data manipulation and plots. These packages are set as *Imports*, and detectRUNS will try to install any missing packages upon installation. 11 | 12 | Dependencies 13 | ------------ 14 | 15 | detectRUNS imports: plyr, iterators, itertools, ggplot2, reshape2, Rcpp, gridExtra, data.table detectRUNS suggests: testthat, knitr, rmarkdown, prettydoc 16 | 17 | Documentation 18 | ------------- 19 | 20 | Please see the package vignette for a complete tutorial. What follows is a minimal working example to give the gist of the tool. 21 | 22 | Example 23 | ------- 24 | 25 | This is a basic example which shows you how to detect runs of homozygosity (ROH): 26 | 27 | ``` r 28 | #1) detectRUNS (sliding-windows method) 29 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 30 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 31 | # calculating runs with sliding window approach 32 | \dontrun{ 33 | # skipping runs calculation 34 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, 35 | minSNP = 15, ROHet = FALSE, maxOppWindow = 1, maxMissWindow = 1, maxGap=10^6, 36 | minLengthBps = 100000, minDensity = 1/10000) 37 | } 38 | # loading pre-calculated data 39 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 40 | colClasses <- c(rep("character", 3), rep("numeric", 4) ) 41 | runs <- read.csv2(runsFile, header = TRUE, stringsAsFactors = FALSE, colClasses = colClasses) 42 | 43 | #2) summarise results 44 | summaryList <- summaryRuns(runs = runs, mapFile = mapFile, genotypeFile = genotypeFile, Class = 6, snpInRuns = TRUE) 45 | 46 | #3) plot results 47 | plot_Runs(runs = runs) 48 | ``` 49 | -------------------------------------------------------------------------------- /detectRUNS/cran-comments.md: -------------------------------------------------------------------------------- 1 | 2 | ## 
test environments 3 | 4 | Package tested using GitHub workflow [r-lib/actions](https://github.com/r-lib/actions) 5 | 6 | * os: macOS-latest, R: release 7 | * os: windows-latest, R: release 8 | * os: ubuntu-latest, R: devel 9 | * os: ubuntu-latest, R: release 10 | * os: ubuntu-latest, R: oldrel-1 11 | ## R CMD check results 12 | 0 errors ✔ | 0 warnings ✔ | 1 note ✖ 13 | 14 | ❯ checking installed package size ... NOTE 15 | installed size is 7.1Mb 16 | sub-directories of 1Mb or more: 17 | extdata 2.1Mb 18 | libs 4.0Mb 19 | 20 | Installed package size is greater than 5Mb due to functions compiled with `Rcpp` 21 | and some example data used in vignettes 22 | 23 | ## Downstream dependencies 24 | There are currently no downstream dependencies for this package 25 | -------------------------------------------------------------------------------- /detectRUNS/man/Froh_inbreeding.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Stats.R 3 | \name{Froh_inbreeding} 4 | \alias{Froh_inbreeding} 5 | \title{Function to calculate Froh genome-wide or chromosome-wide} 6 | \usage{ 7 | Froh_inbreeding(runs, mapFile, genome_wide = TRUE) 8 | } 9 | \arguments{ 10 | \item{runs}{R object (dataframe) with results on runs} 11 | 12 | \item{mapFile}{Plink map file (to retrieve SNP position)} 13 | 14 | \item{genome_wide}{vector of TRUE/FALSE (genome-wide or chromosome-wide; 15 | defaults to TRUE/genome-wide)} 16 | } 17 | \value{ 18 | A data frame with the inbreeding coefficients of each individual sample 19 | } 20 | \description{ 21 | This function calculates the individual inbreeding coefficients based on runs of 22 | homozygosity (ROH), either per-chromosome (chromosome-wide) or based on the 23 | entire genome (genome-wide). 
See details of calculations below 24 | } 25 | \details{ 26 | Froh is calculated as: 27 | 28 | \eqn{ F_{ROH} = \frac{\sum ROH_{length}}{Length_{genome}} } 29 | 30 | Depending on whether genome-wide or chromosome-wide calculations are required, 31 | the terms in the numerator and denominator will refer to the entire genome 32 | or will be restricted to specific chromosomes. 33 | } 34 | \examples{ 35 | # getting map and ped paths 36 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 37 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 38 | 39 | # calculating runs of Homozygosity 40 | \dontrun{ 41 | # skipping runs calculation 42 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 43 | ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 44 | } 45 | # loading pre-calculated data 46 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 47 | runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 48 | 49 | Froh_inbreeding(runs = runsDF, mapFile = mapFile) 50 | Froh_inbreeding(runs = runsDF, mapFile = mapFile, genome_wide=FALSE) 51 | 52 | } 53 | -------------------------------------------------------------------------------- /detectRUNS/man/Froh_inbreedingClass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Stats.R 3 | \name{Froh_inbreedingClass} 4 | \alias{Froh_inbreedingClass} 5 | \title{Function to calculate Froh using a ROH-class} 6 | \usage{ 7 | Froh_inbreedingClass(runs, mapFile, Class = 2) 8 | } 9 | \arguments{ 10 | \item{runs}{R object (dataframe) with ROH results} 11 | 12 | \item{mapFile}{Plink map file (for SNP position)} 13 | 14 | \item{Class}{base ROH-length interval (in Mbps) (default: 0-2, 2-4, 4-8, 8-16, 
>16)} 15 | } 16 | \value{ 17 | A data frame with individual inbreeding coefficients based on ROH-length of 18 | specific size. The sum of ROH-length of specific size in each individual is 19 | reported alongside 20 | } 21 | \description{ 22 | This function calculates the individual inbreeding coefficients based on runs of 23 | homozygosity (ROH) using only ROH of specific size classes. 24 | The parameter \code{Class} specifies the size interval to split up calculations. 25 | For example, if \code{Class = 2}, Froh based on ROH 0-2, 2-4, 4-8, 8-16, >16 Mbps long 26 | will be calculated. 27 | } 28 | \examples{ 29 | # getting map and ped paths 30 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 31 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 32 | 33 | # calculating runs of Homozygosity 34 | \dontrun{ 35 | # skipping runs calculation 36 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 37 | ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 38 | } 39 | # loading pre-calculated data 40 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 41 | runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 42 | 43 | Froh_inbreedingClass(runs = runsDF, mapFile = mapFile, Class = 2) 44 | 45 | } 46 | -------------------------------------------------------------------------------- /detectRUNS/man/chromosomeLength.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Stats.R 3 | \name{chromosomeLength} 4 | \alias{chromosomeLength} 5 | \title{Function to find max position for each chromosome} 6 | \usage{ 7 | chromosomeLength(mapFile) 8 | } 9 | \arguments{ 10 | \item{mapFile}{Plink map file (for SNP position)} 11 | } 12 | \value{ 13 
| A data frame with the max position for chromosome 14 | } 15 | \description{ 16 | Function to found max position for each chromosome 17 | } 18 | \details{ 19 | Create a data frame with the max position in map file (plink format) 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /detectRUNS/man/consecutiveRUNS.run.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{consecutiveRUNS.run} 4 | \alias{consecutiveRUNS.run} 5 | \title{Main function to detect genomic RUNS (ROHom/ROHet) using the consecutive method} 6 | \usage{ 7 | consecutiveRUNS.run( 8 | genotypeFile, 9 | mapFile, 10 | ROHet = FALSE, 11 | maxOppRun = 0, 12 | maxMissRun = 0, 13 | minSNP = 15, 14 | minLengthBps = 1000, 15 | maxGap = 10^6 16 | ) 17 | } 18 | \arguments{ 19 | \item{genotypeFile}{genotype (.ped) file path} 20 | 21 | \item{mapFile}{map file (.map) file path} 22 | 23 | \item{ROHet}{should we look for ROHet or ROHom? (default = FALSE)} 24 | 25 | \item{maxOppRun}{max n. of opposite genotype SNPs in the run (default = 0)} 26 | 27 | \item{maxMissRun}{max n. of missing SNPs in the run (default = 0)} 28 | 29 | \item{minSNP}{minimum n. of SNP in a RUN (default = 15)} 30 | 31 | \item{minLengthBps}{minimum length of run in bps (defaults to 1000 bps = 1 kbps)} 32 | 33 | \item{maxGap}{max distance between consecutive SNP in a window to be still considered a potential run (defaults to 10^6)} 34 | } 35 | \value{ 36 | A dataframe with RUNs of Homozygosity or Heterozygosity in the analysed dataset. 
37 | The returned dataframe contains the following seven columns: "group", "id", "chrom", 38 | "nSNP", "from", "to", "lengthBps" (group: population, breed, case/control etc.; 39 | id: individual identifier; chrom: chromosome on which the run is located; 40 | nSNP: number of SNPs in the run; from: starting position of the run, in bps; 41 | to: end position of the run, in bps; lengthBps: size of the run) 42 | } 43 | \description{ 44 | This is the main detectRUNS function to scan the genome for runs (of homozygosity or heterozygosity) 45 | using the consecutive method (Marras et al. 2015, Animal Genetics 46(2):110-121). 46 | All parameters to detect runs (e.g. minimum n. of SNP, max n. of missing genotypes, 47 | max n. of opposite genotypes etc.) are specified here. 48 | Input data are in the ped/map 49 | Plink format (https://www.cog-genomics.org/plink/1.9/input#ped) 50 | } 51 | \details{ 52 | This function scans the genome (diploid) for runs using the consecutive method. 53 | This is a wrapper function for many component functions that handle the input data (ped/map files), performs internal conversions, 54 | accepts parameters specifications, selects the statistical method to detect runs (sliding windows, consecutive loci) and whether 55 | runs of homozygosity (RoHom) or of heterozygosity (RoHet) are looked for. 56 | 57 | In the ped file, the groups samples belong to can be specified (first column). This is important if comparisons between 58 | human ethnic groups or between animal breeds or plant varieties or biological populations are to be performed. 59 | Also, if cases and controls are to be compared, this is the place where this information needs to be specified. 60 | 61 | This function returns a data frame with all runs detected in the dataset. This data frame can then be written out to a csv file. 
62 | The data frame is, in turn, the input for other functions of the detectRUNS package that create plots and produce statistics 63 | of the results (see plot and statistic functions in this manual, and/or refer to the vignette of detectRUNS). 64 | } 65 | \examples{ 66 | # getting map and ped paths 67 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 68 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 69 | # calculating runs with consecutive run approach 70 | \dontrun{ 71 | # skipping runs calculation 72 | runs <- consecutiveRUNS.run(genotypeFile, mapFile, minSNP = 15, ROHet = FALSE, 73 | maxOppRun = 0, maxMissRun = 0, maxGap=10^6, 74 | minLengthBps = 100000) 75 | } 76 | # loading pre-calculated data 77 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.consecutive.csv", package="detectRUNS") 78 | colClasses <- c(rep("character", 3), rep("numeric", 4) ) 79 | runs <- read.csv2(runsFile, header = TRUE, stringsAsFactors = FALSE, 80 | colClasses = colClasses) 81 | 82 | } 83 | -------------------------------------------------------------------------------- /detectRUNS/man/consecutiveRuns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{consecutiveRuns} 4 | \alias{consecutiveRuns} 5 | \title{Function to detect consecutive runs in a vector (individual's genotypes)} 6 | \usage{ 7 | consecutiveRuns( 8 | indGeno, 9 | individual, 10 | mapFile, 11 | ROHet = TRUE, 12 | minSNP = 3, 13 | maxOppositeGenotype = 1, 14 | maxMiss = 1, 15 | minLengthBps = 1000, 16 | maxGap = 10^6 17 | ) 18 | } 19 | \arguments{ 20 | \item{indGeno}{vector of 0/1/NAs of individual genotypes (0: homozygote; 1: heterozygote)} 21 | 22 | \item{individual}{list of group (breed, population, case/control etc.) 
and ID of individual sample} 23 | 24 | \item{mapFile}{Plink map file (for SNP position)} 25 | 26 | \item{ROHet}{shall we detect ROHet or ROHom?} 27 | 28 | \item{minSNP}{minimum number of SNP in a run} 29 | 30 | \item{maxOppositeGenotype}{max n. of homozygous/heterozygous SNP} 31 | 32 | \item{maxMiss}{max. n. of missing SNP} 33 | 34 | \item{minLengthBps}{min length of a run in bps} 35 | 36 | \item{maxGap}{max distance between consecutive SNP in a window to be still considered a potential run} 37 | } 38 | \value{ 39 | A data frame of runs per individual sample 40 | } 41 | \description{ 42 | This is a core function. It implements the consecutive method for detection of runs in diploid genomes 43 | (see Marras et al. 2015) 44 | } 45 | \details{ 46 | The consecutive method detect runs by consecutively scanning SNP loci along the genome. 47 | No sliding windows are used. Checks on minimum n. of SNP, max n. of opposite and missing genotypes, 48 | max gap between adjacent loci and minimum length of the run are implemented (as in the sliding window method). 
49 | Both runs of homozygosity (RoHom) and of heterozygosity (RoHet) can be searched for (option ROHet: TRUE/FALSE) 50 | } 51 | \keyword{internal} 52 | -------------------------------------------------------------------------------- /detectRUNS/man/consecutiveRunsCpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{consecutiveRunsCpp} 4 | \alias{consecutiveRunsCpp} 5 | \title{Function to detect consecutive runs in a vector (individual's genotypes)} 6 | \usage{ 7 | consecutiveRunsCpp( 8 | indGeno, 9 | individual, 10 | mapFile, 11 | ROHet = TRUE, 12 | minSNP = 3L, 13 | maxOppositeGenotype = 1L, 14 | maxMiss = 1L, 15 | minLengthBps = 1000L, 16 | maxGap = 1000000L 17 | ) 18 | } 19 | \arguments{ 20 | \item{indGeno}{vector of 0/1/NAs of individual genotypes (0: homozygote; 1: heterozygote)} 21 | 22 | \item{individual}{list of group (breed, population, case/control etc.) and ID of individual sample} 23 | 24 | \item{mapFile}{Plink map file (for SNP position)} 25 | 26 | \item{ROHet}{shall we detect ROHet or ROHom?} 27 | 28 | \item{minSNP}{minimum number of SNP in a run} 29 | 30 | \item{maxOppositeGenotype}{max n. of homozygous/heterozygous SNP} 31 | 32 | \item{maxMiss}{max. n. of missing SNP} 33 | 34 | \item{minLengthBps}{min length of a run in bps} 35 | 36 | \item{maxGap}{max distance between consecutive SNP in a window to be still considered a potential run} 37 | } 38 | \value{ 39 | A data frame of runs per individual sample 40 | } 41 | \description{ 42 | This is a core function. It implements the consecutive method for detection of runs in diploid genomes 43 | (see Marras et al. 2015) 44 | } 45 | \details{ 46 | The consecutive method detects runs by consecutively scanning SNP loci along the genome. 47 | No sliding windows are used. Checks on minimum n. of SNP, max n. 
of opposite and missing genotypes, 48 | max gap between adjacent loci and minimum length of the run are implemented (as in the sliding window method). 49 | Both runs of homozygosity (RoHom) and of heterozygosity (RoHet) can be search for (option ROHet: TRUE/FALSE) 50 | } 51 | -------------------------------------------------------------------------------- /detectRUNS/man/createRUNdf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{createRUNdf} 4 | \alias{createRUNdf} 5 | \title{Function to create a dataframe of RUNS per individual animal 6 | Requires a map file (other filename to read or R object) 7 | Parameters on maximum number of missing and opposite genotypes in the run (not the window) are implemented here} 8 | \usage{ 9 | createRUNdf( 10 | snpRun, 11 | mapFile, 12 | minSNP = 3, 13 | minLengthBps = 1000, 14 | minDensity = 1/10, 15 | oppositeAndMissingSNP, 16 | maxOppRun = NULL, 17 | maxMissRun = NULL 18 | ) 19 | } 20 | \arguments{ 21 | \item{snpRun}{vector of TRUE/FALSE (is the SNP in a RUN?)} 22 | 23 | \item{mapFile}{Plink-like map file (data.frame)} 24 | 25 | \item{minSNP}{minimum n. of SNP to call a RUN} 26 | 27 | \item{minLengthBps}{minimum length of run in bps (defaults to 1000 bps = 1 kbps)} 28 | 29 | \item{minDensity}{minimum n. of SNP per kbps (defaults to 0.1 = 1 SNP every 10 kbps)} 30 | 31 | \item{oppositeAndMissingSNP}{indexed array of missing and opposite genotypes (SNP order in the genome is the index)} 32 | 33 | \item{maxOppRun}{max n. of opposite genotype SNPs in the run (not in the window!)} 34 | 35 | \item{maxMissRun}{max n. 
of missing SNPs in the run (not in the window!)} 36 | } 37 | \value{ 38 | a data.frame with RUNS per animal 39 | } 40 | \description{ 41 | Function to create a dataframe of RUNS per individual animal 42 | Requires a map file (other filename to read or R object) 43 | Parameters on maximum number of missing and opposite genotypes in the run (not the window) are implemented here 44 | } 45 | -------------------------------------------------------------------------------- /detectRUNS/man/findOppositeAndMissing.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{findOppositeAndMissing} 4 | \alias{findOppositeAndMissing} 5 | \title{Function to calculate oppositeAndMissingGenotypes array} 6 | \usage{ 7 | findOppositeAndMissing(data, ROHet = TRUE) 8 | } 9 | \arguments{ 10 | \item{data}{vector of 0/1/2 genotypes} 11 | 12 | \item{ROHet}{TRUE in ROHet evaluation, FALSE for ROHom} 13 | } 14 | \value{ 15 | character array; names will be index in which opposite and missing 16 | snps are found in data array 17 | } 18 | \description{ 19 | This is an helper function, this will be called by another function 20 | } 21 | -------------------------------------------------------------------------------- /detectRUNS/man/genoConvert.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{genoConvert} 4 | \alias{genoConvert} 5 | \title{Convert 0/1/2 genotypes to 0/1} 6 | \usage{ 7 | genoConvert(x) 8 | } 9 | \arguments{ 10 | \item{x}{vector of 0/1/2 genotypes} 11 | } 12 | \value{ 13 | converted vector of genotypes (0/1) 14 | } 15 | \description{ 16 | This is a utility function, that convert 0/1/2 genotypes (AA/AB/BB) into 0/1 17 | (either homozygous/heterozygous) 18 | } 19 | \keyword{internal} 20 | 
-------------------------------------------------------------------------------- /detectRUNS/man/genoConvertCpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{genoConvertCpp} 4 | \alias{genoConvertCpp} 5 | \title{Convert 0/1/2 genotypes to 0/1} 6 | \usage{ 7 | genoConvertCpp(genotype) 8 | } 9 | \arguments{ 10 | \item{genotype}{vector of 0/1/2 genotypes} 11 | } 12 | \value{ 13 | converted vector of genotypes (0/1) 14 | } 15 | \description{ 16 | This is a utility function, that convert 0/1/2 genotypes (AA/AB/BB) into 0/1 17 | (either homozygous/heterozygous) 18 | } 19 | -------------------------------------------------------------------------------- /detectRUNS/man/heteroZygotTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{heteroZygotTest} 4 | \alias{heteroZygotTest} 5 | \title{Function to check whether a window is (loosely) heterozygous or not} 6 | \usage{ 7 | heteroZygotTest(x, gaps, maxHom, maxMiss, maxGap, i, windowSize) 8 | } 9 | \arguments{ 10 | \item{x}{vector of 0/1 genotypes (from genoConvert())} 11 | 12 | \item{gaps}{vector of differences between consecutive positions (gaps) in bps} 13 | 14 | \item{maxHom}{max n. of homozygous SNP in a heterozygous window} 15 | 16 | \item{maxMiss}{max n. of missing in a window} 17 | 18 | \item{maxGap}{max distance between consecutive SNP in a window to be still considered a potential run} 19 | 20 | \item{i}{index along the genome (genome-vector for each individual)} 21 | 22 | \item{windowSize}{size of window (n. 
of SNP)} 23 | } 24 | \value{ 25 | a list: i) TRUE/FALSE (whether a window is heterozygous or NOT); ii) indexes of "opposite and missing" genotype 26 | } 27 | \description{ 28 | This is a core function within the sliding-window workflow. Parameters on how to consider a window heterozygous are here (maxHom, maxMiss) 29 | } 30 | -------------------------------------------------------------------------------- /detectRUNS/man/heteroZygotTestCpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{heteroZygotTestCpp} 4 | \alias{heteroZygotTestCpp} 5 | \title{Function to check whether a window is (loosely) heterozygous or not} 6 | \usage{ 7 | heteroZygotTestCpp(x, gaps, maxHom, maxMiss, maxGap) 8 | } 9 | \arguments{ 10 | \item{x}{vector of 0/1 genotypes (from genoConvert())} 11 | 12 | \item{gaps}{vector of differences between consecutive positions (gaps) in bps} 13 | 14 | \item{maxHom}{max n. of homozygous SNP in a heterozygous window} 15 | 16 | \item{maxMiss}{max n. of missing in a window} 17 | 18 | \item{maxGap}{max distance between consecutive SNP in a window to be still considered a potential run} 19 | } 20 | \value{ 21 | TRUE/FALSE (whether a window is heterozygous or NOT) 22 | } 23 | \description{ 24 | This is a core function. 
Parameters on how to consider a window heterozygous are here (maxHom, maxMiss) 25 | } 26 | -------------------------------------------------------------------------------- /detectRUNS/man/homoZygotTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{homoZygotTest} 4 | \alias{homoZygotTest} 5 | \title{Function to check whether a window is (loosely) homozygous or not} 6 | \usage{ 7 | homoZygotTest(x, gaps, maxHet, maxMiss, maxGap, i, windowSize) 8 | } 9 | \arguments{ 10 | \item{x}{vector of 0/1 genotypes (from genoConvert())} 11 | 12 | \item{gaps}{vector of differences between consecutive positions (gaps) in bps} 13 | 14 | \item{maxHet}{max n. of heterozygous SNP in a homozygous window} 15 | 16 | \item{maxMiss}{max n. of missing in a window} 17 | 18 | \item{maxGap}{max distance between consecutive SNP in a window to be still considered a potential run} 19 | 20 | \item{i}{index along the genome (genome-vector for each individual)} 21 | 22 | \item{windowSize}{size of window (n. of SNP)} 23 | } 24 | \value{ 25 | a list: i) TRUE/FALSE (whether a window is homozygous or NOT); ii) indexes of "opposite and missing" genotype 26 | } 27 | \description{ 28 | This is a core function. 
Parameters on how to consider a window homozygous are here (maxHet, maxMiss) 29 | } 30 | -------------------------------------------------------------------------------- /detectRUNS/man/homoZygotTestCpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{homoZygotTestCpp} 4 | \alias{homoZygotTestCpp} 5 | \title{Function to check whether a window is (loosely) homozygous or not} 6 | \usage{ 7 | homoZygotTestCpp(x, gaps, maxHet, maxMiss, maxGap) 8 | } 9 | \arguments{ 10 | \item{x}{vector of 0/1 genotypes (from genoConvert())} 11 | 12 | \item{gaps}{vector of differences between consecutive positions (gaps) in bps} 13 | 14 | \item{maxHet}{max n. of heterozygous SNP in a homozygous window} 15 | 16 | \item{maxMiss}{max n. of missing in a window} 17 | 18 | \item{maxGap}{max distance between consecutive SNP in a window to be still considered a potential run} 19 | } 20 | \value{ 21 | TRUE/FALSE (whether a window is homozygous or NOT) 22 | } 23 | \description{ 24 | This is a core function. 
Parameters on how to consider a window homozygous are here (maxHet, maxMiss) 25 | } 26 | -------------------------------------------------------------------------------- /detectRUNS/man/pedConvertCpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{pedConvertCpp} 4 | \alias{pedConvertCpp} 5 | \title{Convert ped genotypes to 0/1} 6 | \usage{ 7 | pedConvertCpp(genotype) 8 | } 9 | \arguments{ 10 | \item{genotype}{vector of pair of genotypes (01, AA, AG)} 11 | } 12 | \value{ 13 | converted vector of genotypes (0/1) 14 | } 15 | \description{ 16 | This is a utility function, that convert ped genotypes (AA/AB/BB) into 0/1 17 | (either homozygous/heterozygous) 18 | } 19 | -------------------------------------------------------------------------------- /detectRUNS/man/plot_DistributionRuns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot_DistributionRuns} 4 | \alias{plot_DistributionRuns} 5 | \title{Plot Distribution of runs} 6 | \usage{ 7 | plot_DistributionRuns( 8 | runs, 9 | mapFile, 10 | groupSplit = TRUE, 11 | style = c("MeanClass", "MeanChr", "RunsPCT", "RunsPCT_Chr", "All"), 12 | savePlots = FALSE, 13 | outputName = NULL, 14 | plotTitle = NULL, 15 | Class = 2 16 | ) 17 | } 18 | \arguments{ 19 | \item{runs}{R object (dataframe) with results on detected runs} 20 | 21 | \item{mapFile}{Plink map file (for SNP position)} 22 | 23 | \item{groupSplit}{plots split by group (defaults to TRUE)} 24 | 25 | \item{style}{type of plot: MeanClass, MeanChr, RunsPCT, RunsPCT_Chr, All (all plots)} 26 | 27 | \item{savePlots}{should plots be saved out to files or plotted in the graphical terminal (default)?} 28 | 29 | \item{outputName}{title prefix (the base name of graph, if savePlots is TRUE)#'} 30 | 
31 | \item{plotTitle}{title in plot (default NULL)} 32 | 33 | \item{Class}{group of length (in Mbps) by class (default: 0-2, 2-4, 4-8, 8-16, >16)} 34 | } 35 | \value{ 36 | plot Distribution Runs 37 | } 38 | \description{ 39 | This function plots the distribution of runs per group. The average run length per size-class, 40 | the average run length per chromosome (and group), the percent distribution of runs 41 | per size-class and group, and the proportion of runs per chromosome are plotted. 42 | With \code{style="All"} all four plots are produced. 43 | } 44 | \examples{ 45 | # getting map and ped paths 46 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 47 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 48 | 49 | # calculating runs of Homozygosity 50 | \dontrun{ 51 | # skipping runs calculation 52 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 53 | ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 54 | } 55 | # loading pre-calculated data 56 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 57 | runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 58 | 59 | plot_DistributionRuns(runs = runsDF, mapFile = mapFile, style='All') 60 | 61 | } 62 | -------------------------------------------------------------------------------- /detectRUNS/man/plot_InbreedingChr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot_InbreedingChr} 4 | \alias{plot_InbreedingChr} 5 | \title{Plot Froh-based inbreeding coefficients by group} 6 | \usage{ 7 | plot_InbreedingChr( 8 | runs, 9 | mapFile, 10 | groupSplit = TRUE, 11 | style = c("ChrBarPlot", "ChrBoxPlot", "FrohBoxPlot", "All"), 12 | outputName = NULL, 13 | 
plotTitle = NULL, 14 | savePlots = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{runs}{R object (dataframe) with results on detected runs} 19 | 20 | \item{mapFile}{Plink map file (for SNP position)} 21 | 22 | \item{groupSplit}{plots split by group (defaults to TRUE)} 23 | 24 | \item{style}{type of plot: ChrBarPlot, ChrBoxPlot, FrohBoxPlot, All (all plots)} 25 | 26 | \item{outputName}{title prefix (the base name of graph, if savePlots is TRUE)} 27 | 28 | \item{plotTitle}{title in plot (default NULL)} 29 | 30 | \item{savePlots}{should plots be saved out to files or plotted in the graphical terminal (default)?} 31 | } 32 | \value{ 33 | plots of the distribution of inbreeding by chromosome and group 34 | } 35 | \description{ 36 | The function plots the distribution of inbreeding/consanguinity coefficients 37 | per chromosome and/or group. Three types of plots can be produced: barplots, boxplots, 38 | violin plots. With \code{style="All"} all three plots are produced. 39 | } 40 | \examples{ 41 | # getting map and ped paths 42 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 43 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 44 | 45 | # calculating runs of Homozygosity 46 | \dontrun{ 47 | # skipping runs calculation 48 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 49 | ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 50 | } 51 | # loading pre-calculated data 52 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 53 | runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 54 | 55 | plot_InbreedingChr(runs = runsDF, mapFile = mapFile, style='All') 56 | 57 | } 58 | -------------------------------------------------------------------------------- /detectRUNS/man/plot_PatternRuns.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot_PatternRuns} 4 | \alias{plot_PatternRuns} 5 | \title{Plot sum of run-lengths (or average run-lengths) against the number of runs per individual} 6 | \usage{ 7 | plot_PatternRuns( 8 | runs, 9 | mapFile, 10 | method = c("sum", "mean"), 11 | outputName = NULL, 12 | savePlots = FALSE, 13 | plotTitle = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{runs}{a data.frame with runs per individual (group, id, chrom, nSNP, start, end, length)} 18 | 19 | \item{mapFile}{map file (.map) file path} 20 | 21 | \item{method}{"sum" or "mean" of run lengths per individual sample} 22 | 23 | \item{outputName}{title prefix (the base name of graph, if savePlots is TRUE)#'} 24 | 25 | \item{savePlots}{should plots be saved out to files or plotted in the graphical terminal (default)?} 26 | 27 | \item{plotTitle}{title in plot (default NULL)} 28 | } 29 | \value{ 30 | plot of number of runs vs run-length sum/mean per individual sample 31 | } 32 | \description{ 33 | Function to plot the sum of run lengths (or the average run length) per individual 34 | against the average number of runs per individual. Points can be differentially 35 | coloured by group/population. This plot can be useful to identify patterns in 36 | the distribution of runs in different groups (e.g. 
few long runs vs many short runs) 37 | } 38 | \examples{ 39 | # getting map and ped paths 40 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 41 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 42 | 43 | # calculating runs of Homozygosity 44 | \dontrun{ 45 | # skipping runs calculation 46 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 47 | ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 48 | } 49 | # loading pre-calculated data 50 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 51 | runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 52 | 53 | plot_PatternRuns(runs = runsDF, mapFile = mapFile, method = 'sum') 54 | plot_PatternRuns(runs = runsDF, mapFile = mapFile, method = 'mean') 55 | 56 | } 57 | -------------------------------------------------------------------------------- /detectRUNS/man/plot_Runs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot_Runs} 4 | \alias{plot_Runs} 5 | \title{Function to plot runs per individual} 6 | \usage{ 7 | plot_Runs( 8 | runs, 9 | suppressInds = FALSE, 10 | savePlots = FALSE, 11 | separatePlots = FALSE, 12 | outputName = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{runs}{a data.frame with runs per individual (group, id, chrom, nSNP, start, end, length)} 17 | 18 | \item{suppressInds}{shall we suppress individual IDs on the y-axis? 
(defaults to FALSE)} 19 | 20 | \item{savePlots}{should plots be saved out to files (one pdf file for all chromosomes) 21 | or plotted in the graphical terminal (default)?} 22 | 23 | \item{separatePlots}{should plots for each chromosome be saved out to separate files?} 24 | 25 | \item{outputName}{title prefix (the base name of graph, if savePlots is TRUE)} 26 | } 27 | \value{ 28 | plot of runs by chromosome 29 | } 30 | \description{ 31 | Function to plot runs per individual (see Williams et al. 2016, Animal Genetics, 32 | for an example with animal data) 33 | Individual IDs on the y-axis, bps on the x-axis (position along the chromosome) 34 | } 35 | \examples{ 36 | # getting map and ped paths 37 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 38 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 39 | 40 | # calculating runs of Homozygosity 41 | \dontrun{ 42 | # skipping runs calculation 43 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 44 | ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 45 | } 46 | # loading pre-calculated data 47 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 48 | runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 49 | 50 | # plot runs per animal (interactive) 51 | plot_Runs(runs = runsDF, suppressInds = FALSE, savePlots = FALSE, outputName = "ROHom") 52 | 53 | } 54 | -------------------------------------------------------------------------------- /detectRUNS/man/plot_SnpsInRuns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot_SnpsInRuns} 4 | \alias{plot_SnpsInRuns} 5 | \title{Plot the number of times each SNP falls inside runs} 6 | \usage{ 7 | 
plot_SnpsInRuns( 8 | runs, 9 | genotypeFile, 10 | mapFile, 11 | savePlots = FALSE, 12 | separatePlots = FALSE, 13 | outputName = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{runs}{a data.frame with runs per individual (group, id, chrom, nSNP, start, end, length)} 18 | 19 | \item{genotypeFile}{genotype (.ped) file path} 20 | 21 | \item{mapFile}{map file (.map) file path} 22 | 23 | \item{savePlots}{should plots be saved out to files or plotted in 24 | the graphical terminal (default)?} 25 | 26 | \item{separatePlots}{should plots for each chromosome be saved out to separate files?} 27 | 28 | \item{outputName}{title prefix (the base name of graph, if savePlots is TRUE)} 29 | } 30 | \value{ 31 | plot number of times a SNP is in a run by chromosome and population (pdf files) 32 | } 33 | \description{ 34 | Function to plot the number of times/percentage each SNP is inside a run 35 | (population-specific signals) against the SNP positions in the genome. 36 | Proportions on the y-axis, bps on the x-axis 37 | } 38 | \examples{ 39 | # getting map and ped paths 40 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 41 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 42 | 43 | # calculating runs of Homozygosity 44 | # skipping runs calculation 45 | \dontrun{ 46 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 47 | ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 48 | } 49 | # loading pre-calculated data 50 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 51 | runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 52 | 53 | # plot runs per animal (interactive) 54 | plot_SnpsInRuns(runs = runsDF, genotypeFile = genotypeFile, mapFile = mapFile, 55 | savePlots = FALSE, outputName = "ROHom") 56 | 57 | } 58 | 
-------------------------------------------------------------------------------- /detectRUNS/man/plot_StackedRuns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot_StackedRuns} 4 | \alias{plot_StackedRuns} 5 | \title{Plot stacked runs} 6 | \usage{ 7 | plot_StackedRuns( 8 | runs, 9 | savePlots = FALSE, 10 | separatePlots = FALSE, 11 | outputName = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{runs}{a data.frame with runs per individual (group, id, chrom, nSNP, start, end, length)} 16 | 17 | \item{savePlots}{should plots be saved out to files or plotted in 18 | the graphical terminal (default)?} 19 | 20 | \item{separatePlots}{should plots for chromosomes be saved out to separate files?} 21 | 22 | \item{outputName}{title prefix (the base name of graph, if savePlots is TRUE)} 23 | } 24 | \value{ 25 | plot of stacked runs by population and by chromosome (pdf files) 26 | } 27 | \description{ 28 | Function to plot stacked runs along the chromosome (signaling presence of large numbers of runs 29 | in specific regions of a chromosome) 30 | Counts on the y-axis, bps on the x-axis (position along the chromosome) 31 | } 32 | \examples{ 33 | # getting map and ped paths 34 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 35 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 36 | 37 | # calculating runs of Homozygosity 38 | \dontrun{ 39 | # skipping runs calculation 40 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 41 | ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 42 | } 43 | # loading pre-calculated data 44 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 45 | runsDF <- readExternalRuns(inputFile = 
runsFile, program = 'detectRUNS') 46 | 47 | # plot runs per animal (interactive) 48 | plot_StackedRuns(runs = runsDF, savePlots = FALSE, outputName = "ROHom") 49 | 50 | } 51 | -------------------------------------------------------------------------------- /detectRUNS/man/plot_ViolinRuns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot_ViolinRuns} 4 | \alias{plot_ViolinRuns} 5 | \title{Violin plot of run length per individual (either sum or mean)} 6 | \usage{ 7 | plot_ViolinRuns( 8 | runs, 9 | method = c("sum", "mean"), 10 | outputName = NULL, 11 | plotTitle = NULL, 12 | savePlots = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{runs}{a data.frame with runs per individual (group, id, chrom, nSNP, start, end, length)} 17 | 18 | \item{method}{"sum" or "mean" of run lengths per individual samples} 19 | 20 | \item{outputName}{title prefix (the base name of graph, if savePlots is TRUE)} 21 | 22 | \item{plotTitle}{title in plot (default NULL)} 23 | 24 | \item{savePlots}{should plots be saved out to files or plotted in the graphical terminal (default)?} 25 | } 26 | \value{ 27 | Violin plot of the distribution of runs-lengths (sum or mean) 28 | } 29 | \description{ 30 | Function to produce violin plots of the distribution of runs lengths per group 31 | The sum of run lengths, or its average, per individual sample is used to 32 | characterize the distribution of runs 33 | } 34 | \examples{ 35 | # getting map and ped paths 36 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 37 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 38 | 39 | # calculating runs of Homozygosity 40 | \dontrun{ 41 | # skipping runs calculation 42 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 43 | ROHet = FALSE, 
maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 44 | } 45 | # loading pre-calculated data 46 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 47 | runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 48 | 49 | plot_ViolinRuns(runs = runsDF, method = "sum" , savePlots = FALSE) 50 | plot_ViolinRuns(runs = runsDF, method = "mean" , savePlots = FALSE) 51 | 52 | } 53 | -------------------------------------------------------------------------------- /detectRUNS/man/plot_manhattanRuns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot_manhattanRuns} 4 | \alias{plot_manhattanRuns} 5 | \title{Plot the proportion of times SNPs are inside runs - MANHATTAN PLOT} 6 | \usage{ 7 | plot_manhattanRuns( 8 | runs, 9 | genotypeFile, 10 | mapFile, 11 | pct_threshold = 0.33, 12 | x_font_size = 10, 13 | savePlots = FALSE, 14 | file_type = "pdf", 15 | outputName = NULL, 16 | plotTitle = NULL, 17 | plot_w = 8, 18 | plot_h = 6 19 | ) 20 | } 21 | \arguments{ 22 | \item{runs}{a data.frame with runs per individual (group, id, chrom, nSNP, start, end, length)} 23 | 24 | \item{genotypeFile}{genotype (.ped) file path} 25 | 26 | \item{mapFile}{map file (.map) file path} 27 | 28 | \item{pct_threshold}{reference line for significant regions (e.g. 
0.5 --> 50\% SNPs in runs; default is 0.33)} 29 | 30 | \item{x_font_size}{font size for x axis values (chromosome numbers: default = 10)} 31 | 32 | \item{savePlots}{should plots be saved out to files or plotted in the graphical terminal (default)?} 33 | 34 | \item{file_type}{type of plot file to be saved (if savePlots is TRUE; default is pdf)} 35 | 36 | \item{outputName}{title prefix (the base name of graph, if savePlots is TRUE)} 37 | 38 | \item{plotTitle}{title in plot (default NULL)} 39 | 40 | \item{plot_w}{plot width (if savePlots = TRUE; default is 8)} 41 | 42 | \item{plot_h}{plot height (if savePlots = TRUE; default is 6)} 43 | } 44 | \value{ 45 | Manhattan plots of proportion of times SNPs are inside runs, 46 | per population (pdf files) 47 | } 48 | \description{ 49 | Function to plot the proportion of times/percentage each SNP is inside a run 50 | (population-specific signals) against SNP position in all chromosomes together 51 | Proportions on the y-axis, bps on the x-axis for all analysed chromosomes 52 | This is similar to the familiar GWAS Manhattan plot 53 | } 54 | \examples{ 55 | # getting map and ped paths 56 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 57 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 58 | 59 | # calculating runs of Homozygosity 60 | \dontrun{ 61 | # skipping runs calculation 62 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 63 | ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 64 | } 65 | # loading pre-calculated data 66 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 67 | runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 68 | 69 | # plot runs per animal (interactive) 70 | plot_manhattanRuns(runs = runsDF, genotypeFile = genotypeFile, mapFile = mapFile, 71 | savePlots = FALSE, 
plotTitle = "ROHom") 72 | 73 | } 74 | -------------------------------------------------------------------------------- /detectRUNS/man/readExternalRuns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{readExternalRuns} 4 | \alias{readExternalRuns} 5 | \title{Read runs from external files} 6 | \usage{ 7 | readExternalRuns( 8 | inputFile = NULL, 9 | program = c("plink", "BCFtools", "detectRUNS") 10 | ) 11 | } 12 | \arguments{ 13 | \item{inputFile}{name of (path to) external file} 14 | 15 | \item{program}{source program that produced the ROH file (one of \code{detectRUNS}, 16 | \code{Plink}, \code{BCFtools})} 17 | } 18 | \value{ 19 | dataframe in the correct format to be used with plots and statistics functions from \code{detectRUNS} 20 | } 21 | \description{ 22 | Function to read in, from external files, the output of software for ROH: 23 | \enumerate{ 24 | \item \code{detectRUNS}: output saved out to a file (e.g. 
write.table) 25 | \item \code{Plink}: output from the \code{--homozyg} option (\code{.hom} files) 26 | \item \code{BCFtools}: output from the \code{roh} option 27 | } 28 | } 29 | \examples{ 30 | # getting map and ped paths 31 | \dontrun{ 32 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 33 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 34 | 35 | # calculating runs of Homozygosity 36 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 37 | ROHet = FALSE, maxMissRun = 1, maxMissWindow = 1, minLengthBps = 100000, minDensity = 1/10000) 38 | 39 | write.table(x= runs,file = 'Kijas2016_Sheep_subset.sliding.csv', quote=F, row.names = F) 40 | } 41 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package = "detectRUNS") 42 | newData=readExternalRuns(runsFile, program = 'detectRUNS') 43 | 44 | } 45 | -------------------------------------------------------------------------------- /detectRUNS/man/readMapFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{readMapFile} 4 | \alias{readMapFile} 5 | \title{Read from a .map file locations and return a data.table object} 6 | \usage{ 7 | readMapFile(mapFile) 8 | } 9 | \arguments{ 10 | \item{mapFile}{map file (.map) file path} 11 | } 12 | \value{ 13 | data.table object 14 | } 15 | \description{ 16 | This is a utility function which checks for file existence, defines 17 | colClasses and then returns the read data.table object 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /detectRUNS/man/readPOPCpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 
R/RcppExports.R 3 | \name{readPOPCpp} 4 | \alias{readPOPCpp} 5 | \title{Function to return a dataframe of population (POP, ID)} 6 | \usage{ 7 | readPOPCpp(genotypeFile) 8 | } 9 | \arguments{ 10 | \item{genotypeFile}{genotype (.ped) file location} 11 | } 12 | \value{ 13 | a dataframe of POP, ID 14 | } 15 | \description{ 16 | This is a core function. Read PED file and returns a data.frame with the first two 17 | columns 18 | } 19 | -------------------------------------------------------------------------------- /detectRUNS/man/reorderDF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{reorderDF} 4 | \alias{reorderDF} 5 | \title{Function to reorder data frames by CHROMOSOME} 6 | \usage{ 7 | reorderDF(dfx) 8 | } 9 | \arguments{ 10 | \item{dfx}{data frame to be reordered (with column "CHROMOSOME")} 11 | } 12 | \value{ 13 | A reordered data frame by chromosome 14 | } 15 | \description{ 16 | The data frame will be reordered according to chromosome: 17 | from 1 to n, then X, Y, XY, MT 18 | The data frame needs to have a column with name "CHROMOSOME" 19 | } 20 | \details{ 21 | Reorder results based on chromosome 22 | } 23 | -------------------------------------------------------------------------------- /detectRUNS/man/slidingRUNS.run.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{slidingRUNS.run} 4 | \alias{slidingRUNS.run} 5 | \title{Main function to detect RUNS (ROHom/ROHet) using sliding windows (a la Plink)} 6 | \usage{ 7 | slidingRUNS.run( 8 | genotypeFile, 9 | mapFile, 10 | windowSize = 15, 11 | threshold = 0.05, 12 | minSNP = 3, 13 | ROHet = FALSE, 14 | maxOppWindow = 1, 15 | maxMissWindow = 1, 16 | maxGap = 10^6, 17 | minLengthBps = 1000, 18 | minDensity = 1/1000, 19 | maxOppRun = 
NULL, 20 | maxMissRun = NULL 21 | ) 22 | } 23 | \arguments{ 24 | \item{genotypeFile}{genotype (.ped) file path} 25 | 26 | \item{mapFile}{map file (.map) file path} 27 | 28 | \item{windowSize}{the size of sliding window (number of SNP loci) (default = 15)} 29 | 30 | \item{threshold}{the threshold of overlapping windows of the same state 31 | (homozygous/heterozygous) to call a SNP in a RUN (default = 0.05)} 32 | 33 | \item{minSNP}{minimum n. of SNP in a RUN (default = 3)} 34 | 35 | \item{ROHet}{should we look for ROHet or ROHom? (default = FALSE)} 36 | 37 | \item{maxOppWindow}{max n. of homozygous/heterozygous SNP in the 38 | sliding window (default = 1)} 39 | 40 | \item{maxMissWindow}{max. n. of missing SNP in the sliding window (default = 1)} 41 | 42 | \item{maxGap}{max distance between consecutive SNP to be still considered a 43 | potential run (default = 10^6 bps)} 44 | 45 | \item{minLengthBps}{minimum length of run in bps (defaults to 1000 bps = 1 kbps)} 46 | 47 | \item{minDensity}{minimum n. of SNP per kbps (defaults to 0.1 = 1 SNP every 10 kbps)} 48 | 49 | \item{maxOppRun}{max n. of opposite genotype SNPs in the run (optional)} 50 | 51 | \item{maxMissRun}{max n. of missing SNPs in the run (optional)} 52 | } 53 | \value{ 54 | A dataframe with RUNs of Homozygosity or Heterozygosity in the analysed dataset. 55 | The returned dataframe contains the following seven columns: "group", "id", "chrom", 56 | "nSNP", "from", "to", "lengthBps" (group: population, breed, case/control etc.; 57 | id: individual identifier; chrom: chromosome on which the run is located; 58 | nSNP: number of SNPs in the run; from: starting position of the run, in bps; 59 | to: end position of the run, in bps; lengthBps: size of the run) 60 | } 61 | \description{ 62 | This is one of the main functions of detectRUNS and is used to detect runs 63 | (of homozygosity or heterozygosity) 64 | in the genome (diploid) with the sliding-window method. 65 | All parameters to detect runs (e.g. minimum n. 
of SNP, max n. of missing genotypes, 66 | max n. of opposite genotypes etc.) are specified here. 67 | Input data are in the ped/map 68 | Plink format (https://www.cog-genomics.org/plink/1.9/input#ped) 69 | } 70 | \details{ 71 | This function scans the genome (diploid) for runs using the sliding-window method. 72 | This is a wrapper function for many component functions that handle the input data 73 | (ped/map files), perform internal conversions, accept parameters specifications, 74 | select whether runs of homozygosity (RoHom) or of heterozygosity (RoHet) 75 | are looked for. 76 | 77 | In the ped file, the groups samples belong to can be specified (first column). 78 | This is important if comparisons between human ethnic groups or between animal breeds 79 | or plant varieties or biological populations are to be performed. 80 | Also, if cases and controls are to be compared, this is the place where this 81 | information needs to be specified. 82 | 83 | This function returns a data frame with all runs detected in the dataset. 84 | This data frame can then be written out to a csv file. 85 | The data frame is, in turn, the input for other functions of the detectRUNS package 86 | that create plots and produce statistics from the results 87 | (see plots and statistics functions in this manual, 88 | and/or refer to the detectRUNS vignette). 
89 | } 90 | \examples{ 91 | # getting map and ped paths 92 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 93 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 94 | # calculating runs with sliding window approach 95 | \dontrun{ 96 | # skipping runs calculation 97 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, 98 | minSNP = 15, ROHet = FALSE, maxOppWindow = 1, maxMissWindow = 1, maxGap=10^6, 99 | minLengthBps = 100000, minDensity = 1/10000) 100 | } 101 | # loading pre-calculated data 102 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 103 | colClasses <- c(rep("character", 3), rep("numeric", 4) ) 104 | runs <- read.csv2(runsFile, header = TRUE, stringsAsFactors = FALSE, 105 | colClasses = colClasses) 106 | 107 | } 108 | -------------------------------------------------------------------------------- /detectRUNS/man/slidingRuns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{slidingRuns} 4 | \alias{slidingRuns} 5 | \title{Function to detect runs using sliding window approach} 6 | \usage{ 7 | slidingRuns(indGeno, individual, mapFile, gaps, parameters, cpp = TRUE) 8 | } 9 | \arguments{ 10 | \item{indGeno}{vector of 0/1/NAs of individual genotypes (0: homozygote; 1: heterozygote)} 11 | 12 | \item{individual}{list of group (breed, population, case/control etc.) 
and ID of individual sample} 13 | 14 | \item{mapFile}{Plink map file (for SNP position)} 15 | 16 | \item{gaps}{distance between SNPs} 17 | 18 | \item{parameters}{list of parameters} 19 | 20 | \item{cpp}{use cpp functions or not (DEBUG)} 21 | } 22 | \value{ 23 | A data frame of runs per individual sample 24 | } 25 | \description{ 26 | This is a core function not intended to be exported 27 | } 28 | \details{ 29 | This method uses sliding windows to detect RUNs. Checks on minimum n. of SNP, max n. of opposite and missing genotypes, 30 | max gap between adjacent loci and minimum length of the run are implemented (as in the sliding window method). 31 | Both runs of homozygosity (RoHom) and of heterozygosity (RoHet) can be searched for (option ROHet: TRUE/FALSE) 32 | NOTE: this method is not intended to be exported 33 | } 34 | \keyword{internal} 35 | -------------------------------------------------------------------------------- /detectRUNS/man/slidingWindow.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{slidingWindow} 4 | \alias{slidingWindow} 5 | \title{Function to slide a window over a vector (individual's genotypes)} 6 | \usage{ 7 | slidingWindow( 8 | data, 9 | gaps, 10 | windowSize, 11 | step, 12 | maxGap, 13 | ROHet = TRUE, 14 | maxOppositeGenotype = 1, 15 | maxMiss = 1 16 | ) 17 | } 18 | \arguments{ 19 | \item{data}{vector of 0/1/2 genotypes} 20 | 21 | \item{gaps}{vector of differences between consecutive positions (gaps) in bps} 22 | 23 | \item{windowSize}{size of window (n. of SNP)} 24 | 25 | \item{step}{by which (how many SNP) is the window slid} 26 | 27 | \item{maxGap}{max distance between consecutive SNP in a window to be still considered a potential run} 28 | 29 | \item{ROHet}{shall we detect ROHet or ROHom?} 30 | 31 | \item{maxOppositeGenotype}{max n. of homozygous/heterozygous SNP} 32 | 33 | \item{maxMiss}{max. 
n. of missing SNP} 34 | } 35 | \value{ 36 | vector of TRUE/FALSE (whether a window is homozygous or NOT) 37 | } 38 | \description{ 39 | This is a core function. The functions to detect RUNS are slid over the genome 40 | } 41 | -------------------------------------------------------------------------------- /detectRUNS/man/slidingWindowCpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{slidingWindowCpp} 4 | \alias{slidingWindowCpp} 5 | \title{Function to slide a window over a vector (individual's genotypes)} 6 | \usage{ 7 | slidingWindowCpp( 8 | data, 9 | gaps, 10 | windowSize, 11 | step, 12 | maxGap, 13 | ROHet = TRUE, 14 | maxOppositeGenotype = 1L, 15 | maxMiss = 1L 16 | ) 17 | } 18 | \arguments{ 19 | \item{data}{vector of 0/1/2 genotypes} 20 | 21 | \item{gaps}{vector of differences between consecutive positions (gaps) in bps} 22 | 23 | \item{windowSize}{size of window (n. of SNP)} 24 | 25 | \item{step}{by which (how many SNP) is the window slid} 26 | 27 | \item{maxGap}{max distance between consecutive SNP in a window to be still considered a potential run} 28 | 29 | \item{ROHet}{shall we detect ROHet or ROHom?} 30 | 31 | \item{maxOppositeGenotype}{max n. of homozygous/heterozygous SNP} 32 | 33 | \item{maxMiss}{max. n. of missing SNP} 34 | } 35 | \value{ 36 | vector of TRUE/FALSE (whether a window is homozygous or NOT) 37 | } 38 | \description{ 39 | This is a core function. 
The functions to detect RUNS are slid over the genome 40 | } 41 | -------------------------------------------------------------------------------- /detectRUNS/man/snpInRun.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{snpInRun} 4 | \alias{snpInRun} 5 | \title{Function to return a vector of T/F for whether a SNP is or not in a RUN} 6 | \usage{ 7 | snpInRun(RunVector, windowSize, threshold) 8 | } 9 | \arguments{ 10 | \item{RunVector}{vector of TRUE/FALSE (is a window homozygous/heterozygous?)} 11 | 12 | \item{windowSize}{size of window (n. of SNP)} 13 | 14 | \item{threshold}{threshold to call a SNP in a RUN} 15 | } 16 | \value{ 17 | vector of TRUE/FALSE (whether a SNP is in a RUN or NOT) 18 | } 19 | \description{ 20 | This is a core function. The function to determine whether a SNP is or not in a RUN. 21 | The ratio between homozygous/heterozygous windows and total n. of windows is computed here 22 | } 23 | -------------------------------------------------------------------------------- /detectRUNS/man/snpInRunCpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{snpInRunCpp} 4 | \alias{snpInRunCpp} 5 | \title{Function to return a vector of T/F for whether a SNP is or not in a RUN} 6 | \usage{ 7 | snpInRunCpp(RunVector, windowSize, threshold) 8 | } 9 | \arguments{ 10 | \item{RunVector}{vector of TRUE/FALSE (is a window homozygous/heterozygous?)} 11 | 12 | \item{windowSize}{size of window (n. of SNP)} 13 | 14 | \item{threshold}{threshold to call a SNP in a RUN} 15 | } 16 | \value{ 17 | vector of TRUE/FALSE (whether a SNP is in a RUN or NOT) 18 | } 19 | \description{ 20 | This is a core function. The function to determine whether a SNP is or not in a RUN. 
21 | The ratio between homozygous/heterozygous windows and total n. of windows is computed here 22 | } 23 | -------------------------------------------------------------------------------- /detectRUNS/man/snpInsideRuns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{snpInsideRuns} 4 | \alias{snpInsideRuns} 5 | \title{Function to count number of times a SNP is in a RUN. Need to be called per 6 | chromosome (not using a dataframe with all results on all chromosomes)} 7 | \usage{ 8 | snpInsideRuns(runsChrom, mapChrom, genotypeFile) 9 | } 10 | \arguments{ 11 | \item{runsChrom}{R object (dataframe) with results per chromosome (column names:"POPULATION","IND","CHROMOSOME","COUNT","START","END","LENGTH")} 12 | 13 | \item{mapChrom}{R object (dataframe) with SNP name and position per chromosome (map file) (column names: "CHR","SNP_NAME","x","POSITION")} 14 | 15 | \item{genotypeFile}{genotype (.ped) file location} 16 | } 17 | \value{ 18 | dataframe with counts per SNP in runs (per population) 19 | } 20 | \description{ 21 | Function to count number of times a SNP is in a RUN. 
Need to be called per 22 | chromosome (not using a dataframe with all results on all chromosomes) 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /detectRUNS/man/snpInsideRunsCpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{snpInsideRunsCpp} 4 | \alias{snpInsideRunsCpp} 5 | \title{Function to count number of times a SNP is in a RUN} 6 | \usage{ 7 | snpInsideRunsCpp(runsChrom, mapChrom, genotypeFile) 8 | } 9 | \arguments{ 10 | \item{runsChrom}{R object (dataframe) with results per chromosome} 11 | 12 | \item{mapChrom}{R map object with SNP per chromosome} 13 | 14 | \item{genotypeFile}{genotype (.ped) file location} 15 | } 16 | \value{ 17 | dataframe with counts per SNP in runs (per population) 18 | } 19 | \description{ 20 | Function to count number of times a SNP is in a RUN 21 | } 22 | -------------------------------------------------------------------------------- /detectRUNS/man/summaryRuns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Stats.R 3 | \name{summaryRuns} 4 | \alias{summaryRuns} 5 | \title{Summary statistics on detected runs} 6 | \usage{ 7 | summaryRuns(runs, mapFile, genotypeFile, Class = 2, snpInRuns = FALSE) 8 | } 9 | \arguments{ 10 | \item{runs}{R object (dataframe) with results on detected runs} 11 | 12 | \item{mapFile}{Plink map file (for SNP position)} 13 | 14 | \item{genotypeFile}{Plink ped file (for SNP position)} 15 | 16 | \item{Class}{group of length (in Mbps) by class (default: 0-2, 2-4, 4-8, 8-16, >16)} 17 | 18 | \item{snpInRuns}{TRUE/FALSE (default): should the function \code{snpInsideRuns} be 19 | called to compute the proportion of times each SNP falls inside a run in the 20 | group/population?} 21 
| } 22 | \value{ 23 | A list of dataframes containing the most relevant descriptive 24 | statistics on detected runs. The list conveniently contains 9 dataframes that can 25 | be used for further processing and visualization, or can be written out to text files 26 | } 27 | \description{ 28 | This function processes the results from \code{slidingRUNS.run} and 29 | \code{consecutiveRUNS.run} and produces a number of interesting descriptive 30 | statistics on results. 31 | } 32 | \details{ 33 | \code{summaryRuns} calculates: i) the number of runs per chromosome and group/population; 34 | ii) the percent distribution of runs per chromosome and group; iii) the number of 35 | runs per size-class and group; iv) the percent distribution of runs per size-class 36 | and group; v) the mean length of runs per chromosome and group; vi) the mean 37 | length of runs per size-class and group; vii) individual inbreeding coefficient 38 | estimated from ROH; viii) individual inbreeding coefficient estimated from ROH 39 | per chromosome; ix) individual inbreeding coefficient estimated from ROH per 40 | size-class 41 | } 42 | \examples{ 43 | # getting map and ped paths 44 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 45 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 46 | 47 | # calculating runs of Homozygosity 48 | \dontrun{ 49 | # skipping runs calculation 50 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 51 | ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 52 | } 53 | # loading pre-calculated data 54 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 55 | runsDF <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 56 | 57 | summaryRuns(runs = runsDF, mapFile = mapFile, genotypeFile = genotypeFile, Class = 2, 58 | snpInRuns = FALSE) 59 | 
60 | } 61 | -------------------------------------------------------------------------------- /detectRUNS/man/tableRuns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Stats.R 3 | \name{tableRuns} 4 | \alias{tableRuns} 5 | \title{Function to retrieve most common runs in the population} 6 | \usage{ 7 | tableRuns( 8 | runs = NULL, 9 | SnpInRuns = NULL, 10 | genotypeFile, 11 | mapFile, 12 | threshold = 0.5 13 | ) 14 | } 15 | \arguments{ 16 | \item{runs}{R object (dataframe) with results on detected runs} 17 | 18 | \item{SnpInRuns}{dataframe with the proportion of times each SNP falls inside a 19 | run in the population (output from \code{snpInsideRuns})} 20 | 21 | \item{genotypeFile}{Plink ped file (for SNP position)} 22 | 23 | \item{mapFile}{Plink map file (for SNP position)} 24 | 25 | \item{threshold}{value from 0 to 1 (default 0.5) that controls the desired 26 | proportion of individuals carrying that run (e.g. 70\%)} 27 | } 28 | \value{ 29 | A dataframe with the most common runs detected in the sampled individuals 30 | (the group/population, start and end position of the run, chromosome and number of SNPs 31 | included in the run are reported in the output dataframe) 32 | } 33 | \description{ 34 | This function takes as input either the run results or the output from 35 | the function \code{snpInsideRuns} (proportion of times a SNP is inside a run) 36 | in the population/group, and returns a subset of the runs most commonly 37 | found in the group/population. The parameter \code{threshold} controls the definition 38 | of most common (e.g. in at least 50\%, 70\% etc. 
of the sampled individuals) 39 | } 40 | \examples{ 41 | # getting map and ped paths 42 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 43 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 44 | 45 | # calculating runs of Homozygosity 46 | \dontrun{ 47 | # skipping runs calculation 48 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, minSNP = 15, 49 | ROHet = FALSE, maxOppositeGenotype = 1, maxMiss = 1, minLengthBps = 100000, minDensity = 1/10000) 50 | } 51 | # loading pre-calculated data 52 | runsFile <- system.file("extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 53 | runsDF = readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 54 | 55 | tableRuns(runs = runsDF, genotypeFile = genotypeFile, mapFile = mapFile, threshold = 0.5) 56 | 57 | } 58 | -------------------------------------------------------------------------------- /detectRUNS/man/writeRUN.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/funktionen.R 3 | \name{writeRUN} 4 | \alias{writeRUN} 5 | \title{Function to write out RUNS per individual animal} 6 | \usage{ 7 | writeRUN(ind, dRUN, ROHet = TRUE, group, outputName) 8 | } 9 | \arguments{ 10 | \item{ind}{ID of animals} 11 | 12 | \item{dRUN}{data.frame with RUNS per animal} 13 | 14 | \item{ROHet}{shall we detect ROHet or ROHom?} 15 | 16 | \item{group}{group (factor): population, breed, ethnicity, case/control etc.} 17 | 18 | \item{outputName}{output filename} 19 | } 20 | \value{ 21 | TRUE/FALSE if RUNS are written out or not 22 | } 23 | \description{ 24 | Function to write out RUNS per individual animal 25 | } 26 | -------------------------------------------------------------------------------- /detectRUNS/src/.gitignore: 
-------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /detectRUNS/src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | // fast_factor 9 | SEXP fast_factor(SEXP x); 10 | RcppExport SEXP _detectRUNS_fast_factor(SEXP xSEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RObject rcpp_result_gen; 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); 15 | rcpp_result_gen = Rcpp::wrap(fast_factor(x)); 16 | return rcpp_result_gen; 17 | END_RCPP 18 | } 19 | // genoConvertCpp 20 | IntegerVector genoConvertCpp(IntegerVector genotype); 21 | RcppExport SEXP _detectRUNS_genoConvertCpp(SEXP genotypeSEXP) { 22 | BEGIN_RCPP 23 | Rcpp::RObject rcpp_result_gen; 24 | Rcpp::RNGScope rcpp_rngScope_gen; 25 | Rcpp::traits::input_parameter< IntegerVector >::type genotype(genotypeSEXP); 26 | rcpp_result_gen = Rcpp::wrap(genoConvertCpp(genotype)); 27 | return rcpp_result_gen; 28 | END_RCPP 29 | } 30 | // pedConvertCpp 31 | IntegerVector pedConvertCpp(CharacterVector genotype); 32 | RcppExport SEXP _detectRUNS_pedConvertCpp(SEXP genotypeSEXP) { 33 | BEGIN_RCPP 34 | Rcpp::RObject rcpp_result_gen; 35 | Rcpp::RNGScope rcpp_rngScope_gen; 36 | Rcpp::traits::input_parameter< CharacterVector >::type 
genotype(genotypeSEXP); 37 | rcpp_result_gen = Rcpp::wrap(pedConvertCpp(genotype)); 38 | return rcpp_result_gen; 39 | END_RCPP 40 | } 41 | // homoZygotTestCpp 42 | bool homoZygotTestCpp(IntegerVector x, IntegerVector gaps, int maxHet, int maxMiss, int maxGap); 43 | RcppExport SEXP _detectRUNS_homoZygotTestCpp(SEXP xSEXP, SEXP gapsSEXP, SEXP maxHetSEXP, SEXP maxMissSEXP, SEXP maxGapSEXP) { 44 | BEGIN_RCPP 45 | Rcpp::RObject rcpp_result_gen; 46 | Rcpp::RNGScope rcpp_rngScope_gen; 47 | Rcpp::traits::input_parameter< IntegerVector >::type x(xSEXP); 48 | Rcpp::traits::input_parameter< IntegerVector >::type gaps(gapsSEXP); 49 | Rcpp::traits::input_parameter< int >::type maxHet(maxHetSEXP); 50 | Rcpp::traits::input_parameter< int >::type maxMiss(maxMissSEXP); 51 | Rcpp::traits::input_parameter< int >::type maxGap(maxGapSEXP); 52 | rcpp_result_gen = Rcpp::wrap(homoZygotTestCpp(x, gaps, maxHet, maxMiss, maxGap)); 53 | return rcpp_result_gen; 54 | END_RCPP 55 | } 56 | // heteroZygotTestCpp 57 | bool heteroZygotTestCpp(IntegerVector x, IntegerVector gaps, int maxHom, int maxMiss, int maxGap); 58 | RcppExport SEXP _detectRUNS_heteroZygotTestCpp(SEXP xSEXP, SEXP gapsSEXP, SEXP maxHomSEXP, SEXP maxMissSEXP, SEXP maxGapSEXP) { 59 | BEGIN_RCPP 60 | Rcpp::RObject rcpp_result_gen; 61 | Rcpp::RNGScope rcpp_rngScope_gen; 62 | Rcpp::traits::input_parameter< IntegerVector >::type x(xSEXP); 63 | Rcpp::traits::input_parameter< IntegerVector >::type gaps(gapsSEXP); 64 | Rcpp::traits::input_parameter< int >::type maxHom(maxHomSEXP); 65 | Rcpp::traits::input_parameter< int >::type maxMiss(maxMissSEXP); 66 | Rcpp::traits::input_parameter< int >::type maxGap(maxGapSEXP); 67 | rcpp_result_gen = Rcpp::wrap(heteroZygotTestCpp(x, gaps, maxHom, maxMiss, maxGap)); 68 | return rcpp_result_gen; 69 | END_RCPP 70 | } 71 | // findOppositeAndMissing 72 | StringVector findOppositeAndMissing(IntegerVector data, bool ROHet); 73 | RcppExport SEXP _detectRUNS_findOppositeAndMissing(SEXP dataSEXP, SEXP 
ROHetSEXP) { 74 | BEGIN_RCPP 75 | Rcpp::RObject rcpp_result_gen; 76 | Rcpp::RNGScope rcpp_rngScope_gen; 77 | Rcpp::traits::input_parameter< IntegerVector >::type data(dataSEXP); 78 | Rcpp::traits::input_parameter< bool >::type ROHet(ROHetSEXP); 79 | rcpp_result_gen = Rcpp::wrap(findOppositeAndMissing(data, ROHet)); 80 | return rcpp_result_gen; 81 | END_RCPP 82 | } 83 | // slidingWindowCpp 84 | List slidingWindowCpp(IntegerVector data, IntegerVector gaps, int windowSize, int step, int maxGap, bool ROHet, int maxOppositeGenotype, int maxMiss); 85 | RcppExport SEXP _detectRUNS_slidingWindowCpp(SEXP dataSEXP, SEXP gapsSEXP, SEXP windowSizeSEXP, SEXP stepSEXP, SEXP maxGapSEXP, SEXP ROHetSEXP, SEXP maxOppositeGenotypeSEXP, SEXP maxMissSEXP) { 86 | BEGIN_RCPP 87 | Rcpp::RObject rcpp_result_gen; 88 | Rcpp::RNGScope rcpp_rngScope_gen; 89 | Rcpp::traits::input_parameter< IntegerVector >::type data(dataSEXP); 90 | Rcpp::traits::input_parameter< IntegerVector >::type gaps(gapsSEXP); 91 | Rcpp::traits::input_parameter< int >::type windowSize(windowSizeSEXP); 92 | Rcpp::traits::input_parameter< int >::type step(stepSEXP); 93 | Rcpp::traits::input_parameter< int >::type maxGap(maxGapSEXP); 94 | Rcpp::traits::input_parameter< bool >::type ROHet(ROHetSEXP); 95 | Rcpp::traits::input_parameter< int >::type maxOppositeGenotype(maxOppositeGenotypeSEXP); 96 | Rcpp::traits::input_parameter< int >::type maxMiss(maxMissSEXP); 97 | rcpp_result_gen = Rcpp::wrap(slidingWindowCpp(data, gaps, windowSize, step, maxGap, ROHet, maxOppositeGenotype, maxMiss)); 98 | return rcpp_result_gen; 99 | END_RCPP 100 | } 101 | // snpInRunCpp 102 | LogicalVector snpInRunCpp(LogicalVector RunVector, const int windowSize, const float threshold); 103 | RcppExport SEXP _detectRUNS_snpInRunCpp(SEXP RunVectorSEXP, SEXP windowSizeSEXP, SEXP thresholdSEXP) { 104 | BEGIN_RCPP 105 | Rcpp::RObject rcpp_result_gen; 106 | Rcpp::RNGScope rcpp_rngScope_gen; 107 | Rcpp::traits::input_parameter< LogicalVector >::type 
RunVector(RunVectorSEXP); 108 | Rcpp::traits::input_parameter< const int >::type windowSize(windowSizeSEXP); 109 | Rcpp::traits::input_parameter< const float >::type threshold(thresholdSEXP); 110 | rcpp_result_gen = Rcpp::wrap(snpInRunCpp(RunVector, windowSize, threshold)); 111 | return rcpp_result_gen; 112 | END_RCPP 113 | } 114 | // readPOPCpp 115 | DataFrame readPOPCpp(std::string genotypeFile); 116 | RcppExport SEXP _detectRUNS_readPOPCpp(SEXP genotypeFileSEXP) { 117 | BEGIN_RCPP 118 | Rcpp::RObject rcpp_result_gen; 119 | Rcpp::RNGScope rcpp_rngScope_gen; 120 | Rcpp::traits::input_parameter< std::string >::type genotypeFile(genotypeFileSEXP); 121 | rcpp_result_gen = Rcpp::wrap(readPOPCpp(genotypeFile)); 122 | return rcpp_result_gen; 123 | END_RCPP 124 | } 125 | // consecutiveRunsCpp 126 | DataFrame consecutiveRunsCpp(IntegerVector indGeno, List individual, DataFrame mapFile, bool ROHet, int minSNP, int maxOppositeGenotype, int maxMiss, int minLengthBps, int maxGap); 127 | RcppExport SEXP _detectRUNS_consecutiveRunsCpp(SEXP indGenoSEXP, SEXP individualSEXP, SEXP mapFileSEXP, SEXP ROHetSEXP, SEXP minSNPSEXP, SEXP maxOppositeGenotypeSEXP, SEXP maxMissSEXP, SEXP minLengthBpsSEXP, SEXP maxGapSEXP) { 128 | BEGIN_RCPP 129 | Rcpp::RObject rcpp_result_gen; 130 | Rcpp::RNGScope rcpp_rngScope_gen; 131 | Rcpp::traits::input_parameter< IntegerVector >::type indGeno(indGenoSEXP); 132 | Rcpp::traits::input_parameter< List >::type individual(individualSEXP); 133 | Rcpp::traits::input_parameter< DataFrame >::type mapFile(mapFileSEXP); 134 | Rcpp::traits::input_parameter< bool >::type ROHet(ROHetSEXP); 135 | Rcpp::traits::input_parameter< int >::type minSNP(minSNPSEXP); 136 | Rcpp::traits::input_parameter< int >::type maxOppositeGenotype(maxOppositeGenotypeSEXP); 137 | Rcpp::traits::input_parameter< int >::type maxMiss(maxMissSEXP); 138 | Rcpp::traits::input_parameter< int >::type minLengthBps(minLengthBpsSEXP); 139 | Rcpp::traits::input_parameter< int >::type 
maxGap(maxGapSEXP); 140 | rcpp_result_gen = Rcpp::wrap(consecutiveRunsCpp(indGeno, individual, mapFile, ROHet, minSNP, maxOppositeGenotype, maxMiss, minLengthBps, maxGap)); 141 | return rcpp_result_gen; 142 | END_RCPP 143 | } 144 | // snpInsideRunsCpp 145 | DataFrame snpInsideRunsCpp(DataFrame runsChrom, DataFrame mapChrom, std::string genotypeFile); 146 | RcppExport SEXP _detectRUNS_snpInsideRunsCpp(SEXP runsChromSEXP, SEXP mapChromSEXP, SEXP genotypeFileSEXP) { 147 | BEGIN_RCPP 148 | Rcpp::RObject rcpp_result_gen; 149 | Rcpp::RNGScope rcpp_rngScope_gen; 150 | Rcpp::traits::input_parameter< DataFrame >::type runsChrom(runsChromSEXP); 151 | Rcpp::traits::input_parameter< DataFrame >::type mapChrom(mapChromSEXP); 152 | Rcpp::traits::input_parameter< std::string >::type genotypeFile(genotypeFileSEXP); 153 | rcpp_result_gen = Rcpp::wrap(snpInsideRunsCpp(runsChrom, mapChrom, genotypeFile)); 154 | return rcpp_result_gen; 155 | END_RCPP 156 | } 157 | 158 | static const R_CallMethodDef CallEntries[] = { 159 | {"_detectRUNS_fast_factor", (DL_FUNC) &_detectRUNS_fast_factor, 1}, 160 | {"_detectRUNS_genoConvertCpp", (DL_FUNC) &_detectRUNS_genoConvertCpp, 1}, 161 | {"_detectRUNS_pedConvertCpp", (DL_FUNC) &_detectRUNS_pedConvertCpp, 1}, 162 | {"_detectRUNS_homoZygotTestCpp", (DL_FUNC) &_detectRUNS_homoZygotTestCpp, 5}, 163 | {"_detectRUNS_heteroZygotTestCpp", (DL_FUNC) &_detectRUNS_heteroZygotTestCpp, 5}, 164 | {"_detectRUNS_findOppositeAndMissing", (DL_FUNC) &_detectRUNS_findOppositeAndMissing, 2}, 165 | {"_detectRUNS_slidingWindowCpp", (DL_FUNC) &_detectRUNS_slidingWindowCpp, 8}, 166 | {"_detectRUNS_snpInRunCpp", (DL_FUNC) &_detectRUNS_snpInRunCpp, 3}, 167 | {"_detectRUNS_readPOPCpp", (DL_FUNC) &_detectRUNS_readPOPCpp, 1}, 168 | {"_detectRUNS_consecutiveRunsCpp", (DL_FUNC) &_detectRUNS_consecutiveRunsCpp, 9}, 169 | {"_detectRUNS_snpInsideRunsCpp", (DL_FUNC) &_detectRUNS_snpInsideRunsCpp, 3}, 170 | {NULL, NULL, 0} 171 | }; 172 | 173 | RcppExport void R_init_detectRUNS(DllInfo 
*dll) { 174 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 175 | R_useDynamicSymbols(dll, FALSE); 176 | } 177 | -------------------------------------------------------------------------------- /detectRUNS/tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(detectRUNS) 3 | 4 | test_check("detectRUNS") 5 | -------------------------------------------------------------------------------- /detectRUNS/tests/testthat/test.ROHet.consecutive.csv: -------------------------------------------------------------------------------- 1 | group;id;chrom;nSNP;from;to;lengthBps 2 | Jacobs;H70;24;35;16041579;18669354;2627775 3 | Jacobs;H70;24;17;22479026;23828331;1349305 4 | Jacobs;H70;24;100;29375887;36250095;6874208 5 | Jacobs;H71;24;19;5384735;6405344;1020609 6 | Jacobs;H71;24;15;7564606;8314969;750363 7 | Jacobs;H71;24;31;11007102;13558686;2551584 8 | Jacobs;H71;24;17;16041579;17307132;1265553 9 | Jacobs;H71;24;45;17996334;21762352;3766018 10 | Jacobs;H71;24;48;29475441;32598869;3123428 11 | Jacobs;H71;24;15;32681863;33503397;821534 12 | Jacobs;H71;24;53;37808477;42027686;4219209 13 | Jacobs;H72;24;22;6434392;7721963;1287571 14 | Jacobs;H72;24;17;19741398;21019195;1277797 15 | Jacobs;H72;24;15;21629724;22855134;1225410 16 | Jacobs;H72;24;61;33397677;37808477;4410800 17 | Jacobs;H78;24;17;11007102;12335936;1328834 18 | Jacobs;H78;24;21;16041579;17578852;1537273 19 | Jacobs;H78;24;99;31774063;38615161;6841098 20 | Jacobs;H78;24;20;40075324;42027686;1952362 21 | Jacobs;H79;24;16;9352754;11007102;1654348 22 | Jacobs;H79;24;16;25421718;26493858;1072140 23 | Jacobs;H79;24;16;29197813;30103953;906140 24 | Jacobs;H79;24;15;31359697;32349389;989692 25 | Jacobs;H79;24;19;33798780;35272492;1473712 26 | Jacobs;H80;24;70;16041579;21762352;5720773 27 | Jacobs;H86;24;42;96467;3507572;3411105 28 | Jacobs;H86;24;25;33056732;34225118;1168386 29 | Jacobs;H87;24;15;25809786;27159621;1349835 30 | 
Jacobs;H87;24;31;31556086;33397677;1841591 31 | Jacobs;H87;24;20;40075324;42027686;1952362 32 | Jacobs;H88;24;313;9352754;33343694;23990940 33 | Jacobs;H88;24;35;39137763;42027686;2889923 34 | Jacobs;H93;24;19;96467;1459984;1363517 35 | Jacobs;H93;24;51;3807645;7107585;3299940 36 | Jacobs;H93;24;75;11366060;16811910;5445850 37 | Navajo-Churro;H38;24;17;1938133;3214984;1276851 38 | Navajo-Churro;H33;24;29;32623905;34176391;1552486 39 | Navajo-Churro;H39;24;18;9942134;11611963;1669829 40 | Navajo-Churro;H45;24;18;9658771;11366060;1707289 41 | Navajo-Churro;H45;24;36;20734455;23689555;2955100 42 | Navajo-Churro;H45;24;25;25614223;27620251;2006028 43 | Navajo-Churro;H45;24;21;36938439;38056567;1118128 44 | Navajo-Churro;H28;24;16;21910840;23312288;1401448 45 | Navajo-Churro;H34;24;15;6101314;6819203;717889 46 | Navajo-Churro;H34;24;19;16114681;17578852;1464171 47 | Navajo-Churro;H34;24;19;25614223;27159621;1545398 48 | Navajo-Churro;H34;24;15;39432711;40336394;903683 49 | Navajo-Churro;H52;24;16;21910840;23312288;1401448 50 | Navajo-Churro;H58;24;20;4766009;5978910;1212901 51 | Navajo-Churro;H58;24;15;26558782;27928201;1369419 52 | -------------------------------------------------------------------------------- /detectRUNS/tests/testthat/test.ROHet.sliding.csv: -------------------------------------------------------------------------------- 1 | group;id;chrom;nSNP;from;to;lengthBps 2 | Jacobs;H70;24;34;16077497;18669354;2591857 3 | Jacobs;H70;24;15;22486632;23689555;1202923 4 | Jacobs;H70;24;99;29375887;36160755;6784868 5 | Jacobs;H71;24;21;5075204;6265369;1190165 6 | Jacobs;H71;24;31;11076801;13647199;2570398 7 | Jacobs;H71;24;15;16077497;17253902;1176405 8 | Jacobs;H71;24;44;17996334;21629724;3633390 9 | Jacobs;H71;24;47;29475441;32552648;3077207 10 | Jacobs;H71;24;58;37613659;42027686;4414027 11 | Jacobs;H72;24;22;6468168;7796684;1328516 12 | Jacobs;H72;24;16;19819848;21019195;1199347 13 | Jacobs;H72;24;60;33397677;37753985;4356308 14 | 
Jacobs;H78;24;16;11076801;12335936;1259135 15 | Jacobs;H78;24;20;16041579;17514108;1472529 16 | Jacobs;H78;24;103;31821433;38974331;7152898 17 | Jacobs;H78;24;19;40193947;42027686;1833739 18 | Jacobs;H79;24;17;9268996;10977878;1708882 19 | Jacobs;H79;24;15;31093416;32103135;1009719 20 | Jacobs;H79;24;18;33798780;35216176;1417396 21 | Jacobs;H80;24;69;16041579;21629724;5588145 22 | Jacobs;H86;24;41;96467;3446498;3350031 23 | Jacobs;H86;24;17;25884487;27526097;1641610 24 | Jacobs;H86;24;29;32623905;34176391;1552486 25 | Jacobs;H87;24;30;31556086;33343694;1787608 26 | Jacobs;H87;24;19;40193947;42027686;1833739 27 | Jacobs;H88;24;315;9268996;33343694;24074698 28 | Jacobs;H88;24;34;39214598;42027686;2813088 29 | Jacobs;H93;24;18;96467;1435916;1339449 30 | Jacobs;H93;24;51;4000350;7213312;3212962 31 | Jacobs;H93;24;74;11451480;16811910;5360430 32 | Navajo-Churro;H38;24;17;1807272;3031957;1224685 33 | Navajo-Churro;H38;24;15;33225361;33879086;653725 34 | Navajo-Churro;H33;24;28;32623905;34113252;1489347 35 | Navajo-Churro;H39;24;18;9865712;11526475;1660763 36 | Navajo-Churro;H45;24;18;9460076;11326321;1866245 37 | Navajo-Churro;H45;24;36;20734455;23689555;2955100 38 | Navajo-Churro;H45;24;25;25523363;27613295;2089932 39 | Navajo-Churro;H45;24;20;36938439;37965791;1027352 40 | Navajo-Churro;H28;24;25;21019195;23209530;2190335 41 | Navajo-Churro;H34;24;20;16041579;17514108;1472529 42 | Navajo-Churro;H34;24;17;25638494;27091483;1452989 43 | Navajo-Churro;H52;24;25;21019195;23209530;2190335 44 | Navajo-Churro;H58;24;18;4866988;5951054;1084066 45 | -------------------------------------------------------------------------------- /detectRUNS/tests/testthat/test.map: -------------------------------------------------------------------------------- 1 | 24 s72739.1 0 96467 2 | 24 s23759.1 0 203039 3 | 24 OAR24_862318.1 0 335894 4 | 24 s01400.1 0 347365 5 | 24 OAR24_885120.1 0 355727 6 | 24 s56532.1 0 481794 7 | 24 OAR24_1207928.1 0 702665 8 | 24 s29930.1 0 794098 9 | 24 s01398.1 0 
884092 10 | 24 s34109.1 0 962868 11 | 24 DU370509_408.1 0 1012093 12 | 24 OAR24_1604830.1 0 1049988 13 | 24 s33505.1 0 1152766 14 | 24 OAR24_1686363.1 0 1211676 15 | 24 s56493.1 0 1244496 16 | 24 s09648.1 0 1303692 17 | 24 s32804.1 0 1377045 18 | 24 OAR24_1989440.1 0 1435916 19 | 24 s74151.1 0 1459984 20 | 24 s15037.1 0 1527822 21 | 24 s07278.1 0 1807272 22 | 24 s57470.1 0 1938133 23 | 24 s54890.1 0 2043961 24 | 24 OAR24_2841958.1 0 2191707 25 | 24 s74400.1 0 2381978 26 | 24 s12708.1 0 2416588 27 | 24 s25251.1 0 2532451 28 | 24 s58502.1 0 2578020 29 | 24 s25478.1 0 2613669 30 | 24 OAR24_3313470.1 0 2661190 31 | 24 s54800.1 0 2748756 32 | 24 OAR24_3409909.1 0 2758943 33 | 24 s40068.1 0 2818844 34 | 24 DU245677_479.1 0 2843599 35 | 24 s48530.1 0 2920403 36 | 24 s55148.1 0 2973822 37 | 24 s36987.1 0 3031957 38 | 24 OAR24_3882451.1 0 3214984 39 | 24 OAR24_3997544.1 0 3333012 40 | 24 s67652.1 0 3439178 41 | 24 s03019.1 0 3446498 42 | 24 s67586.1 0 3507572 43 | 24 s32401.1 0 3580800 44 | 24 OAR24_4400996.1 0 3698354 45 | 24 s68231.1 0 3791894 46 | 24 s66763.1 0 3807645 47 | 24 s18006.1 0 4000350 48 | 24 s69050.1 0 4116377 49 | 24 OAR24_4995509.1 0 4276981 50 | 24 s17528.1 0 4325906 51 | 24 s15154.1 0 4398926 52 | 24 s62308.1 0 4435644 53 | 24 s44109.1 0 4492779 54 | 24 s39492.1 0 4542556 55 | 24 s34904.1 0 4674353 56 | 24 s07735.1 0 4734623 57 | 24 OAR24_5505124.1 0 4766009 58 | 24 s06038.1 0 4866988 59 | 24 s35487.1 0 4936555 60 | 24 s18280.1 0 4990613 61 | 24 s31084.1 0 5039185 62 | 24 OAR24_5933801.1 0 5075204 63 | 24 s46925.1 0 5176310 64 | 24 OAR24_6094542.1 0 5232180 65 | 24 DU281388_299.1 0 5384735 66 | 24 s48656.1 0 5433186 67 | 24 OAR24_6411852.1 0 5485098 68 | 24 OAR24_6423269.1 0 5495363 69 | 24 s35059.1 0 5593329 70 | 24 OAR24_6554282.1 0 5621749 71 | 24 OAR24_6671055.1 0 5726120 72 | 24 s13832.1 0 5785403 73 | 24 s03559.1 0 5874652 74 | 24 OAR24_6927502_X.1 0 5908734 75 | 24 OAR24_6970428.1 0 5951054 76 | 24 s54640.1 0 5978910 77 | 24 OAR24_7019248.1 0 
6001198 78 | 24 OAR24_7119784.1 0 6101314 79 | 24 s11598.1 0 6155793 80 | 24 s68193.1 0 6189670 81 | 24 OAR24_7239593.1 0 6222497 82 | 24 s31852.1 0 6265369 83 | 24 OAR24_7424449.1 0 6405344 84 | 24 OAR24_7435004.1 0 6415384 85 | 24 OAR24_7509611.1 0 6434392 86 | 24 OAR24_7543424.1 0 6468168 87 | 24 s30115.1 0 6526499 88 | 24 s50383.1 0 6612546 89 | 24 OAR24_7741373.1 0 6647168 90 | 24 OAR24_7849246.1 0 6753316 91 | 24 s04817.1 0 6783196 92 | 24 OAR24_7944022_X.1 0 6819203 93 | 24 OAR24_8021643.1 0 6891468 94 | 24 OAR24_8063846.1 0 6926756 95 | 24 s04466.1 0 6976657 96 | 24 s56297.1 0 7107585 97 | 24 OAR24_8358348.1 0 7213312 98 | 24 OAR24_8402585.1 0 7245176 99 | 24 OAR24_8437214.1 0 7279523 100 | 24 OAR24_8533480.1 0 7366298 101 | 24 s28749.1 0 7454516 102 | 24 OAR24_8719574.1 0 7483090 103 | 24 OAR24_8655784.1 0 7564606 104 | 24 s03253.1 0 7615311 105 | 24 s12995.1 0 7667607 106 | 24 s50307.1 0 7721963 107 | 24 OAR24_8950654.1 0 7796684 108 | 24 OAR24_8989305.1 0 7833106 109 | 24 OAR24_9019723.1 0 7862681 110 | 24 OAR24_9095524.1 0 7935697 111 | 24 OAR24_9134934.1 0 7967370 112 | 24 s13251.1 0 8064215 113 | 24 OAR24_9291436.1 0 8113486 114 | 24 s08453.1 0 8170749 115 | 24 s65784.1 0 8224303 116 | 24 OAR24_9426433.1 0 8251305 117 | 24 OAR24_9525158.1 0 8314969 118 | 24 OAR24_9556480.1 0 8339627 119 | 24 s55323.1 0 8379806 120 | 24 s60985.1 0 8460459 121 | 24 s11846.1 0 8489860 122 | 24 s49862.1 0 8556294 123 | 24 OAR24_9805829.1 0 8579698 124 | 24 OAR24_9851027.1 0 8622723 125 | 24 OAR24_9982055.1 0 8673109 126 | 24 OAR24_9929376.1 0 8741291 127 | 24 s35996.1 0 8842899 128 | 24 s33564.1 0 8874963 129 | 24 s36149.1 0 8960592 130 | 24 OAR24_10354450.1 0 9010958 131 | 24 s72995.1 0 9054794 132 | 24 s39830.1 0 9076208 133 | 24 OAR24_10464626.1 0 9101635 134 | 24 s57548.1 0 9165564 135 | 24 OAR24_10617462.1 0 9191451 136 | 24 OAR24_10569180.1 0 9268996 137 | 24 s50126.1 0 9299002 138 | 24 s06239.1 0 9352754 139 | 24 OAR24_10812839.1 0 9399031 140 | 24 s41072.1 0 
9460076 141 | 24 s45778.1 0 9658771 142 | 24 s59452.1 0 9721533 143 | 24 s48115.1 0 9865712 144 | 24 OAR24_11445879.1 0 9942134 145 | 24 s65657.1 0 10130446 146 | 24 s74381.1 0 10176168 147 | 24 s62439.1 0 10208845 148 | 24 s58194.1 0 10429322 149 | 24 OAR24_11975271.1 0 10471141 150 | 24 s43077.1 0 10709974 151 | 24 s19809.1 0 10802789 152 | 24 s29806.1 0 10977878 153 | 24 s11845.1 0 11007102 154 | 24 s41672.1 0 11076801 155 | 24 s55569.1 0 11127323 156 | 24 s70383.1 0 11232313 157 | 24 OAR24_12863478.1 0 11326321 158 | 24 OAR24_12907825.1 0 11366060 159 | 24 s49976.1 0 11451480 160 | 24 OAR24_13080361.1 0 11526475 161 | 24 OAR24_13169307.1 0 11611963 162 | 24 OAR24_13240468.1 0 11682465 163 | 24 OAR24_13448346.1 0 11828268 164 | 24 s71593.1 0 11837275 165 | 24 s53502.1 0 12036238 166 | 24 OAR24_13675640.1 0 12065734 167 | 24 s57389.1 0 12129478 168 | 24 s60506.1 0 12181694 169 | 24 s30772.1 0 12335936 170 | 24 OAR24_14004421.1 0 12387404 171 | 24 OAR24_14120825.1 0 12496861 172 | 24 OAR24_14180223.1 0 12550566 173 | 24 OAR24_14191141.1 0 12558998 174 | 24 OAR24_14339607.1 0 12722014 175 | 24 OAR24_14436204.1 0 12817574 176 | 24 OAR24_14523420.1 0 12909856 177 | 24 s53334.1 0 12952274 178 | 24 OAR24_14589978.1 0 12978326 179 | 24 OAR24_14777333.1 0 13170165 180 | 24 s48669.1 0 13231049 181 | 24 s67889.1 0 13299441 182 | 24 OAR24_14937645.1 0 13326931 183 | 24 OAR24_15159567.1 0 13558686 184 | 24 OAR24_15249340.1 0 13647199 185 | 24 s38145.1 0 13702846 186 | 24 s01548.1 0 13724802 187 | 24 OAR24_15364787.1 0 13761594 188 | 24 OAR24_15370721.1 0 13767641 189 | 24 s60345.1 0 13900804 190 | 24 s02623.1 0 13922554 191 | 24 s46525.1 0 13980455 192 | 24 DU417100_360.1 0 14043458 193 | 24 s50891.1 0 14091247 194 | 24 s51540.1 0 14101008 195 | 24 OAR24_15842976.1 0 14275553 196 | 24 OAR24_15980862.1 0 14407738 197 | 24 s38283.1 0 14492312 198 | 24 s20384.1 0 14504777 199 | 24 s74032.1 0 14573998 200 | 24 s08527.1 0 14625661 201 | 24 OAR24_16205332.1 0 14654934 202 | 24 
s50198.1 0 14774974 203 | 24 OAR24_16396809.1 0 14838850 204 | 24 s68944.1 0 14870639 205 | 24 OAR24_16511707.1 0 14945620 206 | 24 OAR24_16582273.1 0 15019403 207 | 24 s00875.1 0 15078704 208 | 24 OAR24_16667981.1 0 15100676 209 | 24 OAR24_16719822.1 0 15151258 210 | 24 s15963.1 0 15217815 211 | 24 OAR24_16800536.1 0 15234910 212 | 24 s47518.1 0 15279664 213 | 24 OAR24_16900162.1 0 15324479 214 | 24 s05526.1 0 15367865 215 | 24 s25240.1 0 15510478 216 | 24 s38044.1 0 15551549 217 | 24 s64989.1 0 15589004 218 | 24 s54541.1 0 15617705 219 | 24 s20579.1 0 15637488 220 | 24 s37881.1 0 15762894 221 | 24 s03210.1 0 15821681 222 | 24 s28072.1 0 15865169 223 | 24 OAR24_17464944.1 0 15893121 224 | 24 s07830.1 0 16041579 225 | 24 OAR24_17655645.1 0 16077497 226 | 24 OAR24_17692688_X.1 0 16114681 227 | 24 OAR24_17726461.1 0 16147749 228 | 24 OAR24_17892863.1 0 16310285 229 | 24 s44380.1 0 16351007 230 | 24 OAR24_18156741.1 0 16573702 231 | 24 s07991.1 0 16686431 232 | 24 s20481.1 0 16811910 233 | 24 OAR24_18567026.1 0 16892562 234 | 24 s34101.1 0 16957079 235 | 24 s07059.1 0 17040364 236 | 24 s41614.1 0 17133716 237 | 24 OAR24_18834917.1 0 17159602 238 | 24 OAR24_18856734.1 0 17177139 239 | 24 s60009.1 0 17253902 240 | 24 s46918.1 0 17307132 241 | 24 s02147.1 0 17438055 242 | 24 s56991.1 0 17460873 243 | 24 s01644.1 0 17514108 244 | 24 OAR24_19260416.1 0 17578852 245 | 24 OAR24_19323253.1 0 17642594 246 | 24 s23486.1 0 17809880 247 | 24 OAR24_19539092.1 0 17848975 248 | 24 s68386.1 0 17916525 249 | 24 s33350.1 0 17996334 250 | 24 s54035.1 0 18027756 251 | 24 s14230.1 0 18035114 252 | 24 OAR24_19758612.1 0 18056244 253 | 24 OAR24_19855862.1 0 18135719 254 | 24 s08881.1 0 18235931 255 | 24 OAR24_19994470.1 0 18272045 256 | 24 OAR24_20204270.1 0 18523650 257 | 24 OAR24_20052648.1 0 18591348 258 | 24 OAR24_20302996.1 0 18669354 259 | 24 OAR24_20329442.1 0 18697488 260 | 24 s05422.1 0 18834056 261 | 24 s36126.1 0 18875112 262 | 24 s35655.1 0 18888717 263 | 24 OAR24_20753430.1 0 
19034754 264 | 24 OAR24_20863524.1 0 19223435 265 | 24 OAR24_20886826.1 0 19247160 266 | 24 OAR24_21000041.1 0 19347886 267 | 24 OAR24_21006003.1 0 19357253 268 | 24 OAR24_21411103.1 0 19579841 269 | 24 OAR24_21248784.1 0 19683887 270 | 24 OAR24_21308566.1 0 19741398 271 | 24 s28099.1 0 19819848 272 | 24 OAR24_21591643.1 0 19946691 273 | 24 s16604.1 0 20005048 274 | 24 s22065.1 0 20189283 275 | 24 s29329.1 0 20395301 276 | 24 s31524.1 0 20406106 277 | 24 OAR24_22201078.1 0 20442734 278 | 24 OAR24_22245800.1 0 20480133 279 | 24 OAR24_22279571.1 0 20513392 280 | 24 DU452167_477.1 0 20687733 281 | 24 s43516.1 0 20734455 282 | 24 s08099.1 0 20838669 283 | 24 s14509.1 0 20909487 284 | 24 s53189.1 0 20918106 285 | 24 OAR24_22751062.1 0 20978925 286 | 24 s70338.1 0 21019195 287 | 24 OAR24_22917633.1 0 21176747 288 | 24 OAR24_23134207.1 0 21392558 289 | 24 s62631.1 0 21460746 290 | 24 OAR24_23301447.1 0 21554660 291 | 24 OAR24_23326463.1 0 21581821 292 | 24 OAR24_23376701_X.1 0 21629724 293 | 24 OAR24_23511791.1 0 21762352 294 | 24 s55782.1 0 21836004 295 | 24 OAR24_23628621.1 0 21878624 296 | 24 OAR24_23664305.1 0 21910840 297 | 24 OAR24_23977454.1 0 22222030 298 | 24 OAR24_24088024.1 0 22329952 299 | 24 OAR24_24136222.1 0 22377381 300 | 24 OAR24_24319983.1 0 22419138 301 | 24 s39513.1 0 22479026 302 | 24 OAR24_24389032.1 0 22486632 303 | 24 OAR24_24517773.1 0 22561935 304 | 24 OAR24_24557331.1 0 22600531 305 | 24 s04085.1 0 22769125 306 | 24 s25419.1 0 22855134 307 | 24 s70645.1 0 22948393 308 | 24 OAR24_25044819.1 0 23002452 309 | 24 s44854.1 0 23184425 310 | 24 s56329.1 0 23209530 311 | 24 OAR24_25440977.1 0 23312288 312 | 24 OAR24_25774639.1 0 23418622 313 | 24 OAR24_25910052.1 0 23497703 314 | 24 s36966.1 0 23509895 315 | 24 OAR24_26374323.1 0 23664552 316 | 24 s39918.1 0 23689555 317 | 24 OAR24_26210500.1 0 23828331 318 | 24 OAR24_26497561.1 0 24040495 319 | 24 OAR24_26519271.1 0 24060526 320 | 24 OAR16_51462528.1 0 24062352 321 | 24 OAR24_26566995.1 0 24106007 322 
| 24 OAR24_26621114.1 0 24148552 323 | 24 OAR24_26676939.1 0 24190286 324 | 24 s07055.1 0 24314975 325 | 24 s28518.1 0 24472703 326 | 24 s61978.1 0 24493823 327 | 24 s16016.1 0 24591560 328 | 24 s46796.1 0 24629625 329 | 24 s11960.1 0 24690825 330 | 24 s32273.1 0 24740555 331 | 24 OAR24_27265846.1 0 24776080 332 | 24 s00643.1 0 24846522 333 | 24 OAR24_27348134_X.1 0 24859938 334 | 24 OAR24_27411708.1 0 24925070 335 | 24 s10237.1 0 24989534 336 | 24 s31717.1 0 25017904 337 | 24 OAR24_27595446.1 0 25107237 338 | 24 s33445.1 0 25240220 339 | 24 s31442.1 0 25421718 340 | 24 s06972.1 0 25474118 341 | 24 s61368.1 0 25523363 342 | 24 s32061.1 0 25614223 343 | 24 OAR24_28186386.1 0 25638494 344 | 24 OAR24_28249504.1 0 25700691 345 | 24 s00988.1 0 25710129 346 | 24 s33057.1 0 25809786 347 | 24 OAR24_28445680.1 0 25884487 348 | 24 OAR24_28471865.1 0 25909650 349 | 24 s07656.1 0 25961188 350 | 24 s68253.1 0 26018761 351 | 24 s07657.1 0 26068027 352 | 24 OAR24_28699670.1 0 26142394 353 | 24 s32325.1 0 26210836 354 | 24 s13195.1 0 26493858 355 | 24 s36253.1 0 26558782 356 | 24 OAR24_29236630.1 0 26681408 357 | 24 s07049.1 0 26759068 358 | 24 OAR24_29399883.1 0 26824139 359 | 24 s44711.1 0 27091483 360 | 24 s16047.1 0 27159621 361 | 24 s35650.1 0 27416389 362 | 24 s31244.1 0 27419408 363 | 24 s51834.1 0 27526097 364 | 24 s68838.1 0 27533351 365 | 24 OAR24_30139569_X.1 0 27613295 366 | 24 OAR24_30146533.1 0 27620251 367 | 24 OAR24_30209224.1 0 27680635 368 | 24 OAR24_30266733.1 0 27739247 369 | 24 OAR24_30492037.1 0 27928201 370 | 24 OAR24_30703545.1 0 28037384 371 | 24 OAR24_30746423.1 0 28078898 372 | 24 s56450.1 0 28178738 373 | 24 OAR24_30877035.1 0 28198419 374 | 24 OAR24_30906216.1 0 28227369 375 | 24 s08134.1 0 28291169 376 | 24 OAR24_31031292.1 0 28348997 377 | 24 s07554.1 0 28405516 378 | 24 s05297.1 0 28424137 379 | 24 s34346.1 0 28474045 380 | 24 s16947.1 0 28511321 381 | 24 OAR24_31266500.1 0 28591252 382 | 24 OAR24_31322147.1 0 28644720 383 | 24 s47851.1 0 28677855 
384 | 24 OAR24_31413264.1 0 28745899 385 | 24 s40549.1 0 28840429 386 | 24 OAR24_31544557.1 0 28877449 387 | 24 s07298.1 0 29155367 388 | 24 s44496.1 0 29197813 389 | 24 s70259.1 0 29314003 390 | 24 OAR24_32091810.1 0 29343449 391 | 24 OAR24_32127759.1 0 29375887 392 | 24 s45400.1 0 29475441 393 | 24 OAR24_32232601.1 0 29487180 394 | 24 OAR24_32270570.1 0 29525075 395 | 24 OAR24_32311680.1 0 29566674 396 | 24 OAR24_32374470.1 0 29571057 397 | 24 s59832.1 0 29652597 398 | 24 s32211.1 0 29910952 399 | 24 OAR24_32735779.1 0 29927667 400 | 24 OAR24_32779035.1 0 29970616 401 | 24 OAR24_32820415.1 0 30012896 402 | 24 OAR24_32895087.1 0 30083398 403 | 24 OAR24_32915074.1 0 30103953 404 | 24 OAR24_33013180.1 0 30202253 405 | 24 OAR24_33226792.1 0 30412150 406 | 24 s58673.1 0 30456600 407 | 24 OAR24_33276643.1 0 30463028 408 | 24 OAR24_33316807.1 0 30478163 409 | 24 OAR24_33558668.1 0 30690033 410 | 24 s47943.1 0 30808769 411 | 24 OAR24_33703095.1 0 30843847 412 | 24 OAR24_33734697.1 0 30875675 413 | 24 OAR24_33949802.1 0 30939595 414 | 24 s28848.1 0 30998948 415 | 24 OAR24_34105442.1 0 31093416 416 | 24 OAR24_34129250.1 0 31117811 417 | 24 s40632.1 0 31215558 418 | 24 s15024.1 0 31229367 419 | 24 OAR24_34322098.1 0 31309854 420 | 24 s51590.1 0 31359697 421 | 24 s52452.1 0 31556086 422 | 24 OAR24_34590246.1 0 31568788 423 | 24 s15456.1 0 31774063 424 | 24 s52785.1 0 31821433 425 | 24 s30101.1 0 31899729 426 | 24 s50902.1 0 31943864 427 | 24 s75375.1 0 31947993 428 | 24 OAR24_35166939.1 0 31953567 429 | 24 OAR24_35315373.1 0 32103135 430 | 24 s59335.1 0 32163368 431 | 24 s49066.1 0 32221432 432 | 24 s39500.1 0 32236767 433 | 24 OAR24_35501776.1 0 32294099 434 | 24 s30694.1 0 32349389 435 | 24 s21269.1 0 32354842 436 | 24 s13493.1 0 32390507 437 | 24 OAR24_35606022.1 0 32495625 438 | 24 OAR24_35669227.1 0 32552648 439 | 24 OAR24_35715684.1 0 32598869 440 | 24 OAR24_35736226.1 0 32623905 441 | 24 s37249.1 0 32681863 442 | 24 s05045.1 0 32800970 443 | 24 s64882.1 0 32827227 444 
| 24 s17643.1 0 32984246 445 | 24 s59571.1 0 33056732 446 | 24 s41842.1 0 33159082 447 | 24 s40989.1 0 33172658 448 | 24 s01074.1 0 33225361 449 | 24 OAR24_36387717.1 0 33313461 450 | 24 DU442221_261.1 0 33343694 451 | 24 s13600.1 0 33397677 452 | 24 s02082.1 0 33413892 453 | 24 OAR24_36511570.1 0 33438700 454 | 24 s33485.1 0 33483609 455 | 24 OAR24_36594681.1 0 33503397 456 | 24 OAR24_36724314.1 0 33631100 457 | 24 s09857.1 0 33643979 458 | 24 OAR24_36782932.1 0 33692582 459 | 24 DU328546_113.1 0 33740824 460 | 24 s65979.1 0 33780315 461 | 24 s62096.1 0 33798780 462 | 24 s08706.1 0 33879086 463 | 24 DU170943_581.1 0 33934482 464 | 24 s43015.1 0 33953578 465 | 24 OAR24_37064565.1 0 33961710 466 | 24 OAR24_37150973.1 0 34039464 467 | 24 s27151.1 0 34113252 468 | 24 s18493.1 0 34176391 469 | 24 OAR24_37414016.1 0 34225118 470 | 24 s28866.1 0 34312321 471 | 24 OAR24_37519391.1 0 34345815 472 | 24 s03477.1 0 34460565 473 | 24 s53082.1 0 34553003 474 | 24 s67748.1 0 34607110 475 | 24 s37086.1 0 34666650 476 | 24 s06024.1 0 34985988 477 | 24 s69400.1 0 35106020 478 | 24 s54071.1 0 35216176 479 | 24 OAR24_38453414.1 0 35272492 480 | 24 OAR24_38653760.1 0 35459203 481 | 24 s20643.1 0 35558717 482 | 24 OAR24_38776803.1 0 35582464 483 | 24 s04373.1 0 35585230 484 | 24 s70828.1 0 35833919 485 | 24 s03728.1 0 35878194 486 | 24 s08464.1 0 35991517 487 | 24 s34514.1 0 36036418 488 | 24 s31693.1 0 36076909 489 | 24 OAR24_39340682.1 0 36160755 490 | 24 s23797.1 0 36250095 491 | 24 OAR24_39533389.1 0 36394143 492 | 24 s34597.1 0 36425274 493 | 24 OAR24_39945216.1 0 36791669 494 | 24 s49742.1 0 36938439 495 | 24 s69014.1 0 36962278 496 | 24 OAR24_40254205.1 0 37015852 497 | 24 s66117.1 0 37103553 498 | 24 OAR24_40402783.1 0 37161456 499 | 24 s17819.1 0 37202588 500 | 24 s63991.1 0 37258914 501 | 24 OAR24_40508317.1 0 37267443 502 | 24 OAR24_40526014.1 0 37284788 503 | 24 s66826.1 0 37349126 504 | 24 OAR24_40645682.1 0 37404374 505 | 24 OAR24_40760850.1 0 37523464 506 | 24 s38040.1 0 
37613659 507 | 24 s68269.1 0 37661519 508 | 24 OAR24_40925627.1 0 37690077 509 | 24 OAR24_40952345.1 0 37716648 510 | 24 s69582.1 0 37753985 511 | 24 s73415.1 0 37808477 512 | 24 s24530.1 0 37923367 513 | 24 OAR24_41096897.1 0 37965791 514 | 24 OAR24_41186690.1 0 38056567 515 | 24 OAR24_41302992.1 0 38125663 516 | 24 OAR24_41337655.1 0 38162634 517 | 24 s51547.1 0 38184103 518 | 24 s38384.1 0 38268540 519 | 24 OAR24_41535215.1 0 38357380 520 | 24 OAR24_41635456.1 0 38459023 521 | 24 s08533.1 0 38615161 522 | 24 s62367.1 0 38671665 523 | 24 s71282.1 0 38763760 524 | 24 s19150.1 0 38840214 525 | 24 OAR24_42102371.1 0 38916880 526 | 24 OAR24_42162573.1 0 38974331 527 | 24 s43383.1 0 38985778 528 | 24 s01323.1 0 39101148 529 | 24 s57069.1 0 39137763 530 | 24 s51596.1 0 39214598 531 | 24 OAR24_42446682.1 0 39246209 532 | 24 s10916.1 0 39300789 533 | 24 s69462.1 0 39432711 534 | 24 s74973.1 0 39537744 535 | 24 s16306.1 0 39603557 536 | 24 s00899.1 0 39623269 537 | 24 s33432.1 0 39651791 538 | 24 OAR24_42927903.1 0 39711452 539 | 24 OAR24_42942982.1 0 39726323 540 | 24 OAR24_42952578.1 0 39735778 541 | 24 OAR24_43169917.1 0 39950630 542 | 24 s50495.1 0 39996737 543 | 24 OAR24_43254456.1 0 40031027 544 | 24 s30517.1 0 40075324 545 | 24 s65063.1 0 40193947 546 | 24 OAR24_43494716.1 0 40256403 547 | 24 s10591.1 0 40336394 548 | 24 s34013.1 0 40371552 549 | 24 s59442.1 0 40395382 550 | 24 OAR24_43722123.1 0 40431914 551 | 24 s42827.1 0 40501449 552 | 24 s55689.1 0 40717526 553 | 24 OAR24_44025537.1 0 40762840 554 | 24 s11015.1 0 40817958 555 | 24 s17831.1 0 40882153 556 | 24 s51638.1 0 40933388 557 | 24 s31118.1 0 41008043 558 | 24 OAR24_44293969.1 0 41034970 559 | 24 s14581.1 0 41143981 560 | 24 s04960.1 0 41295400 561 | 24 s12851.1 0 41600515 562 | 24 s75862.1 0 41924374 563 | 24 OAR24_44850918.1 0 42027686 564 | -------------------------------------------------------------------------------- /detectRUNS/tests/testthat/test.raw: 
-------------------------------------------------------------------------------- 1 | FID IID PAT MAT SEX PHENOTYPE s72739.1_C s23759.1_0 OAR24_862318.1_G s01400.1_T OAR24_885120.1_T s56532.1_G OAR24_1207928.1_G s29930.1_T s01398.1_A s34109.1_C DU370509_408.1_A OAR24_1604830.1_G s33505.1_T OAR24_1686363.1_C s56493.1_G s09648.1_G s32804.1_C OAR24_1989440.1_T s74151.1_C s15037.1_A s07278.1_A s57470.1_G s54890.1_C OAR24_2841958.1_A s74400.1_T s12708.1_T s25251.1_T s58502.1_C s25478.1_G OAR24_3313470.1_G s54800.1_C OAR24_3409909.1_A s40068.1_T DU245677_479.1_T s48530.1_T s55148.1_G s36987.1_A OAR24_3882451.1_C OAR24_3997544.1_A s67652.1_C s03019.1_C s67586.1_T s32401.1_A OAR24_4400996.1_G s68231.1_C s66763.1_A s18006.1_T s69050.1_C OAR24_4995509.1_A s17528.1_A s15154.1_A s62308.1_C s44109.1_G s39492.1_A s34904.1_C s07735.1_T OAR24_5505124.1_C s06038.1_T s35487.1_C s18280.1_G s31084.1_C OAR24_5933801.1_A s46925.1_C OAR24_6094542.1_G DU281388_299.1_C s48656.1_G OAR24_6411852.1_T OAR24_6423269.1_A s35059.1_T OAR24_6554282.1_0 OAR24_6671055.1_0 s13832.1_A s03559.1_A OAR24_6927502_X.1_A OAR24_6970428.1_A s54640.1_G OAR24_7019248.1_T OAR24_7119784.1_T s11598.1_T s68193.1_A OAR24_7239593.1_C s31852.1_A OAR24_7424449.1_T OAR24_7435004.1_T OAR24_7509611.1_C OAR24_7543424.1_T s30115.1_A s50383.1_T OAR24_7741373.1_T OAR24_7849246.1_C s04817.1_T OAR24_7944022_X.1_A OAR24_8021643.1_A OAR24_8063846.1_T s04466.1_A s56297.1_G OAR24_8358348.1_C OAR24_8402585.1_C OAR24_8437214.1_A OAR24_8533480.1_A s28749.1_A OAR24_8719574.1_T OAR24_8655784.1_G s03253.1_A s12995.1_G s50307.1_A OAR24_8950654.1_T OAR24_8989305.1_T OAR24_9019723.1_T OAR24_9095524.1_A OAR24_9134934.1_G s13251.1_T OAR24_9291436.1_T s08453.1_C s65784.1_C OAR24_9426433.1_C OAR24_9525158.1_C OAR24_9556480.1_G s55323.1_T s60985.1_T s11846.1_C s49862.1_A OAR24_9805829.1_G OAR24_9851027.1_T OAR24_9982055.1_T OAR24_9929376.1_T s35996.1_A s33564.1_T s36149.1_G OAR24_10354450.1_T s72995.1_T s39830.1_T OAR24_10464626.1_T s57548.1_C 
OAR24_10617462.1_A OAR24_10569180.1_C s50126.1_C s06239.1_T OAR24_10812839.1_C s41072.1_T s45778.1_A s59452.1_A s48115.1_G OAR24_11445879.1_G s65657.1_G s74381.1_T s62439.1_A s58194.1_G OAR24_11975271.1_C s43077.1_C s19809.1_C s29806.1_T s11845.1_T s41672.1_T s55569.1_A s70383.1_G OAR24_12863478.1_C OAR24_12907825.1_A s49976.1_A OAR24_13080361.1_A OAR24_13169307.1_G OAR24_13240468.1_A OAR24_13448346.1_T s71593.1_T s53502.1_A OAR24_13675640.1_A s57389.1_A s60506.1_T s30772.1_A OAR24_14004421.1_G OAR24_14120825.1_G OAR24_14180223.1_0 OAR24_14191141.1_T OAR24_14339607.1_G OAR24_14436204.1_C OAR24_14523420.1_A s53334.1_C OAR24_14589978.1_G OAR24_14777333.1_T s48669.1_0 s67889.1_G OAR24_14937645.1_T OAR24_15159567.1_T OAR24_15249340.1_A s38145.1_C s01548.1_A OAR24_15364787.1_C OAR24_15370721.1_G s60345.1_G s02623.1_T s46525.1_G DU417100_360.1_T s50891.1_T s51540.1_C OAR24_15842976.1_A OAR24_15980862.1_T s38283.1_C s20384.1_C s74032.1_C s08527.1_T OAR24_16205332.1_G s50198.1_T OAR24_16396809.1_G s68944.1_T OAR24_16511707.1_A OAR24_16582273.1_A s00875.1_A OAR24_16667981.1_G OAR24_16719822.1_G s15963.1_A OAR24_16800536.1_T s47518.1_C OAR24_16900162.1_G s05526.1_C s25240.1_G s38044.1_T s64989.1_C s54541.1_C s20579.1_C s37881.1_G s03210.1_G s28072.1_T OAR24_17464944.1_T s07830.1_C OAR24_17655645.1_G OAR24_17692688_X.1_T OAR24_17726461.1_T OAR24_17892863.1_A s44380.1_T OAR24_18156741.1_A s07991.1_0 s20481.1_T OAR24_18567026.1_A s34101.1_C s07059.1_A s41614.1_T OAR24_18834917.1_C OAR24_18856734.1_A s60009.1_C s46918.1_T s02147.1_T s56991.1_T s01644.1_C OAR24_19260416.1_T OAR24_19323253.1_G s23486.1_A OAR24_19539092.1_C s68386.1_C s33350.1_C s54035.1_A s14230.1_G OAR24_19758612.1_G OAR24_19855862.1_T s08881.1_C OAR24_19994470.1_T OAR24_20204270.1_G OAR24_20052648.1_A OAR24_20302996.1_G OAR24_20329442.1_A s05422.1_C s36126.1_A s35655.1_G OAR24_20753430.1_T OAR24_20863524.1_T OAR24_20886826.1_G OAR24_21000041.1_A OAR24_21006003.1_C OAR24_21411103.1_C OAR24_21248784.1_C 
OAR24_21308566.1_A s28099.1_T OAR24_21591643.1_T s16604.1_G s22065.1_T s29329.1_C s31524.1_T OAR24_22201078.1_G OAR24_22245800.1_G OAR24_22279571.1_A DU452167_477.1_G s43516.1_G s08099.1_A s14509.1_C s53189.1_A OAR24_22751062.1_T s70338.1_T OAR24_22917633.1_C OAR24_23134207.1_0 s62631.1_G OAR24_23301447.1_C OAR24_23326463.1_T OAR24_23376701_X.1_G OAR24_23511791.1_G s55782.1_G OAR24_23628621.1_G OAR24_23664305.1_C OAR24_23977454.1_C OAR24_24088024.1_G OAR24_24136222.1_C OAR24_24319983.1_A s39513.1_A OAR24_24389032.1_C OAR24_24517773.1_A OAR24_24557331.1_T s04085.1_G s25419.1_0 s70645.1_C OAR24_25044819.1_T s44854.1_T s56329.1_T OAR24_25440977.1_C OAR24_25774639.1_A OAR24_25910052.1_T s36966.1_T OAR24_26374323.1_A s39918.1_A OAR24_26210500.1_G OAR24_26497561.1_T OAR24_26519271.1_G OAR16_51462528.1_A OAR24_26566995.1_C OAR24_26621114.1_G OAR24_26676939.1_A s07055.1_T s28518.1_C s61978.1_T s16016.1_C s46796.1_G s11960.1_T s32273.1_G OAR24_27265846.1_C s00643.1_A OAR24_27348134_X.1_C OAR24_27411708.1_A s10237.1_C s31717.1_T OAR24_27595446.1_T s33445.1_C s31442.1_A s06972.1_T s61368.1_C s32061.1_T OAR24_28186386.1_T OAR24_28249504.1_C s00988.1_T s33057.1_0 OAR24_28445680.1_G OAR24_28471865.1_A s07656.1_G s68253.1_G s07657.1_A OAR24_28699670.1_G s32325.1_A s13195.1_C s36253.1_C OAR24_29236630.1_G s07049.1_G OAR24_29399883.1_T s44711.1_C s16047.1_0 s35650.1_G s31244.1_A s51834.1_T s68838.1_A OAR24_30139569_X.1_A OAR24_30146533.1_G OAR24_30209224.1_T OAR24_30266733.1_G OAR24_30492037.1_G OAR24_30703545.1_G OAR24_30746423.1_A s56450.1_T OAR24_30877035.1_A OAR24_30906216.1_A s08134.1_G OAR24_31031292.1_T s07554.1_A s05297.1_0 s34346.1_T s16947.1_T OAR24_31266500.1_G OAR24_31322147.1_T s47851.1_G OAR24_31413264.1_C s40549.1_T OAR24_31544557.1_A s07298.1_G s44496.1_T s70259.1_T OAR24_32091810.1_A OAR24_32127759.1_T s45400.1_C OAR24_32232601.1_C OAR24_32270570.1_T OAR24_32311680.1_G OAR24_32374470.1_G s59832.1_T s32211.1_T OAR24_32735779.1_A OAR24_32779035.1_A OAR24_32820415.1_C 
OAR24_32895087.1_T OAR24_32915074.1_G OAR24_33013180.1_A OAR24_33226792.1_A s58673.1_A OAR24_33276643.1_G OAR24_33316807.1_T OAR24_33558668.1_A s47943.1_A OAR24_33703095.1_G OAR24_33734697.1_T OAR24_33949802.1_A s28848.1_0 OAR24_34105442.1_T OAR24_34129250.1_A s40632.1_T s15024.1_A OAR24_34322098.1_G s51590.1_C s52452.1_G OAR24_34590246.1_C s15456.1_G s52785.1_G s30101.1_G s50902.1_A s75375.1_A OAR24_35166939.1_C OAR24_35315373.1_A s59335.1_G s49066.1_G s39500.1_C OAR24_35501776.1_T s30694.1_G s21269.1_A s13493.1_T OAR24_35606022.1_T OAR24_35669227.1_G OAR24_35715684.1_T OAR24_35736226.1_A s37249.1_T s05045.1_A s64882.1_C s17643.1_A s59571.1_A s41842.1_T s40989.1_C s01074.1_C OAR24_36387717.1_G DU442221_261.1_G s13600.1_T s02082.1_T OAR24_36511570.1_T s33485.1_G OAR24_36594681.1_G OAR24_36724314.1_A s09857.1_T OAR24_36782932.1_T DU328546_113.1_T s65979.1_T s62096.1_C s08706.1_G DU170943_581.1_C s43015.1_C OAR24_37064565.1_C OAR24_37150973.1_A s27151.1_A s18493.1_A OAR24_37414016.1_T s28866.1_A OAR24_37519391.1_T s03477.1_A s53082.1_T s67748.1_A s37086.1_A s06024.1_A s69400.1_G s54071.1_0 OAR24_38453414.1_C OAR24_38653760.1_C s20643.1_A OAR24_38776803.1_G s04373.1_A s70828.1_T s03728.1_G s08464.1_A s34514.1_G s31693.1_T OAR24_39340682.1_G s23797.1_T OAR24_39533389.1_A s34597.1_G OAR24_39945216.1_A s49742.1_G s69014.1_G OAR24_40254205.1_G s66117.1_T OAR24_40402783.1_C s17819.1_G s63991.1_T OAR24_40508317.1_T OAR24_40526014.1_C s66826.1_A OAR24_40645682.1_A OAR24_40760850.1_T s38040.1_C s68269.1_C OAR24_40925627.1_A OAR24_40952345.1_T s69582.1_A s73415.1_T s24530.1_A OAR24_41096897.1_A OAR24_41186690.1_C OAR24_41302992.1_T OAR24_41337655.1_T s51547.1_G s38384.1_A OAR24_41535215.1_C OAR24_41635456.1_A s08533.1_A s62367.1_C s71282.1_A s19150.1_C OAR24_42102371.1_T OAR24_42162573.1_A s43383.1_T s01323.1_T s57069.1_0 s51596.1_C OAR24_42446682.1_T s10916.1_T s69462.1_G s74973.1_C s16306.1_0 s00899.1_G s33432.1_G OAR24_42927903.1_A OAR24_42942982.1_C OAR24_42952578.1_A 
OAR24_43169917.1_C s50495.1_G OAR24_43254456.1_T s30517.1_G s65063.1_T OAR24_43494716.1_G s10591.1_C s34013.1_A s59442.1_T OAR24_43722123.1_A s42827.1_G s55689.1_A OAR24_44025537.1_G s11015.1_A s17831.1_C s51638.1_A s31118.1_T OAR24_44293969.1_A s14581.1_A s04960.1_T s12851.1_C s75862.1_A OAR24_44850918.1_C 2 | Jacobs H70 0 0 1 -9 1 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 2 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0 1 0 1 0 1 1 0 0 1 1 0 1 1 0 2 0 0 1 0 1 1 0 0 1 1 1 1 0 1 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 0 0 2 0 0 0 0 1 0 0 1 2 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 2 1 1 1 0 0 0 1 1 1 0 0 0 0 0 1 1 1 1 1 1 1 0 1 0 0 0 1 1 1 0 1 0 0 1 0 0 0 0 1 0 0 0 1 0 0 1 0 1 0 1 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 1 1 1 1 0 0 0 1 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 2 1 1 1 1 0 1 1 0 0 0 0 2 0 1 0 0 0 0 2 0 0 2 2 0 1 1 0 1 1 0 1 0 1 0 0 0 1 0 1 0 0 1 0 0 1 0 1 1 0 0 1 2 0 2 0 1 2 0 1 0 0 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 0 2 0 0 0 0 1 1 0 0 1 0 0 1 1 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 2 2 0 0 2 0 0 2 2 0 2 0 2 0 0 0 0 0 2 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 1 0 0 0 1 1 0 1 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 2 1 0 0 1 0 1 1 1 0 1 0 0 1 0 0 0 0 0 0 1 1 1 0 0 0 2 1 0 1 1 1 2 2 2 1 1 0 2 1 0 1 0 0 2 2 3 | Jacobs H71 0 0 1 -9 0 0 1 0 1 1 0 0 2 1 1 1 0 0 0 0 0 0 0 2 1 0 0 0 1 0 0 1 0 1 2 1 1 0 1 1 1 0 0 1 1 0 1 2 1 NA 1 1 1 0 NA 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 1 1 0 0 0 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 1 2 0 0 2 0 1 1 1 1 0 1 0 1 2 0 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 1 0 1 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 1 0 1 1 0 0 1 0 0 1 0 1 1 2 1 0 0 1 0 0 1 0 1 0 1 2 0 0 0 1 0 1 1 1 1 0 1 0 1 1 0 0 0 0 1 0 0 1 1 2 1 1 1 1 0 1 1 0 1 1 0 1 0 0 0 1 0 0 2 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 0 0 2 0 0 0 0 2 0 0 2 0 0 0 0 0 0 2 1 0 1 0 2 2 0 0 0 1 2 2 2 0 0 0 0 0 1 1 NA 1 1 0 0 0 0 0 1 1 0 1 0 0 1 0 0 0 1 0 0 1 2 0 0 0 0 0 0 0 1 0 0 1 0 1 0 2 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 2 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 2 2 0 2 2 2 0 0 0 0 0 0 1 0 0 4 | Jacobs H72 0 0 1 -9 1 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 2 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 1 1 2 1 0 1 0 1 0 0 1 1 0 0 2 2 0 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 1 2 1 0 1 0 0 0 1 1 0 1 2 1 0 1 1 1 1 0 1 0 0 1 0 0 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 1 1 1 0 1 1 0 0 1 0 0 1 2 0 1 0 0 1 1 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 1 0 1 1 0 0 0 1 1 0 1 1 0 1 0 0 0 1 1 1 0 0 0 0 0 1 0 0 0 1 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 1 0 0 0 1 1 0 1 1 1 1 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 1 0 0 1 0 0 0 1 0 0 1 0 1 1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 1 1 1 1 0 0 1 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 0 0 0 1 1 0 1 0 0 0 0 1 1 1 1 0 0 0 1 1 1 0 0 1 1 1 1 0 0 0 0 0 2 2 0 0 2 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 1 0 0 0 0 1 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 0 0 1 1 0 1 1 0 1 1 1 0 0 0 0 0 0 1 0 0 5 | Jacobs H78 0 0 2 -9 0 0 1 0 1 1 0 0 2 1 1 1 0 0 0 0 0 0 0 2 1 0 0 0 1 0 0 1 0 1 2 1 1 0 1 1 1 0 0 1 1 0 1 2 1 1 1 1 1 0 1 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 1 0 0 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1 1 1 0 1 0 0 0 0 1 0 1 1 0 0 0 0 0 0 
1 1 0 0 2 0 0 0 0 1 0 0 2 2 1 1 0 1 0 1 0 0 0 1 1 0 0 1 0 2 1 1 1 0 0 0 1 1 1 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 1 1 0 1 0 1 1 1 0 0 0 0 1 0 0 0 2 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 1 0 1 1 0 1 0 0 1 1 0 0 0 1 1 0 0 1 1 1 0 1 0 0 1 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 2 1 1 1 1 1 1 1 0 0 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 2 0 1 1 1 0 1 0 2 1 0 1 0 0 1 0 1 0 0 0 1 0 2 2 0 1 1 2 0 1 1 0 1 0 0 1 1 0 0 1 0 0 0 0 1 0 0 1 1 0 1 1 0 0 1 0 1 0 1 2 0 1 1 2 1 1 0 1 0 1 1 0 1 0 1 1 0 1 1 0 0 0 0 1 2 0 0 1 0 0 1 1 1 1 0 1 0 1 1 2 1 0 2 0 1 0 0 1 0 0 0 0 0 2 2 2 2 0 0 0 0 2 2 0 2 0 0 0 0 2 0 2 2 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 2 2 0 2 0 0 0 2 0 0 0 2 0 0 0 0 0 0 0 2 2 0 2 0 0 2 0 2 2 0 0 0 0 1 0 2 0 2 0 1 0 1 2 1 0 0 0 0 0 0 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 6 | Jacobs H79 0 0 2 -9 1 0 1 0 0 1 0 0 1 1 1 1 0 0 1 0 0 0 0 0 2 1 0 0 0 0 0 0 2 1 1 0 0 0 1 0 0 0 1 1 1 1 1 1 1 0 0 0 1 0 2 1 1 0 1 0 1 2 0 1 0 1 1 1 0 2 0 1 1 0 0 1 0 2 2 0 0 0 0 2 1 0 2 2 0 0 1 2 0 1 1 0 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 2 1 0 1 0 1 2 1 0 2 1 1 0 0 0 2 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 1 1 1 0 1 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 2 0 1 0 0 0 1 0 0 2 0 1 1 2 1 0 2 2 1 2 0 0 0 0 2 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 1 1 1 2 2 0 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 0 1 1 0 0 1 0 0 0 1 0 1 0 0 1 1 0 1 0 1 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 1 1 0 1 1 0 1 0 1 0 0 0 0 0 1 1 1 0 0 1 0 0 1 0 2 0 0 0 0 2 0 2 0 1 2 0 0 0 0 1 1 0 2 0 0 0 1 1 1 0 0 1 0 0 0 2 0 0 0 0 1 1 0 0 1 0 0 1 1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 2 2 0 1 1 0 0 1 1 0 2 0 1 0 0 0 0 0 1 2 0 0 0 0 0 0 0 2 0 1 0 0 0 0 1 1 1 0 0 1 0 1 1 1 0 0 0 1 1 1 0 0 0 1 0 1 1 1 1 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 1 0 0 0 1 1 2 1 0 1 0 2 1 0 0 2 1 1 1 1 0 0 0 0 1 0 1 0 1 1 1 0 0 0 2 2 1 0 0 0 0 0 1 0 1 0 1 0 1 0 1 1 2 0 0 1 
1 0 0 0 0 0 1 0 0 0 0 0 1 0 1 2 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 1 7 | Jacobs H80 0 0 1 -9 0 0 0 0 0 1 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 0 0 0 0 1 0 2 0 0 1 1 1 0 1 0 1 1 0 2 1 0 0 1 0 0 1 0 1 0 NA 1 1 0 1 0 0 1 0 0 0 1 1 1 0 0 0 0 1 0 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 0 1 0 1 1 1 0 1 0 1 0 0 1 0 2 1 1 0 0 0 1 1 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 1 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 1 1 0 1 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 1 1 1 1 0 0 0 0 0 0 1 0 1 0 0 1 1 2 0 0 0 1 1 1 1 0 1 0 0 0 1 1 0 0 0 0 1 1 2 0 0 0 0 0 NA 0 0 1 1 1 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 2 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 0 2 2 0 1 0 0 1 0 0 1 0 1 1 0 1 0 2 1 0 1 1 0 0 1 0 1 1 0 2 1 1 0 0 0 0 1 1 1 1 2 0 0 1 0 0 0 0 2 0 0 1 1 1 2 2 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 0 1 0 0 0 1 1 2 1 1 2 2 0 0 1 0 1 1 1 1 0 0 1 0 1 1 2 1 1 0 1 0 1 0 0 1 0 1 1 0 0 0 1 0 0 1 1 1 1 1 0 0 0 1 1 2 0 1 0 0 0 1 1 0 1 1 1 1 1 0 1 0 0 2 1 0 0 1 0 1 1 0 1 0 1 2 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0 1 1 0 1 2 0 1 0 1 0 2 0 0 1 1 0 1 1 0 0 0 1 2 2 0 1 0 0 1 0 1 1 1 0 1 0 1 0 1 1 1 0 0 0 1 2 2 1 1 0 1 0 1 0 0 1 1 0 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 8 | Jacobs H86 0 0 2 -9 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 1 1 1 0 1 0 0 1 0 0 0 1 1 1 0 2 0 2 2 0 0 1 0 1 1 0 0 0 0 1 1 0 1 1 0 0 0 2 0 1 1 1 0 1 1 0 0 0 1 0 2 0 1 0 1 0 1 1 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 1 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 1 1 1 1 0 1 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 1 0 1 1 0 1 0 0 0 0 0 0 2 1 0 1 1 1 0 0 0 0 2 1 2 1 2 0 0 1 2 2 1 0 0 0 0 2 1 0 0 0 0 0 1 0 1 1 1 0 1 0 0 0 2 0 0 0 0 2 1 0 0 0 1 1 0 1 0 0 2 1 1 2 1 0 0 0 0 0 0 1 1 1 1 1 2 1 0 0 1 0 1 1 1 1 1 1 1 2 0 1 0 2 2 0 1 2 0 0 0 1 0 0 0 0 1 0 0 2 0 1 1 1 1 2 1 1 0 0 0 2 0 1 0 0 0 0 2 0 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 0 1 0 2 0 0 2 0 0 1 0 2 1 0 1 0 1 0 2 0 0 2 0 0 0 0 2 2 0 1 0 0 0 0 0 0 1 1 0 1 1 2 0 1 0 1 0 1 0 0 0 0 
0 1 1 0 1 0 1 2 0 1 2 1 1 0 0 0 0 0 1 1 0 1 1 1 0 0 0 0 0 1 0 0 1 0 1 1 2 2 1 0 0 2 1 0 0 0 1 0 1 1 1 0 1 0 1 1 2 1 0 2 0 2 2 1 0 0 2 0 2 2 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 2 0 0 1 0 0 0 0 0 1 1 0 0 1 1 0 1 1 0 1 1 0 0 0 2 0 1 0 0 1 NA 0 1 1 1 2 0 0 1 0 1 1 1 0 0 0 1 0 2 0 2 0 1 0 1 2 1 0 0 0 0 0 1 0 0 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 0 0 0 0 2 1 9 | Jacobs H87 0 0 2 -9 0 0 1 0 0 2 0 0 1 1 1 1 0 1 1 0 0 1 0 0 1 1 0 0 0 1 0 1 1 1 1 0 0 0 2 NA 0 1 1 1 1 1 1 1 1 0 0 0 1 0 2 1 1 0 1 0 1 2 0 1 0 1 1 1 0 2 0 1 1 0 0 1 0 2 2 0 0 0 0 2 1 0 2 2 0 0 1 2 0 1 1 0 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 2 1 0 1 0 1 2 1 0 2 1 0 1 0 0 2 0 1 0 1 0 0 0 1 0 1 1 1 0 0 0 1 1 0 0 1 0 0 1 2 0 1 2 1 0 1 1 1 0 1 0 0 1 1 0 1 0 1 0 0 0 1 0 1 1 1 0 1 1 0 0 0 2 1 0 0 0 0 0 1 1 0 0 0 1 1 1 2 1 0 0 1 0 1 0 1 1 0 1 0 1 0 1 0 1 1 1 0 0 0 1 0 0 0 0 1 1 0 0 0 1 1 0 1 1 0 0 0 0 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 0 1 0 1 1 0 0 0 1 1 0 1 0 0 1 0 1 1 1 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 2 2 1 1 0 0 0 0 1 0 2 0 1 1 0 2 0 0 2 2 1 2 0 0 1 0 0 0 1 1 1 0 0 1 1 0 0 1 0 0 0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 0 2 1 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 0 0 0 1 1 1 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 1 1 1 2 1 1 1 NA 1 1 0 0 0 0 0 1 0 0 2 1 0 0 0 0 0 1 1 0 0 0 1 1 0 1 1 0 2 1 1 1 1 1 0 1 0 1 0 0 1 1 NA 1 0 2 0 0 1 0 0 1 1 1 1 0 0 0 0 1 0 1 0 2 1 0 2 1 0 2 0 0 2 1 1 0 0 0 1 0 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 10 | Jacobs H88 0 0 1 -9 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 0 0 1 1 0 NA 1 1 0 1 0 0 1 0 0 0 1 1 1 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 1 1 0 0 1 1 0 1 1 0 0 0 0 1 0 0 1 0 1 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 2 1 0 1 0 1 2 1 0 2 1 1 0 0 0 2 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 0 2 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 2 0 0 2 0 0 0 2 2 0 2 2 0 2 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 
0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 0 2 2 0 0 0 2 2 0 2 2 2 2 0 0 2 2 2 0 0 0 0 2 2 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 1 0 0 0 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 2 2 0 0 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 1 1 1 1 1 1 1 0 1 1 0 0 0 0 0 2 1 0 1 1 0 0 0 0 0 1 1 0 0 1 0 0 0 1 1 0 1 1 1 1 0 0 0 0 1 1 1 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 1 0 0 2 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 11 | Jacobs H93 0 0 2 -9 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 2 1 1 0 1 0 0 0 0 0 0 1 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 2 2 0 0 2 0 0 0 0 0 2 0 0 2 0 2 2 0 0 0 0 2 2 0 2 0 0 0 0 1 0 1 1 1 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 1 0 2 1 0 1 0 2 1 1 0 0 0 0 2 2 1 0 0 0 0 0 1 0 1 0 2 1 1 0 2 1 0 0 2 2 2 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 2 1 1 2 2 0 1 1 0 0 0 1 0 1 1 1 1 1 0 0 1 0 2 2 0 0 2 1 1 1 1 0 0 2 2 1 0 2 0 0 0 2 1 0 0 0 0 1 0 0 0 0 0 0 2 1 2 1 0 0 0 1 0 0 1 1 1 1 1 1 0 0 0 0 1 1 0 1 1 2 1 1 0 1 1 0 2 0 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0 1 0 1 1 0 1 1 1 0 0 1 0 0 0 0 1 2 2 1 1 0 0 1 1 2 0 1 1 0 1 2 0 1 1 1 1 1 0 0 1 0 0 2 0 0 0 0 1 1 1 0 0 0 0 2 2 0 1 1 0 0 1 1 0 2 0 1 0 0 0 0 0 1 2 0 0 0 0 0 0 0 1 1 0 1 0 0 0 2 1 0 0 0 0 0 0 0 2 0 1 1 0 0 0 0 0 0 2 1 1 1 0 1 1 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 0 1 1 1 1 0 0 1 0 1 0 1 0 0 2 2 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 2 1 1 1 1 0 0 2 0 0 1 0 1 0 0 1 1 0 0 0 0 1 1 0 0 0 1 1 0 0 0 0 1 2 2 0 0 0 1 0 0 1 1 0 2 1 12 | Navajo-Churro H38 0 0 2 -9 1 0 2 0 1 2 0 1 1 1 0 0 0 1 0 1 0 0 1 2 1 2 0 0 2 0 2 2 0 0 2 2 0 0 0 0 2 1 1 0 0 0 0 1 0 0 1 0 0 2 2 1 1 1 1 1 1 0 2 2 0 1 1 2 0 2 0 0 2 0 0 0 
1 1 1 1 1 1 0 1 0 1 1 1 0 0 1 1 1 2 0 0 0 0 0 0 1 0 1 1 2 1 0 1 0 1 1 2 1 1 2 0 0 1 1 0 2 0 2 1 1 NA 1 2 1 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0 1 2 1 1 1 0 0 2 1 0 2 1 0 1 2 2 0 0 2 1 0 0 1 1 0 0 0 0 0 1 1 0 0 1 0 2 1 1 0 0 0 1 0 0 1 2 0 0 1 1 1 1 0 2 1 2 2 2 0 1 0 2 1 1 2 0 0 0 0 0 2 0 1 0 0 1 1 1 1 2 0 1 0 1 1 1 0 2 0 0 0 0 2 1 0 1 0 1 0 0 0 2 1 2 0 0 1 0 0 0 1 2 1 0 1 1 1 1 1 1 1 1 0 0 0 0 0 1 2 1 1 1 0 2 1 1 0 1 2 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 1 1 1 0 2 1 0 0 1 0 0 2 1 0 1 0 0 0 0 1 0 1 1 1 0 1 1 2 0 0 1 2 1 1 1 1 1 1 1 0 0 0 1 0 1 0 1 1 0 0 0 1 1 0 2 0 1 0 0 0 0 0 1 0 0 1 0 0 0 1 2 1 1 2 1 1 0 2 1 0 1 2 0 0 0 1 2 0 0 0 0 1 1 1 1 2 1 1 1 1 1 1 0 1 1 1 1 0 1 0 0 0 1 1 2 2 1 1 0 0 0 2 2 1 2 0 1 0 0 0 0 1 1 2 0 2 0 1 0 1 0 1 0 0 2 1 1 1 1 0 1 2 0 0 0 0 0 0 2 2 0 0 1 2 0 0 0 0 1 1 2 2 0 1 1 0 2 1 1 1 0 1 0 0 1 0 0 0 0 1 0 1 1 0 1 2 0 0 2 0 2 0 1 0 0 1 1 0 1 0 0 2 0 0 0 1 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 1 0 0 0 1 0 1 0 1 2 1 1 0 2 1 0 0 0 NA 2 1 1 1 1 1 2 0 0 1 0 0 0 0 1 1 2 13 | Navajo-Churro H44 0 0 1 -9 2 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 1 2 2 2 2 0 1 0 1 0 0 0 0 1 1 0 0 1 2 0 0 1 2 1 1 0 0 1 0 0 1 0 1 2 0 0 1 1 1 2 0 0 0 0 0 1 0 0 0 1 2 0 0 1 1 1 0 0 0 0 2 1 1 1 0 1 1 1 1 0 1 2 0 1 0 1 1 0 1 1 0 1 1 1 1 1 1 0 1 0 1 0 0 1 2 1 1 0 0 1 0 2 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 2 2 1 0 2 1 0 0 2 1 0 0 1 1 1 1 0 1 0 1 1 0 1 1 1 1 0 2 0 0 0 0 1 1 0 1 1 1 1 0 1 2 0 2 0 2 0 2 0 1 1 0 1 0 1 1 0 1 1 1 1 0 2 0 0 1 0 2 0 0 0 2 0 0 1 1 0 1 0 0 0 0 2 1 0 0 0 2 2 0 1 0 0 1 0 0 1 1 1 0 1 1 0 0 0 0 1 1 0 1 2 0 0 0 0 2 2 0 1 0 1 2 1 1 0 1 1 2 0 1 1 0 1 0 0 0 1 0 0 2 1 1 2 0 0 0 1 1 1 0 0 0 1 0 0 0 2 0 1 1 0 1 0 1 0 0 0 1 0 1 1 1 2 0 1 1 1 0 0 1 0 1 1 1 1 0 0 0 0 1 1 0 0 1 1 0 1 0 1 0 0 0 1 1 0 0 0 1 0 0 1 0 1 1 1 1 1 0 0 1 1 1 0 1 0 0 0 0 1 0 0 1 0 0 0 1 1 1 2 2 2 0 1 2 1 1 0 0 1 0 1 1 1 0 0 0 2 0 1 2 1 0 2 0 1 0 0 0 0 0 0 1 1 1 0 0 1 1 2 0 1 0 1 0 0 0 2 2 0 1 0 1 2 0 1 0 0 0 1 1 2 2 2 1 0 0 0 2 2 0 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 2 1 0 1 1 2 1 1 0 1 1 0 2 1 0 0 1 
0 NA 0 1 0 1 0 0 2 1 0 0 1 NA 0 1 1 1 0 0 1 0 1 1 1 0 1 1 0 1 1 1 0 1 0 1 0 0 1 1 1 0 0 1 2 0 1 0 1 0 0 1 0 1 1 2 0 1 0 1 2 1 0 1 1 0 1 0 0 2 2 14 | Navajo-Churro H56 0 0 2 -9 2 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 1 2 2 2 2 0 1 0 1 0 0 0 0 1 1 0 0 1 2 0 0 1 2 1 1 0 0 1 0 0 1 0 1 2 0 0 1 1 1 2 0 0 0 0 0 1 0 0 0 1 2 0 0 1 1 1 0 0 0 0 2 1 1 1 0 1 1 1 1 0 1 2 0 1 0 1 1 0 1 1 0 1 1 1 1 1 1 0 1 0 1 0 0 1 2 1 1 0 0 1 0 2 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 2 2 1 0 2 1 0 0 2 1 0 0 1 1 1 1 0 1 0 1 1 0 1 1 1 1 0 2 0 0 0 0 1 1 0 1 1 1 1 0 1 2 0 2 0 1 1 2 1 2 1 1 2 1 2 1 0 1 2 0 1 0 2 0 0 0 1 1 0 1 1 1 0 0 0 2 0 0 0 0 0 0 1 1 0 0 0 2 1 0 1 0 0 1 0 0 1 1 0 1 2 1 0 0 0 1 0 0 1 1 2 0 0 0 1 1 1 0 1 1 1 1 2 1 0 1 0 2 0 1 2 0 0 0 1 0 0 0 0 2 1 1 1 0 0 0 1 2 1 1 1 0 0 0 0 0 1 0 1 1 0 2 0 1 1 1 1 2 0 1 0 2 1 0 1 2 1 0 0 2 0 0 0 0 1 0 0 0 0 1 1 0 0 1 1 0 1 0 1 0 0 0 1 1 0 0 0 2 1 0 0 0 0 0 2 2 1 1 1 1 1 0 0 0 1 0 0 0 0 0 0 0 1 1 1 1 2 2 2 1 1 1 0 1 1 1 0 0 1 0 0 0 2 0 0 0 1 0 1 1 1 0 1 0 1 1 1 1 1 0 0 2 1 1 0 0 1 1 1 1 0 0 1 0 0 0 2 2 0 2 0 2 2 0 1 0 0 1 1 1 1 2 2 2 1 0 0 2 2 0 0 0 1 2 2 0 0 1 1 1 0 1 1 0 0 1 1 0 1 1 0 0 0 1 2 0 0 1 0 1 1 2 1 1 1 1 0 1 0 0 2 0 0 0 0 0 1 1 1 1 2 0 0 0 1 1 2 1 1 1 0 2 1 0 2 0 0 1 0 1 2 0 0 0 0 0 2 1 2 2 1 0 0 1 0 2 2 0 1 1 2 1 2 0 0 1 2 2 0 1 2 1 1 15 | Navajo-Churro H33 0 0 2 -9 1 0 1 0 0 0 1 1 0 0 1 2 0 2 0 0 0 0 1 1 0 1 0 0 NA 0 1 1 1 1 2 1 0 0 1 0 2 0 0 0 1 0 1 0 0 1 1 0 0 0 0 1 0 2 1 1 0 2 0 1 1 0 1 1 1 1 0 0 0 0 0 1 1 1 1 2 1 1 0 0 0 0 0 0 0 0 0 1 1 2 1 0 0 0 0 1 2 0 2 0 2 0 0 1 0 0 1 1 1 1 1 0 1 1 0 1 1 1 2 1 0 1 1 1 1 1 1 1 0 1 0 0 1 1 0 1 1 1 2 0 1 1 1 1 2 0 0 1 1 0 1 1 1 0 0 2 0 0 0 2 1 1 2 1 0 1 2 0 0 1 1 0 1 0 2 1 2 1 1 0 0 1 1 0 1 1 1 0 1 0 0 0 1 1 0 2 1 2 2 1 0 1 1 2 2 0 0 0 1 1 1 0 2 0 1 0 1 1 0 2 0 1 1 2 1 1 0 1 0 0 0 0 1 0 0 0 0 2 1 0 2 1 0 2 0 0 1 1 1 1 2 2 0 0 0 2 0 1 1 1 1 0 0 0 0 0 0 2 0 1 2 1 2 0 1 1 0 1 1 0 2 0 1 2 0 0 0 0 0 0 1 1 0 0 0 0 1 2 1 2 2 0 0 0 0 0 1 0 2 0 0 1 0 0 0 0 1 0 0 1 0 0 1 0 0 0 2 0 0 1 0 1 
1 1 1 0 0 0 0 1 1 0 0 1 2 0 2 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 1 1 1 1 2 1 0 1 2 0 1 0 2 2 0 0 0 1 0 0 2 0 0 0 1 1 1 1 0 0 1 1 1 1 0 2 0 0 1 0 0 1 2 1 0 0 0 0 1 1 0 2 0 0 0 0 0 0 1 0 1 1 1 1 0 0 2 0 1 1 1 2 0 2 2 0 0 0 2 0 0 0 2 2 2 0 2 2 2 0 0 2 2 0 2 2 2 2 0 1 1 1 0 0 1 0 0 1 0 0 1 1 1 1 0 0 1 0 1 1 1 1 1 1 0 2 0 1 0 1 0 0 1 0 0 1 0 0 2 0 0 0 1 1 NA 0 0 1 1 1 0 1 0 1 1 2 0 1 0 0 0 2 1 1 0 0 0 1 1 2 0 0 2 0 0 1 0 NA 1 1 0 1 1 0 2 0 0 1 1 1 1 1 2 0 0 16 | Navajo-Churro H39 0 0 2 -9 1 0 2 0 0 1 1 1 0 0 1 2 0 1 1 0 0 1 1 0 0 2 1 1 NA 1 1 2 0 0 2 2 0 0 0 0 1 0 1 0 1 0 1 1 1 2 1 0 0 1 0 0 0 1 2 1 1 1 0 0 2 1 0 0 0 1 0 0 1 0 0 2 0 0 2 2 2 1 0 0 0 0 1 1 1 1 1 1 0 0 1 0 0 1 1 0 2 2 2 1 2 0 1 0 0 0 0 1 0 0 0 1 0 1 1 0 1 2 1 0 0 0 1 1 2 1 1 1 1 1 0 0 1 1 0 1 0 1 2 0 1 0 1 0 2 0 0 0 0 0 2 0 2 0 2 0 0 1 0 2 0 1 1 1 2 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 2 2 2 1 2 0 0 1 2 2 1 1 1 0 1 1 0 2 0 0 0 0 2 1 1 0 0 0 1 2 0 1 2 0 0 0 0 1 1 1 0 0 2 1 0 2 0 1 2 2 1 1 2 1 0 2 1 0 0 1 1 0 1 0 2 2 1 0 0 0 0 0 0 0 2 1 2 0 0 0 1 0 2 0 0 1 1 2 1 0 0 2 0 0 2 2 0 2 0 1 0 1 1 1 0 0 0 0 0 0 0 2 0 1 1 0 2 2 1 0 0 0 1 1 1 2 0 0 1 0 2 1 1 0 1 1 1 1 1 0 0 0 0 0 2 1 1 1 1 1 0 1 1 1 0 0 0 1 1 0 0 0 1 1 0 1 1 2 1 1 2 1 0 0 0 1 1 2 0 0 0 1 0 1 0 1 2 0 0 0 1 1 2 1 1 2 1 2 2 0 0 0 0 1 2 1 0 2 1 1 0 0 1 1 0 1 0 1 0 1 1 1 1 1 0 0 0 2 0 0 1 0 0 1 1 1 1 0 1 0 0 2 0 0 1 1 2 1 2 1 1 1 0 1 1 1 1 1 2 2 0 0 1 2 0 0 2 1 2 2 1 0 0 1 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 2 0 0 1 0 0 1 0 0 2 0 2 0 0 1 0 1 1 0 2 1 NA 1 0 0 1 1 1 2 0 1 2 2 0 0 1 1 1 1 0 1 0 0 0 0 0 2 1 2 0 0 0 2 1 1 0 0 1 2 1 0 1 2 0 2 0 1 0 2 0 0 2 1 1 0 1 2 1 2 17 | Navajo-Churro H45 0 0 2 -9 0 0 1 0 1 2 1 1 1 1 1 1 0 2 0 1 0 1 0 2 1 2 1 0 1 0 1 2 0 1 2 1 1 0 0 0 2 2 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 1 1 2 0 1 2 2 0 0 1 1 1 1 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0 2 0 1 0 0 0 1 1 1 0 0 1 0 0 0 2 1 1 0 2 0 1 1 1 0 1 1 0 2 1 0 0 1 1 1 1 1 2 0 0 1 0 1 1 0 0 0 0 1 0 0 1 1 0 2 0 1 0 1 0 2 0 0 2 0 0 0 0 0 2 0 0 0 2 0 1 0 1 1 0 1 1 0 0 0 1 0 0 2 0 0 
1 0 0 1 1 0 1 0 0 1 0 0 0 2 0 0 1 1 0 0 0 0 0 0 0 1 0 1 1 0 1 1 2 0 1 0 0 1 0 0 0 1 0 1 1 1 1 0 1 0 1 0 0 0 0 1 0 0 0 1 1 0 0 1 0 2 2 0 1 1 0 2 1 0 0 2 2 2 2 1 0 0 1 1 1 1 1 0 2 1 2 0 0 2 2 0 0 1 1 2 1 2 0 0 1 2 1 1 0 0 0 0 2 0 0 0 0 2 0 0 2 0 0 0 1 2 0 2 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 1 1 0 0 1 0 0 0 0 1 2 0 1 1 1 1 0 0 1 1 1 2 1 2 1 0 0 2 2 0 2 0 2 0 0 0 0 0 0 0 0 1 0 0 2 0 2 2 0 0 1 0 1 0 1 1 0 1 0 0 1 0 1 1 0 1 0 0 0 0 1 1 1 1 2 1 1 1 0 0 0 0 0 0 2 2 1 0 1 0 1 1 0 2 0 0 2 0 2 0 1 0 1 0 0 2 1 1 0 0 0 1 2 1 0 0 1 0 0 0 1 1 0 2 0 2 2 0 1 0 1 0 1 1 2 2 2 1 1 1 1 1 1 1 1 0 2 2 2 1 0 1 1 1 0 0 0 0 0 2 0 0 1 1 1 1 0 0 1 0 1 1 1 1 0 1 1 2 0 2 0 2 0 0 2 0 0 0 0 0 0 2 2 0 1 0 0 0 1 2 2 0 0 2 1 2 0 0 0 2 0 0 0 1 0 2 1 0 0 1 2 0 1 0 1 0 0 0 0 NA 2 1 1 0 1 1 2 0 0 0 0 0 0 1 1 1 2 18 | Navajo-Churro H28 0 0 2 -9 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 1 2 0 1 0 0 2 0 1 1 1 1 2 1 0 0 1 0 2 0 1 0 0 0 1 1 0 0 1 0 0 1 1 2 1 1 0 1 1 1 1 2 0 1 0 2 0 1 0 0 1 0 0 1 1 2 2 2 1 0 0 0 0 0 2 2 0 0 1 1 0 1 0 0 0 0 0 0 1 0 1 1 1 1 0 0 0 1 0 2 1 2 0 0 1 1 0 1 1 0 0 1 1 1 2 1 0 0 1 2 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 2 1 1 0 2 1 1 0 1 1 0 0 0 0 1 1 1 0 0 1 0 0 1 1 0 1 0 2 1 0 0 0 2 0 0 1 2 1 1 0 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 0 0 0 0 2 0 0 1 0 0 2 1 2 0 1 0 2 0 1 1 1 0 1 0 0 0 NA 1 0 1 0 0 2 1 0 1 0 1 1 1 0 2 1 1 0 1 1 0 0 0 1 1 2 0 1 2 0 0 0 0 1 1 0 1 1 1 2 0 1 0 0 1 2 0 0 1 1 2 1 0 0 2 0 0 2 2 0 2 0 1 0 0 0 0 0 0 0 1 0 0 0 2 0 0 2 0 1 1 0 0 0 0 0 1 0 2 0 1 0 0 1 2 1 0 1 0 1 1 1 1 0 0 0 0 1 1 1 1 1 1 0 1 0 1 0 0 0 1 1 0 0 0 0 1 0 2 0 2 1 1 1 0 0 0 0 0 2 1 1 0 0 1 0 1 0 1 2 0 0 0 1 0 0 2 1 1 0 2 1 0 0 0 0 0 0 2 2 1 1 0 0 2 0 1 1 0 0 2 0 2 1 1 1 0 0 0 2 1 1 0 0 0 1 1 1 0 1 1 0 0 0 1 2 1 2 0 2 2 0 1 1 1 0 2 2 2 2 2 1 0 1 1 1 1 1 1 1 2 2 2 0 0 1 0 1 0 1 0 0 0 2 0 0 1 1 2 1 1 0 1 0 0 1 2 0 2 0 1 NA 0 2 0 1 1 0 1 1 1 1 1 0 2 0 0 1 0 0 1 1 1 0 0 0 1 1 0 1 1 1 0 2 0 1 0 1 0 1 0 0 0 1 2 1 2 1 2 0 0 0 0 2 2 2 0 0 1 1 1 1 0 0 0 0 1 0 0 1 1 19 | Navajo-Churro H34 0 0 2 -9 1 0 1 0 1 2 0 1 NA 1 1 0 
0 1 0 1 0 1 0 1 1 1 0 0 1 0 2 2 1 1 2 1 0 0 1 0 1 1 0 0 0 0 0 0 1 0 0 1 NA 1 1 0 0 2 1 1 1 0 1 2 0 0 2 0 0 2 0 0 0 0 0 1 0 0 1 1 1 0 0 0 0 1 0 0 0 0 2 0 0 0 0 0 1 1 0 0 2 2 1 1 1 2 1 0 1 0 0 0 1 1 1 0 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 1 0 1 1 0 1 0 1 1 1 0 0 2 1 1 0 1 1 0 0 2 1 1 0 1 1 1 1 1 0 0 0 1 0 1 1 0 1 0 1 1 0 0 0 1 0 0 0 2 1 2 0 0 1 0 1 0 2 1 2 1 1 0 0 0 1 1 1 1 0 0 1 1 1 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0 2 0 0 0 NA 2 0 0 0 0 2 2 0 0 0 0 1 1 0 1 1 2 1 1 1 0 0 1 1 2 1 1 0 2 1 1 0 0 2 2 0 1 0 2 2 1 2 0 0 1 2 1 0 0 0 1 0 1 0 1 0 0 2 1 0 2 0 0 0 0 1 0 1 0 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 1 1 0 2 2 0 1 0 0 1 2 0 0 1 0 2 1 1 2 0 0 1 1 1 1 0 0 2 2 0 2 0 2 1 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 0 2 1 1 0 0 0 0 1 1 0 1 0 0 0 0 1 0 1 1 1 1 2 1 1 1 0 0 1 1 1 1 1 1 0 0 2 0 1 1 1 0 2 0 1 0 1 1 0 0 0 2 1 1 0 0 1 1 2 1 0 0 1 0 0 0 2 2 0 1 0 2 NA 1 2 0 0 0 1 1 2 1 1 2 1 1 0 1 1 1 0 0 2 2 2 1 0 1 0 0 0 0 0 0 0 1 0 0 1 1 1 1 0 0 2 0 1 1 2 0 1 0 0 NA 0 2 1 1 0 0 2 0 0 0 0 0 1 1 1 1 0 0 2 0 0 1 1 1 1 1 0 0 0 1 0 1 0 1 0 0 1 1 0 0 0 2 2 0 2 0 2 0 1 0 0 2 2 1 1 0 0 2 2 0 1 0 0 0 1 0 0 2 1 20 | Navajo-Churro H52 0 0 2 -9 1 0 0 0 0 1 0 0 1 0 1 1 1 0 1 0 0 2 0 1 1 1 0 0 1 0 1 2 1 1 2 1 0 0 1 0 0 0 1 0 0 0 1 1 2 1 0 1 NA 1 0 0 0 1 1 1 2 0 0 1 1 1 1 0 0 2 0 0 0 0 0 1 0 0 1 1 2 0 0 0 0 1 1 1 0 0 2 1 0 0 0 0 0 1 1 0 2 2 1 1 1 1 0 0 1 0 0 0 1 1 0 0 1 1 1 0 0 1 1 0 1 0 1 0 1 0 0 1 1 0 0 0 1 1 0 1 0 0 1 0 1 0 0 0 1 0 0 0 0 0 2 1 2 0 2 0 0 0 1 2 0 1 1 1 1 1 1 0 0 1 0 0 1 1 0 1 0 2 1 0 0 0 2 0 0 1 2 1 1 0 0 2 0 2 0 2 0 2 0 1 1 1 1 1 1 0 0 0 0 2 0 0 1 0 0 2 1 2 0 1 0 2 0 1 1 1 0 1 0 0 0 0 1 0 1 0 0 2 1 0 1 0 1 1 1 0 2 1 1 0 1 1 0 0 0 1 1 2 0 1 2 0 0 0 0 1 1 0 1 1 1 2 0 1 0 0 1 2 0 0 1 1 2 1 0 0 2 0 0 2 2 0 2 0 1 0 0 0 0 0 0 0 1 0 0 0 2 0 0 2 0 1 1 0 0 0 0 0 1 0 2 0 1 0 0 1 2 1 0 1 0 1 1 1 1 0 0 0 0 1 1 1 1 1 1 0 1 0 1 0 0 0 1 1 0 0 0 0 1 0 2 0 2 1 1 1 0 0 0 0 0 2 1 1 0 0 1 0 1 0 1 2 0 0 0 1 0 1 2 1 2 0 2 2 0 0 0 0 0 1 1 1 1 0 0 0 1 0 1 1 1 0 1 0 1 0 0 0 0 0 0 1 2 1 0 0 0 1 2 1 
0 0 1 0 1 1 2 1 0 2 0 2 2 0 1 0 0 0 1 1 1 1 1 2 0 1 0 2 1 1 0 0 2 2 2 1 0 1 0 1 0 0 0 1 0 2 0 0 1 1 1 1 0 0 1 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 1 0 0 2 0 0 1 1 0 0 1 1 1 1 1 0 1 0 1 0 0 1 2 1 0 0 1 2 1 2 0 1 1 1 0 0 2 2 1 1 0 1 1 2 0 0 1 1 1 0 0 1 1 2 21 | Navajo-Churro H58 0 0 2 -9 2 0 1 0 1 2 0 0 2 0 1 1 0 1 0 0 0 2 0 1 0 1 0 0 1 1 2 2 2 1 2 1 0 0 1 0 0 0 1 0 0 1 0 0 2 0 0 1 2 0 0 0 0 1 1 1 0 0 2 2 0 0 0 2 1 2 0 0 0 0 0 2 0 0 0 2 1 1 0 1 1 0 0 0 0 0 1 2 1 2 1 1 1 0 1 1 1 1 1 0 1 0 1 0 0 1 0 1 1 0 2 1 1 0 1 0 2 1 1 1 1 2 0 1 0 2 1 2 0 0 1 1 1 0 0 1 0 1 2 0 1 1 1 1 1 0 0 0 0 0 1 1 0 0 1 2 0 0 1 1 0 0 2 2 0 1 1 1 0 2 2 0 1 0 2 0 1 2 1 0 0 1 1 1 0 2 1 0 1 0 1 1 0 1 0 2 0 2 1 2 1 1 2 1 2 1 0 1 2 0 1 0 2 0 0 0 1 1 0 1 1 1 0 0 0 2 0 0 0 0 0 0 1 1 0 0 0 2 1 0 1 0 0 1 0 0 1 1 1 1 2 2 0 0 0 1 0 1 0 1 1 0 0 0 0 1 1 1 0 1 1 1 2 0 0 1 1 1 1 2 2 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 2 1 2 1 0 0 0 0 0 0 2 0 2 0 0 1 0 1 1 1 0 1 0 1 0 1 1 0 1 1 1 0 0 2 0 1 1 1 0 0 0 0 0 2 1 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 2 2 2 0 0 1 1 1 1 0 0 0 1 0 0 1 0 0 0 0 0 1 2 2 1 2 1 1 0 1 1 1 0 0 1 0 0 1 1 1 0 0 1 1 2 2 2 0 2 0 0 0 1 0 0 1 1 0 1 0 0 0 1 0 1 1 1 0 0 1 0 0 2 1 0 0 1 1 2 0 0 0 1 0 0 0 1 1 1 1 0 0 0 2 2 0 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 2 1 0 1 1 2 1 1 0 1 1 0 2 1 0 0 1 0 1 0 1 1 1 0 0 2 2 0 0 1 1 0 1 1 1 1 1 1 0 1 2 2 0 1 1 1 2 2 0 1 0 0 0 0 1 1 0 1 0 0 0 2 1 1 1 0 1 0 1 0 1 1 2 0 2 1 0 2 1 0 1 1 0 1 0 0 1 1 22 | -------------------------------------------------------------------------------- /detectRUNS/tests/testthat/test_plots.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(detectRUNS) 3 | context("Testing plots") 4 | 5 | runsFile <- "test.ROHet.sliding.csv" 6 | genotypeFile <- "test.ped" 7 | mapFile <- "test.map" 8 | 9 | test_that("Test plot_manhattanRuns", { 10 | # loading data from CSV 11 | runs <- readExternalRuns(inputFile = runsFile, program = 'detectRUNS') 12 | 13 | # plotting data 14 | 
plot_manhattanRuns(runs, genotypeFile, mapFile, savePlots = TRUE, outputName = tempfile()) 15 | }) -------------------------------------------------------------------------------- /detectRUNS/tests/testthat/test_run.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(detectRUNS) 3 | context("Testing RUNS") 4 | 5 | # input files shared by all tests below (paths relative to tests/testthat) 6 | genotypeFile <- "test.ped" 7 | mapFile <- "test.map" 8 | 9 | test_that("detected ROHet are identical", { 10 | # run the sliding-window detection (note: ROHet=FALSE in this call) 11 | test_sliding <- slidingRUNS.run(genotypeFile, mapFile, windowSize=15, threshold=0.1, minSNP=15, 12 | ROHet=FALSE, maxOppWindow=1, maxMissWindow=1, minLengthBps=100000, 13 | minDensity=1/10000, maxOppRun=NULL, maxMissRun=NULL) 14 | 15 | # load the stored sliding-window reference runs (NOTE(review): reference file may need refreshing) 16 | reference_rohet <- readExternalRuns("test.ROHet.sliding.csv", program = "detectRUNS") 17 | 18 | # detected runs must match the stored reference table 19 | expect_equal(test_sliding, reference_rohet, info = "testing sliding window approach") 20 | 21 | # run the consecutive-SNP detection (again with ROHet=FALSE) 22 | test_consecutive <- consecutiveRUNS.run(genotypeFile, mapFile, minSNP=15, 23 | ROHet=FALSE, minLengthBps=100000, 24 | maxOppRun=1, maxMissRun=1) 25 | 26 | # load the stored consecutive reference runs (NOTE(review): reference file may need refreshing) 27 | reference_rohet <- readExternalRuns("test.ROHet.consecutive.csv", program = "detectRUNS") 28 | 29 | # detected runs must match the stored reference table 30 | expect_equal(test_consecutive, reference_rohet, info = "testing consecutive approach") 31 | }) 32 | 33 | test_that("Marker differ in size", { 34 | # read the map file into a data frame (shadows the path variable inside this test only) 35 | mapFile <- read.delim(mapFile, header = F) 36 | names(mapFile) <- c("Chrom","SNP","cM","bps") 37 | 38 | # keep a single marker (row 100) so the map no longer matches the ped file 39 | mapFile <- mapFile[100, ] 40 | 41 | # write the truncated map to a temporary file 42 | fake_mapfile = tempfile(fileext = ".map") 43 | write.table(mapFile, fake_mapfile, quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE) 44 | 
45 | # test function 46 | expect_error(slidingRUNS.run(genotypeFile, fake_mapfile), "Number of markers differ") 47 | 48 | # clean up 49 | file.remove(fake_mapfile) 50 | }) 51 | 52 | 53 | test_that("No file path throws error", { 54 | # test for errors 55 | expect_error(slidingRUNS.run("fake_genotype", mapFile), "doesn't exists") 56 | expect_error(slidingRUNS.run(genotypeFile, "fake_map"), "doesn't exists") 57 | }) 58 | -------------------------------------------------------------------------------- /detectRUNS/vignettes/detectRUNS.vignette.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "detectRUNS: an R package to detect runs of homozygosity and heterozygosity in diploid genomes" 3 | author: 4 | - name: Filippo Biscarini, Paolo Cozzi, Giustino Gaspa, Gabriele Marras 5 | affiliation: 6 | - IBBA-CNR, PTP, Università degli Studi di Sassari, University of Guelph 7 | email: filippo.biscarini@ibba.cnr.it, gmarras@uoguelph.ca 8 | date: "`r Sys.Date()`" 9 | output: 10 | prettydoc::html_pretty: 11 | theme: tactile 12 | highlight: github 13 | toc: true 14 | fig_caption: yes 15 | vignette: > 16 | %\VignetteIndexEntry{detectRUNS: an R package to detect runs of homozygosity and heterozygosity in diploid genomes} 17 | %\VignetteEngine{knitr::rmarkdown} 18 | %\VignetteEncoding{UTF-8} 19 | --- 20 | 21 | 31 | 32 | ```{r, eval=TRUE, include=TRUE, echo=FALSE} 33 | #just to be sure 34 | library("detectRUNS") 35 | ``` 36 | 37 | # Overview 38 | 39 | **detectRUNS** is an R package for the detection of **runs of homozygosity** (**ROH/ROHom**) and of **heterozygosity** (**ROHet**, a.k.a. "**heterozygosity-rich regions**") in diploid genomes. 40 | **ROH/ROHom** were first studied in humans (e.g. McQuillan et al. 2008) and rapidly found applications not only in human genetics but also in animal genetics (e.g. Ferencakovic et al., 2011, in *Bos taurus*). 
More recently, the idea of looking also at "runs of heterozygosity" (**ROHet** or, more appropriately, "heterozygosity-rich regions") has been proposed (Williams et al. 2016). 41 | 42 | detectRUNS uses two methods to detect genomic runs: 43 | 44 | 1. sliding-window based method: 45 | 2. consecutive runs: 46 | 47 | The sliding-window based method is similar to what is implemented in the computer package *Plink* (Purcell et al., 2007) [see Bjelland et al., 2013 for a description]. In brief, a sliding window is used to scan the genome, and the characteristics of consecutive windows are used to determine whether or not a SNP is in a run (either ROH/ROHom or ROHet). Parameters for both the sliding window and the run need to be specified. 48 | 49 | The "consecutive runs" method is window-free and directly scans the genome SNP by SNP. It was first proposed by Marras et al. (2015). Here, only parameters for the runs need to be specified. 50 | 51 | Besides detecting genomic runs (again, either homozygosity or heterozygosity, either sliding-window based or consecutive), and saving results to a data frame of individual runs, **detectRUNS** can: 52 | 53 | - plot runs along the genome: 54 | * plot runs per individual 55 | * plot stacked (piled) runs 56 | * plot the % of times each SNP is in a run in the population (per chromosome) 57 | * Manhattan-like plot of the % of times each SNP is in a run 58 | - plot mean or total run length vs number of runs per individual 59 | - generate summary descriptive statistics on detected runs 60 | - calculate inbreeding based on ROH (*$F_{ROH}$*), genome-wide and chromosome-wide 61 | - plot $F_{ROH}$ per chromosome 62 | 63 | The input files for **detectRUNS** are *Plink* **ped/map** files. If one wishes to use this R package only for plots and summary statistics, output files from *Plink* (.hom files) can be easily read into **detectRUNS** through a specific function. 
64 | 65 | **detectRUNS** can be used with genotype data from any diploid organism: humans, animals or plants. 66 | 67 | # Sample data 68 | 69 | To illustrate the functionalities of **detectRUNS**, we use data on sheep (*Ovis aries*) SNP genotypes from the work by Kijas et al. (2016), available on-line through "Dryad" (https://goo.gl/sfAy8k). 70 | A subset with two breeds ("Jacobs" and "Navajo-Churro", 100 animals) and two chromosomes ($4\,841$ SNPs from OAR 2 and 24) was used. 71 | 72 | ```{r} 73 | genotypeFilePath <- system.file( 74 | "extdata", "Kijas2016_Sheep_subset.ped", package="detectRUNS") 75 | mapFilePath <- system.file( 76 | "extdata", "Kijas2016_Sheep_subset.map", package="detectRUNS") 77 | ``` 78 | 79 | # Detect runs 80 | 81 | For the detection of genomic runs, **detectRUNS** uses two main functions: 82 | 83 | 1. **slidingRUNS.run**: for sliding-window-based detection 84 | 2. **consecutiveRUNS.run**: for consecutive-SNP-based detection 85 | 86 | Input files are to be passed as paths to files (e.g. /home/Documents/experiment/file.ped/map). 87 | 88 | ## sliding-window-based run detection 89 | 90 | The function *slidingRUNS.run()* accepts several input parameters: besides the paths (or names) of ped/map files, there are parameters related to the sliding window and parameters related to the genomic runs. 91 | 92 | Sliding-window parameters are: *windowSize*, *threshold* (to call a SNP "in run"), *minSNP* (minimum number of homozygous/heterozygous SNP in the window), *maxOppWindow* (maximum number of SNP with opposite genotype: heterozygous/homozygous) and *maxMissWindow* (maximum number of missing genotypes). 93 | 94 | Run-related parameters are: *maxGap* (maximum gap between consecutive SNPs, in basepairs -bps), *minLengthBps* (minimum length of the run, in bps), *minDensity* (number of SNPs every $x$ kilo-basepairs -kbps), *maxOppRun* (maximum number of opposite genotypes in the run), *maxMissRun* (maximum number of missing genotypes in the run). 
95 | 96 | *ROHet* controls whether runs of homozygosity (ROH/ROHom) or of heterozygosity (heterozygosity-rich regions, ROHet) will be detected. It defaults to **FALSE** (ROH/ROHom). 97 | 98 | ```{r,results='hide',message=FALSE, cache=FALSE, warning=FALSE} 99 | slidingRuns <- slidingRUNS.run( 100 | genotypeFile = genotypeFilePath, 101 | mapFile = mapFilePath, 102 | windowSize = 15, 103 | threshold = 0.05, 104 | minSNP = 20, 105 | ROHet = FALSE, 106 | maxOppWindow = 1, 107 | maxMissWindow = 1, 108 | maxGap = 10^6, 109 | minLengthBps = 250000, 110 | minDensity = 1/10^3, # SNP/kbps 111 | maxOppRun = NULL, 112 | maxMissRun = NULL 113 | ) 114 | ``` 115 | 116 | ## consecutive SNP-based run detection 117 | 118 | The function *consecutiveRUNS.run()* has a similar structure, obviously without the sliding-window parameters. 119 | 120 | ```{r,results='hide',message=FALSE, cache=FALSE, warning=FALSE} 121 | consecutiveRuns <- consecutiveRUNS.run( 122 | genotypeFile =genotypeFilePath, 123 | mapFile = mapFilePath, 124 | minSNP = 20, 125 | ROHet = FALSE, 126 | maxGap = 10^6, 127 | minLengthBps = 250000, 128 | maxOppRun = 1, 129 | maxMissRun = 1 130 | ) 131 | ``` 132 | 133 | *slidingRUNS.run()* detected **`r nrow(slidingRuns)`** ROH; *consecutiveRUNS.run()* detected **`r nrow(consecutiveRuns)`** ROH. 134 | 135 | ## "Runs of heterozygosity" (a.k.a. heterozygosity-rich regions) 136 | 137 | By setting **ROHet=TRUE**, runs of heterozygosity (a.k.a. heterozygosity-rich genomic regions) are detected instead. Again, the user can choose whether to use the sliding-window or the consecutive method. 
138 | 139 | ```{r,results='hide',message=FALSE, cache=FALSE, warning=FALSE} 140 | slidingRuns_het <- slidingRUNS.run( 141 | genotypeFile = genotypeFilePath, 142 | mapFile = mapFilePath, 143 | windowSize = 10, 144 | threshold = 0.05, 145 | minSNP = 10, 146 | ROHet = TRUE, 147 | maxOppWindow = 2, 148 | maxMissWindow = 1, 149 | maxGap = 10^6, 150 | minLengthBps = 10000, 151 | minDensity = 1/10^6, # SNP/kbps 152 | maxOppRun = NULL, 153 | maxMissRun = NULL 154 | ) 155 | ``` 156 | 157 | ```{r,results='hide',message=FALSE, cache=FALSE} 158 | consecutiveRuns_het <- consecutiveRUNS.run( 159 | genotypeFile =genotypeFilePath, 160 | mapFile = mapFilePath, 161 | minSNP = 10, 162 | ROHet = TRUE, 163 | maxGap = 10^6, 164 | minLengthBps = 10000, 165 | maxOppRun = 2, 166 | maxMissRun = 1 167 | ) 168 | ``` 169 | *slidingRUNS.run()* detected **`r nrow(slidingRuns_het)`** ROHet; *consecutiveRUNS.run()* detected **`r nrow(consecutiveRuns_het)`** ROHet. 170 | 171 | Runs of homozygosity (ROH) detected using the sliding-windows method (output from *slidingRUNS.run()*) will be used to illustrate summary statistics, plots and inbreeding calculations. 172 | 173 | # Summary statistics on detected runs 174 | 175 | The function *summaryRuns()* takes in input the dataframe with results from runs detection and calculates a number of basic descriptive statistics on runs. Additional necessary parameters are the paths to the *Plink* ped and map files. `Class` and `snpInRuns` are optional arguments. 
176 | 177 | ```{r, results='hide',message=FALSE, cache=FALSE} 178 | summaryList <- summaryRuns( 179 | runs = slidingRuns, mapFile = mapFilePath, genotypeFile = genotypeFilePath, 180 | Class = 6, snpInRuns = TRUE) 181 | ``` 182 | The returned list includes the following dataframes: 183 | 184 | `r names(summaryList)` 185 | 186 | We can, for instance, have a look at the number of runs per class-size (Mbps) in the two breeds: we see that in Jacobs sheep there are `r summaryList$summary_ROH_count[1,1]` ROH with size up to 6 Mbps. 187 | 188 | ```{r} 189 | summaryList$summary_ROH_count 190 | ``` 191 | 192 | Or, the average number of ROH per chromosome and per breed can be obtained. 193 | 194 | ```{r} 195 | summaryList$summary_ROH_mean_chr 196 | ``` 197 | 198 | The dataframe "SNPinRun" contains, for each SNP, the proportion of times it falls inside a run in any given population/group: 199 | 200 | ```{r} 201 | head(summaryList$SNPinRun) 202 | ``` 203 | 204 | The summary information included in the list returned from *summaryRuns()* is conveniently organized in data.frames, so that it can easily be visualized, manipulated and written out to text files (e.g. .csv files). 205 | 206 | # Plots 207 | 208 | **detectRUNS** produces a number of plots from the dataframe with runs (results from sliding-window or consecutive scans of the genome for ROH/ROHet). 209 | 210 | The basic plot, produced by the function *plot_Runs()*, plots directly all runs detected in each individual against their position along the chromosome. Separate plots per chromosome are produced, and different groups/populations are coloured differently to visualize contrasting patterns. 211 | 212 | ```{r, fig.show='hold', fig.cap="Plot runs per individual"} 213 | plot_Runs(runs = slidingRuns) 214 | ``` 215 | 216 | Alternatively, runs can still be plotted against their position along the chromosome, but stacked on top of each other: this way, regions of the genome with an excess of runs can easily be identified. 
In this case, separate plots per chromosome and per group/population are produced. 217 | 218 | ```{r, fig.show='hold', fig.cap="Plot runs per individual", message=FALSE, cache=FALSE, warning=FALSE, results='hide'} 219 | plot_StackedRuns(runs = slidingRuns) 220 | ``` 221 | 222 | Finally, the proportion of times each SNP falls inside a run in any given population/group can be plotted against their position along the chromosome, separately per group. The function *plot_SnpsInRuns()* requires as arguments, besides the dataframe with detected runs, also the paths to the original ped (for information on groups) and map (for SNP positions) files. 223 | 224 | ```{r, fig.show='hold', message=FALSE, cache=FALSE, warning=FALSE, results='hide',fig.width=6,fig.height=4} 225 | plot_SnpsInRuns( 226 | runs = slidingRuns[slidingRuns$chrom==2,], genotypeFile = genotypeFilePath, 227 | mapFile = mapFilePath) 228 | ``` 229 | 230 | ```{r, fig.show='hold', message=FALSE, cache=FALSE, warning=FALSE, results='hide',fig.width=6,fig.height=4} 231 | plot_SnpsInRuns( 232 | runs = slidingRuns[slidingRuns$chrom==24,], genotypeFile = genotypeFilePath, 233 | mapFile = mapFilePath) 234 | ``` 235 | 236 | We can see from the plots above that in the Jacob sheep breed a region with a "peak" of ROH can be spotted approximately halfway along chromosome 2 (OAR2). This corresponds to the strong GWAS signals found by Kijas et al. (2016) on OAR2 associated with the four-horns phenotype. 237 | 238 | To identify the position of a runs (ROH in this case) peak, e.g. from *plot_SnpsInRuns()*, one can conveniently use the function *detectRUNS::tableRuns()*: this requires as input, besides the runs dataframe, also the paths to the original ped/map files, and the threshold above which we want information on such "peaks" (e.g. only peaks where SNP are inside runs in more than 70% of the individuals in that population/group). 
239 | 240 | ```{r,message=FALSE, cache=FALSE, results='hide'} 241 | topRuns <- tableRuns( 242 | runs = slidingRuns, genotypeFile = genotypeFilePath, mapFile = mapFilePath, 243 | threshold = 0.7) 244 | ``` 245 | 246 | ```{r,echo=FALSE} 247 | print(topRuns) 248 | ``` 249 | 250 | The information on the proportion of times each SNP falls inside a run, can also be plotted against SNP positions in all chromosomes together, similarly to the familiar GWAS **Manhattan plots**: 251 | 252 | ```{r, message=FALSE, cache=FALSE, results='hide', fig.width=6,fig.height=4} 253 | plot_manhattanRuns( 254 | runs = slidingRuns[slidingRuns$group=="Jacobs",], 255 | genotypeFile = genotypeFilePath, mapFile = mapFilePath) 256 | ``` 257 | 258 | # $F_{ROH}$: ROH-based inbreeding 259 | 260 | From runs of homozygosity (ROH), individual inbreeding/consanguinity coefficients can be calculated as: 261 | 262 | $$ 263 | F_{ROH} = \frac{\sum L_{ROH}}{L_{genome}} 264 | $$ 265 | 266 | where $\sum L_{ROH}$ is the sum of the length of all ROH detected in an individual, and $L_{genome}$ is the total length of the genome that was used. 
267 | 268 | **detectRUNS** provides functions to calculate individual inbreeding/consanguinity coefficients: 269 | 270 | ```{r, echo=FALSE, message=FALSE, cache=FALSE} 271 | head( 272 | Froh_inbreeding(runs = slidingRuns,mapFile = mapFilePath,genome_wide = TRUE)) 273 | ``` 274 | 275 | The parameter "genome_wide" (which defaults to TRUE) can be used to obtain inbreeding/consanguinity estimates on a per-chromosome basis (by setting "genome_wide=FALSE"). 276 | 277 | Inbreeding levels can be plotted by group, for example: 278 | 279 | ```{r, echo=FALSE, message=FALSE, cache=FALSE, fig.width=5, fig.height=4} 280 | plot_InbreedingChr( 281 | runs = slidingRuns, mapFile = mapFilePath, style = "FrohBoxPlot") 282 | ``` 283 | 284 | # Importing data from external files 285 | 286 | Results on runs (typically ROH) from external software can be imported into **detectRUNS** to produce plots, tables and summary statistics. 287 | Current options include: 288 | 289 | - ROH from **Plink** (.hom file from `--homozyg`) 290 | - ROH from **BCFtools** (output from the `roh` option) 291 | - runs dataframes from **detectRUNS** written out to files 292 | 293 | Through the parameter `program` the user can select from which source the output file is coming. As an illustration, we read in results from *detectRUNS* saved out to a .csv file: 294 | 295 | ```{r, message=FALSE} 296 | savedRunFile <- system.file( 297 | "extdata", "Kijas2016_Sheep_subset.sliding.csv", package="detectRUNS") 298 | runs <- readExternalRuns(inputFile = savedRunFile, program = "detectRUNS") 299 | head(runs) 300 | ``` 301 | 302 | # References 303 | 304 | - Bjelland, D. W., K. A. Weigel, N. Vukasinovic, and J. D. Nkrumah. "Evaluation of inbreeding depression in Holstein cattle using whole-genome SNP markers and alternative measures of genomic inbreeding." Journal of Dairy Science 96, no. 7 (2013): 4697-4706. 305 | - Ferencakovic, Maja, Edin Hamzic, Birgit Gredler, Ino Curik, and Johann Sölkner. 
"Runs of homozygosity reveal genome-wide autozygosity in the Austrian Fleckvieh cattle." Agriculturae Conspectus Scientificus (ACS) 76, no. 4 (2011): 325-329. 306 | - Kijas, James W., Tracy Hadfield, Marina Naval Sanchez, and Noelle Cockett. "Genome‐wide association reveals the locus responsible for four‐horned ruminant." Animal genetics 47, no. 2 (2016): 258-262. 307 | - Marras, Gabriele, Giustino Gaspa, Silvia Sorbolini, Corrado Dimauro, Paolo Ajmone‐Marsan, Alessio Valentini, John L. Williams, and Nicolò PP Macciotta. "Analysis of runs of homozygosity and their relationship with inbreeding in five cattle breeds farmed in Italy." Animal genetics 46, no. 2 (2015): 110-121. 308 | - McQuillan, Ruth, Anne-Louise Leutenegger, Rehab Abdel-Rahman, Christopher S. Franklin, Marijana Pericic, Lovorka Barac-Lauc, Nina Smolej-Narancic et al. "Runs of homozygosity in European populations." The American Journal of Human Genetics 83, no. 3 (2008): 359-372. 309 | - Purcell, Shaun, Benjamin Neale, Kathe Todd-Brown, Lori Thomas, Manuel AR Ferreira, David Bender, Julian Maller et al. "PLINK: a tool set for whole-genome association and population-based linkage analyses." The American Journal of Human Genetics 81, no. 3 (2007): 559-575. 310 | - Williams, John L., Stephen JG Hall, Marcello Del Corvo, K. T. Ballingall, L. I. C. I. A. Colli, P. A. O. L. O. Ajmone Marsan, and F. Biscarini. "Inbreeding and purging at the genomic Level: the Chillingham cattle reveal extensive, non‐random SNP heterozygosity." Animal genetics 47, no. 1 (2016): 19-27. 311 | -------------------------------------------------------------------------------- /performance/helper.R: -------------------------------------------------------------------------------- 1 | ## 2 | ## http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/ 3 | ## 4 | ## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%). 5 | ## data: a data frame. 
6 | ## measurevar: the name of a column that contains the variable to be summarized 7 | ## groupvars: a vector containing names of columns that contain grouping variables 8 | ## na.rm: a boolean that indicates whether to ignore NA's 9 | ## conf.interval: the percent range of the confidence interval (default is 95%); .drop is forwarded to ddply() 10 | summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE, 11 | conf.interval=.95, .drop=TRUE) { 12 | library(plyr) 13 | 14 | # New version of length which can handle NA's: if na.rm==T, don't count them 15 | length2 <- function (x, na.rm=FALSE) { 16 | if (na.rm) sum(!is.na(x)) 17 | else length(x) 18 | } 19 | 20 | # This does the summary. For each group's data frame, return a vector with 21 | # N, mean, and sd 22 | datac <- ddply(data, groupvars, .drop=.drop, 23 | .fun = function(xx, col) { 24 | c(N = length2(xx[[col]], na.rm=na.rm), 25 | mean = mean (xx[[col]], na.rm=na.rm), 26 | sd = sd (xx[[col]], na.rm=na.rm) 27 | ) 28 | }, 29 | measurevar 30 | ) 31 | 32 | # Rename the "mean" column 33 | datac <- plyr::rename(datac, c("mean" = measurevar)) 34 | 35 | datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean 36 | 37 | # Confidence interval multiplier for standard error 38 | # Calculate t-statistic for confidence interval: 39 | # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1 40 | ciMult <- qt(conf.interval/2 + .5, datac$N-1) 41 | datac$ci <- datac$se * ciMult 42 | 43 | return(datac) 44 | } 45 | -------------------------------------------------------------------------------- /performance/test1.R: -------------------------------------------------------------------------------- 1 | 2 | # Evaluating detectRUNS performance 3 | 4 | # clean up 5 | rm(list = ls()) 6 | 7 | # source library 8 | source('helper.R') 9 | 10 | # importing libraries 11 | library(detectRUNS) 12 | library(microbenchmark) 13 | library(ggplot2) 14 | library(data.table) 15 | 16 | # define a parameters list (NOTE(review): slidingRUNS.run() takes maxOppWindow/maxMissWindow; confirm which names the internal helpers read) 17 | parameters <- list( 
18 | windowSize=20, 19 | threshold=0.1, 20 | minSNP=5, 21 | ROHet=TRUE, 22 | maxOppositeGenotype=1, 23 | maxMiss=1, 24 | maxGap=10^6, 25 | minLengthBps=1000, 26 | minDensity=1/10, 27 | maxOppRun=NULL, 28 | maxMissRun=NULL 29 | ) 30 | 31 | # how many times each benchmark is repeated 32 | times <- 10 33 | 34 | # how many points in X axis 35 | x_points <- 10 36 | 37 | # get genotype data 38 | genotypeFile <- system.file("extdata", "Kijas2016_Sheep_subset.ped", package = "detectRUNS") 39 | genotype <- read.table(genotypeFile, sep = " ", header = FALSE, stringsAsFactors = FALSE) 40 | 41 | # get only ped data 42 | ped <- genotype[ , -c(1:6)] 43 | 44 | # convert into raw data 45 | raw <- t(apply(ped, 1, pedConvertCpp)) 46 | 47 | # drop objects that are no longer needed 48 | rm(list=c("genotype", "ped")) 49 | 50 | # read first two columns from genotype 51 | animals <- readPOPCpp(genotypeFile = genotypeFile) 52 | 53 | # get only one individual. Get index 54 | # idx <- 1 55 | idx <- which(animals$ID=="H70") 56 | 57 | # get an animal 58 | animal <- animals[idx, ] 59 | 60 | # remap animal correctly 61 | animal <- list(FID=animal$POP, IID=animal$ID) 62 | 63 | # keep only the genotype row of the selected animal 64 | x <- raw[idx, ] 65 | 66 | # get map data 67 | mapFile <- system.file("extdata", "Kijas2016_Sheep_subset.map", package = "detectRUNS") 68 | mapfile <- fread(mapFile, header = FALSE) 69 | 70 | # setting colnames 71 | colnames(mapfile) <- c("Chrom","SNP","cM","bps") 72 | 73 | # calculate sequence. 
11 elements, then remove the first 74 | steps <- ceiling(seq(1, length(x), length.out = (x_points+1) ))[-1] 75 | 76 | # calculating runs of Homozygosity (argument names as used by the package tests and vignette) 77 | runs <- slidingRUNS.run(genotypeFile, mapFile, windowSize = 15, threshold = 0.1, 78 | minSNP = 15, ROHet = FALSE, maxOppWindow = 1, 79 | maxMissWindow = 1, minLengthBps = 100000, minDensity = 1/10000) 80 | 81 | # fix column names 82 | names(runs) <- c("POPULATION","IND","CHROMOSOME","COUNT","START","END","LENGTH") 83 | 84 | # a dataframe in which every timing is stored (one row per benchmark repetition) 85 | tests <- data.frame(fun=character(), step=integer(), time=integer(), language=character()) 86 | 87 | # iterate over the SNP-count steps 88 | for (i in steps) { 89 | # get a subset 90 | subset_map <- mapfile[seq_len(i), ] 91 | subset_genotype <- x[seq_len(i)] 92 | 93 | # calculate gaps (only one chromosome) 94 | gaps <- diff(subset_map$bps) 95 | 96 | ############################################################################## 97 | # Test Windows 98 | 99 | # debug 100 | message(paste("Test sliding window: step", i)) 101 | 102 | # calculate sliding window 103 | y <- slidingWindow(subset_genotype, gaps, parameters$windowSize, step=1, parameters$maxGap, 104 | parameters$ROHet, parameters$maxOppositeGenotype, parameters$maxMiss) 105 | 106 | test_sliding <- microbenchmark( 107 | slidingWindow(subset_genotype, gaps, parameters$windowSize, step=1, parameters$maxGap, 108 | parameters$ROHet, parameters$maxOppositeGenotype, parameters$maxMiss), 109 | unit = 'ms', 110 | times = times 111 | ) 112 | 113 | test_fun <- rep("sliding", times) 114 | test_step <- rep(i, times) 115 | test_language <- rep("R", times) 116 | tmp <- data.frame(fun=test_fun, step=test_step, time=test_sliding$time, language=test_language) 117 | tests <- rbind(tests, tmp) 118 | 119 | # check cpp slidingWindow 120 | test_slidingCpp <- microbenchmark( 121 | slidingWindowCpp(subset_genotype, gaps, parameters$windowSize, step=1, parameters$maxGap, 122 | parameters$ROHet, parameters$maxOppositeGenotype, 
parameters$maxMiss), 123 | unit = 'ms', 124 | times = times 125 | ) 126 | 127 | test_fun <- rep("sliding", times) 128 | test_step <- rep(i, times) 129 | test_language <- rep("Cpp", times) 130 | tmp <- data.frame(fun=test_fun, step=test_step, time=test_slidingCpp$time, language=test_language) 131 | tests <- rbind(tests, tmp) 132 | 133 | ############################################################################## 134 | # Test snpInRun 135 | 136 | # debug 137 | message(paste("Test snpInRun: step", i)) 138 | 139 | # vector of TRUE/FALSE (whether a SNP is in a RUN or NOT) 140 | snpRun <- snpInRun(y$windowStatus, parameters$windowSize, parameters$threshold) 141 | 142 | test_snpInRun <- microbenchmark( 143 | snpInRun(y$windowStatus, parameters$windowSize, parameters$threshold), 144 | unit = 'ms', 145 | times = times 146 | ) 147 | 148 | test_fun <- rep("snpInRun", times) 149 | test_step <- rep(i, times) 150 | test_language <- rep("R", times) 151 | tmp <- data.frame(fun=test_fun, step=test_step, time=test_snpInRun$time, language=test_language) 152 | tests <- rbind(tests, tmp) 153 | 154 | # check cpp snpInRun 155 | test_snpInRunCpp <- microbenchmark( 156 | snpInRunCpp(y$windowStatus, parameters$windowSize, parameters$threshold), 157 | unit = 'ms', 158 | times = times 159 | ) 160 | 161 | test_fun <- rep("snpInRun", times) 162 | test_step <- rep(i, times) 163 | test_language <- rep("Cpp", times) 164 | tmp <- data.frame(fun=test_fun, step=test_step, time=test_snpInRunCpp$time, language=test_language) 165 | tests <- rbind(tests, tmp) 166 | 167 | ############################################################################## 168 | # Test slidingRuns 169 | 170 | # debug 171 | message(paste("Test slidingRuns: step", i)) 172 | 173 | test_slidingRuns <- microbenchmark( 174 | slidingRuns(subset_genotype, animal, subset_map, gaps, parameters, cpp=FALSE), 175 | unit = 'ms', 176 | times = times 177 | ) 178 | 179 | test_fun <- rep("slidingRuns", times) 180 | test_step <- rep(i, times) 181 
| test_language <- rep("R", times) 182 | tmp <- data.frame(fun=test_fun, step=test_step, time=test_slidingRuns$time, language=test_language) 183 | tests <- rbind(tests, tmp) 184 | 185 | # check cpp slidingRuns 186 | test_slidingRunsCpp <- microbenchmark( 187 | slidingRuns(subset_genotype, animal, subset_map, gaps, parameters, cpp=TRUE), 188 | unit = 'ms', 189 | times = times 190 | ) 191 | 192 | test_fun <- rep("slidingRuns", times) 193 | test_step <- rep(i, times) 194 | test_language <- rep("Cpp", times) 195 | tmp <- data.frame(fun=test_fun, step=test_step, time=test_slidingRunsCpp$time, language=test_language) 196 | tests <- rbind(tests, tmp) 197 | 198 | ############################################################################## 199 | # Test consecutiveRuns 200 | 201 | # debug 202 | message(paste("Test consecutiveRuns: step", i)) 203 | 204 | test_consecutiveRuns <- microbenchmark( 205 | consecutiveRuns(subset_genotype, animal, subset_map, parameters$ROHet, parameters$minSNP, 206 | parameters$maxOppositeGenotype, parameters$maxMiss, parameters$minLengthBps, 207 | parameters$maxGap), 208 | unit = 'ms', 209 | times = times 210 | ) 211 | 212 | test_fun <- rep("consecutiveRuns", times) 213 | test_step <- rep(i, times) 214 | test_language <- rep("R", times) 215 | tmp <- data.frame(fun=test_fun, step=test_step, time=test_consecutiveRuns$time, language=test_language) 216 | tests <- rbind(tests, tmp) 217 | 218 | # check cpp consecutiveRuns 219 | test_consecutiveRunsCpp <- microbenchmark( 220 | consecutiveRunsCpp(subset_genotype, animal, subset_map, parameters$ROHet, parameters$minSNP, 221 | parameters$maxOppositeGenotype, parameters$maxMiss, parameters$minLengthBps, 222 | parameters$maxGap), 223 | unit = 'ms', 224 | times = times 225 | ) 226 | 227 | test_fun <- rep("consecutiveRuns", times) 228 | test_step <- rep(i, times) 229 | test_language <- rep("Cpp", times) 230 | tmp <- data.frame(fun=test_fun, step=test_step, time=test_consecutiveRunsCpp$time, language=test_language) 
231 | tests <- rbind(tests, tmp) 232 | 233 | ############################################################################## 234 | # Test snpInsideRuns 235 | 236 | # debug 237 | message(paste("Test snpInsideRuns: step", i)) 238 | 239 | # get temporary variables 240 | mappa <- subset_map 241 | names(mappa) <- c("CHR","SNP_NAME","x","POSITION") 242 | mappa$x <- NULL 243 | 244 | # snpInsideRuns needs to be launched against a single chromosome. For testing 245 | # purpose, we will consider mappa and runs as a unique chromosome 246 | 247 | test_snpInsideRuns <- microbenchmark( 248 | snpInsideRuns(runs, mappa, genotypeFile), 249 | unit = 'ms', 250 | times = times 251 | ) 252 | 253 | test_fun <- rep("snpInsideRuns", times) 254 | test_step <- rep(i, times) 255 | test_language <- rep("R", times) 256 | tmp <- data.frame(fun=test_fun, step=test_step, time=test_snpInsideRuns$time, language=test_language) 257 | tests <- rbind(tests, tmp) 258 | 259 | # check cpp snpInsideRuns 260 | test_snpInsideRunsCpp <- microbenchmark( 261 | snpInsideRunsCpp(runs, mappa, genotypeFile), 262 | unit = 'ms', 263 | times = times 264 | ) 265 | 266 | test_fun <- rep("snpInsideRuns", times) 267 | test_step <- rep(i, times) 268 | test_language <- rep("Cpp", times) 269 | tmp <- data.frame(fun=test_fun, step=test_step, time=test_snpInsideRunsCpp$time, language=test_language) 270 | tests <- rbind(tests, tmp) 271 | } 272 | 273 | # as described by http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/ 274 | testsc <- summarySE(tests, measurevar="time", groupvars=c("fun","step", "language")) 275 | # plotGraph: build the faceted timing plot from a summarySE() result passed as `mydata` 276 | plotGraph <- function(mydata) { 277 | # mean time per step, with standard error of the mean as error bars 278 | graph <- ggplot(mydata, aes(x=step, y=time, colour=language)) + 279 | geom_errorbar(aes(ymin=time-se, ymax=time+se), width=.1) + 280 | geom_line() + 281 | geom_point() 282 | # microbenchmark stores $time in nanoseconds: divide by 1e6 to label the axis in ms 283 | formatter1e6 <- function(x){ 284 | x/1e6 285 | } 286 | 287 | # scaling data to millisecond 288 | graph <- graph + 289 | 
scale_y_continuous(labels = formatter1e6) + 290 | ylab("time (ms)") + 291 | xlab("N° of SNPs") 292 | 293 | # plot subgraphs 294 | graph <- graph + facet_wrap(~fun, ncol = 2, scales = "free_y") 295 | 296 | # complete theme first: adding theme_bw() last would reset the tweaks below 297 | graph <- graph + 298 | theme_bw() + 299 | theme(axis.title.y = element_text(size = rel(1.5), angle = 90)) + 300 | theme(axis.title.x = element_text(size = rel(1.5))) + 301 | theme(plot.title = element_text(size = rel(2))) 302 | 303 | # return graph object 304 | return(graph) 305 | } 306 | 307 | # get a graph object 308 | graph <- plotGraph(testsc) 309 | 310 | # write graph in a file 311 | png("test_performance.png", width = 1024, height = 768) 312 | print(graph) 313 | dev.off() 314 | 315 | --------------------------------------------------------------------------------