├── test ├── Spec.hs ├── testDat │ ├── makeTestDat.sh │ └── 1240k_eigenstrat_snp_short.pos.txt └── SequenceTools │ ├── UtilsSpec.hs │ └── PileupCallerSpec.hs ├── Setup.hs ├── stack.yaml ├── old ├── Makefile ├── gzip.d ├── filterTrimFastq.d └── src-simpleBamCaller │ └── simpleBamCaller.hs ├── scripts ├── unmaintained │ ├── vcfRemoveDuplicates.awk │ ├── genToVcf.hs │ ├── vcfToGen.py │ ├── samMarkDup.py │ ├── preseqWizard.py │ ├── normaliseBimWithVCF.hs │ ├── makeAdmixtureDat.hs │ └── barcodeDemulti.py ├── qp3Pop_wrapper.hs ├── qpDstat_wrapper.hs ├── convertf_wrapper.hs ├── smartpca_wrapper.hs └── mergeit_wrapper.hs ├── .gitignore ├── Dockerfile.linux ├── .travis.yml ├── .github └── workflows │ ├── Dockerfile.centos │ └── release.yaml ├── LICENSE ├── Changelog.md ├── sequenceTools.cabal ├── src └── SequenceTools │ ├── Utils.hs │ └── PileupCaller.hs ├── src-executables ├── vcf2eigenstrat.hs ├── genoStats.hs └── pileupCaller.hs └── README.md /test/Spec.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -F -pgmF hspec-discover #-} 2 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-22.43 2 | packages: 3 | - '.' 4 | extra-deps: 5 | - sequence-formats-1.11.0.0 6 | - pipes-zlib-0.4.4.2 -------------------------------------------------------------------------------- /old/Makefile: -------------------------------------------------------------------------------- 1 | build/filterTrimFastq : filterTrimFastq.d gzip.d 2 | dmd -O filterTrimFastq.d gzip.d -odbuild -ofbuild/filterTrimFastq 3 | 4 | clean : 5 | rm build/* 6 | 7 | .PHONY: clean -------------------------------------------------------------------------------- /scripts/unmaintained/vcfRemoveDuplicates.awk: -------------------------------------------------------------------------------- 1 | $0 ~ /^#/ {print} 2 | $0 !~ /^#/ { 3 | if(!($1 == last_chrom && $2 == last_pos)) 4 | print 5 | last_chrom = $1 6 | last_pos = $2 7 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | *.hi 3 | *.o 4 | .stack-work 5 | mergeit_wrapper 6 | smartpca_wrapper 7 | qp3Pop_wrapper 8 | qpDstat_wrapper 9 | convertf_wrapper 10 | .DS_Store 11 | ._* 12 | stack.yaml.lock 13 | -------------------------------------------------------------------------------- /Dockerfile.linux: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | RUN yum -y update 4 | RUN yum -y install zlib-devel wget ncurses-devel ncurses-compat-libs make gcc 5 | 6 | # Install GHC since stack's local install has issues 7 | RUN wget https://downloads.haskell.org/~ghc/9.4.7/ghc-9.4.7-x86_64-centos7-linux.tar.xz 8 | RUN tar xvf ghc-9.4.7-x86_64-centos7-linux.tar.xz 9 | RUN cd ghc-9.4.7; ./configure; make install 10 | 11 | # install stack 12 | RUN curl -sSL https://get.haskellstack.org/ | sh 13 | 14 | # add source directory 15 | ADD . source 16 | WORKDIR source 17 | 18 | # Install sequenceTools. As described in cryptonite README, cryptoniate requires disabling "use_target_attributes" 19 | RUN stack install --system-ghc --flag cryptonite:-use_target_attributes 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | language: c 4 | 5 | services: 6 | - docker 7 | 8 | jobs: 9 | include: 10 | - name: "Static artifact" 11 | if: tag IS present 12 | before_script: 13 | - docker build --tag linux -f Dockerfile.static-linux . 14 | script: 15 | - docker create --name linuxcontainer linux 16 | - docker cp linuxcontainer:/sequencetools_dist sequenceTools_x86_64-linux 17 | - cp LICENSE sequenceTools_x86_64-linux 18 | - tar -cvzf sequencetools_static.x86_64-linux.tar.gz sequenceTools_x86_64-linux 19 | 20 | deploy: 21 | provider: releases 22 | skip_cleanup: true 23 | api_key: $GITHUB_TOKEN 24 | repo: sequencetoolsconda/sequenceTools 25 | file: "sequencetools_static.x86_64-linux.tar.gz" 26 | on: 27 | tags: true 28 | 29 | -------------------------------------------------------------------------------- /scripts/unmaintained/genToVcf.hs: -------------------------------------------------------------------------------- 1 | import Control.Monad (liftM) 2 | import Data.List (intercalate) 3 | import Data.List.Split (chunksOf, splitOn) 4 | import System.Environment (getArgs) 5 | 6 | convertLine :: String -> String -> String 7 | convertLine chr line = 8 | let (_:_:pos:ref:alt:gens) = splitOn " " line 9 | genFields = map makeGenField $ chunksOf 3 (map read gens) 10 | in intercalate "\t" $ [chr, pos, ".", ref, alt, "100", ".", ".", "GT:GP"] ++ genFields 11 | 12 | makeGenField :: [Double] -> String 13 | makeGenField [p1, p2, p3] = 14 | if p1 > p2 && p1 > p3 then "0/0:" ++ probStr else 15 | if p2 > p1 && p2 > p3 then "0/1:" ++ probStr else "1/1:" ++ probStr 16 | where probStr = intercalate "," $ map show [p1, p2, p3] 17 | 18 | main = do 19 | chr <- liftM head getArgs 20 | interact $ unlines . map (convertLine chr) . lines 21 | -------------------------------------------------------------------------------- /.github/workflows/Dockerfile.centos: -------------------------------------------------------------------------------- 1 | FROM quay.io/condaforge/linux-anvil-cos7-x86_64 2 | 3 | RUN yum -y update 4 | RUN yum -y install zlib-devel wget ncurses-devel ncurses-compat-libs make gcc gcc-c++ 5 | 6 | # Install GHC since stack's local install has issues 7 | RUN wget https://downloads.haskell.org/~ghc/9.4.7/ghc-9.4.7-x86_64-centos7-linux.tar.xz 8 | RUN tar xvf ghc-9.4.7-x86_64-centos7-linux.tar.xz 9 | RUN cd ghc-9.4.7-x86_64-unknown-linux; ./configure; make install 10 | 11 | # install stack 12 | RUN curl -sSL https://get.haskellstack.org/ | sh 13 | 14 | # add source directory 15 | ADD . source 16 | WORKDIR source 17 | 18 | # install 19 | # - as described in cryptonite README, cryptoniate requires disabling "use_target_attributes" 20 | # - for bitvec the "simd" flag added in v1.1.5.0 has to be deactivated - the gcc version here does not seem to support it 21 | RUN stack install --system-ghc --flag cryptonite:-use_target_attributes --flag bitvec:-simd 22 | -------------------------------------------------------------------------------- /test/testDat/makeTestDat.sh: -------------------------------------------------------------------------------- 1 | # Making the short SNP file: 2 | (for CHR in {1..24}; do cat /projects1/public_data/Datashare_Boston_Jena_June2018.backup/1240K.snp | awk -v chr=$CHR '$2==chr' | head -100; done) > 1240k_eigenstrat_snp_short.snp.txt 3 | 4 | # Making a pos file for samtools 5 | cat 1240k_eigenstrat_snp_short.snp.txt | awk '{if($2==23)$2="X"; if($2==24)$2="Y"; print $2, $4}' > 1240k_eigenstrat_snp_short.pos.txt 6 | 7 | # Making a short example mpileup on these positions (takes 10 minutes): 8 | BAM_DIR=/projects1/users/schiffels/AncientBritish/bams; samtools mpileup -B -q30 -Q30 -R -f /projects1/Reference_Genomes/Human/hs37d5/hs37d5.fa -l 1240k_eigenstrat_snp_short.pos.txt $BAM_DIR/12880A.bam $BAM_DIR/12881A.bam $BAM_DIR/12883A.bam $BAM_DIR/12885A.bam > AncientBritish.short.pileup.txt 9 | 10 | # Running pileupCaller: 11 | pileupCaller --sampleNames 12880A,12881A,12883A,12885A --randomHaploid --singleStrandMode -f 1240k_eigenstrat_snp_short.snp.txt < AncientBritish.short.pileup.txt 12 | -------------------------------------------------------------------------------- /scripts/unmaintained/vcfToGen.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | 5 | def PLtoP(PL): 6 | min_PL = min(PL) 7 | pVec = [] 8 | for pl in PL: 9 | p = 10 ** (-(pl - min_PL) / 10.0) 10 | if p < 0.0001: 11 | p = 0.0 12 | pVec.append(p) 13 | norm = sum(pVec) 14 | return list(map(lambda p:p/norm, pVec)) 15 | 16 | line_nr = 1 17 | for line in sys.stdin: 18 | if line[0] != '#': 19 | fields = line.strip().split() 20 | chr_ = fields[0] 21 | pos = fields[1] 22 | ref = fields[3] 23 | alt = fields[4] 24 | id_ = "SNP{}".format(line_nr) 25 | rs = "{}:{}".format(chr_, pos) 26 | 27 | 28 | pVecs = [] 29 | for gen in fields[9:]: 30 | PL = list(map(int, gen[4:].split(","))) 31 | pVec = PLtoP(PL) if PL != [0, 0, 0] else PL 32 | pVecs.append(" ".join(map(str, pVec))) 33 | print(id_, rs, pos, ref, alt, " ".join(pVecs)) 34 | line_nr += 1 35 | -------------------------------------------------------------------------------- /scripts/unmaintained/samMarkDup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import string 5 | import argparse 6 | 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--remove", action="store_true", help="Remove duplicates instead of marking") 9 | args = parser.parse_args() 10 | 11 | lastRead = None 12 | lastReadCount = 0 13 | dupCounts = {} 14 | for line in sys.stdin: 15 | if line[0] == '@': 16 | print line, 17 | continue 18 | fields = string.split(string.strip(line)) 19 | flag = int(fields[1]) 20 | 21 | if flag & 0x4: #unmapped 22 | print line, 23 | continue 24 | 25 | rname = fields[2] 26 | pos = int(fields[3]) 27 | seq = fields[9] 28 | 29 | if lastRead == (rname, pos, len(seq)): 30 | fields[1] = str(flag | 0x400) 31 | lastReadCount += 1 32 | if not args.remove: 33 | print "\t".join(fields) 34 | else: 35 | if lastReadCount > 0: 36 | if lastReadCount not in dupCounts: 37 | dupCounts[lastReadCount] = 0 38 | dupCounts[lastReadCount] += 1 39 | fields[1] = str(flag & (~0x400)) 40 | lastRead = (rname, pos, len(seq)) 41 | lastReadCount = 1 42 | print "\t".join(fields) 43 | 44 | 45 | sys.stderr.write("Cardinality\tCounts\n") 46 | for c in sorted(dupCounts.keys()): 47 | sys.stderr.write("{}\t{}\n".format(c, dupCounts[c])) 48 | 49 | -------------------------------------------------------------------------------- /scripts/qp3Pop_wrapper.hs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env stack 2 | -- stack script --resolver lts-14.1 --package turtle 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | import Control.Applicative (optional) 6 | import Prelude hiding (FilePath) 7 | import Turtle 8 | 9 | data Options = Options { 10 | optGeno :: FilePath, 11 | optSnp :: FilePath, 12 | optInd :: FilePath, 13 | optPopList :: FilePath 14 | } 15 | 16 | main = do 17 | args <- options "Admixtools qp3Pop wrapper" parser 18 | runManaged $ do 19 | paramFile <- mktempfile "." "qp3Pop_wrapper" 20 | let content = [(format ("genotypename:\t"%fp) (optGeno args)), 21 | (format ("snpname:\t"%fp) (optSnp args)), 22 | (format ("indivname:\t"%fp) (optInd args)), 23 | (format ("popfilename:\t"%fp) (optPopList args))] 24 | output paramFile . select . map unsafeTextToLine $ content 25 | ec <- proc "qp3Pop" ["-p", format fp paramFile] empty 26 | case ec of 27 | ExitSuccess -> return () 28 | ExitFailure n -> err . unsafeTextToLine $ format ("qp3Pop failed with exit code "%d) n 29 | 30 | parser :: Parser Options 31 | parser = Options <$> optPath "geno" 'g' "Genotype File" 32 | <*> optPath "snp" 's' "Snp File" 33 | <*> optPath "ind" 'i' "Ind File" 34 | <*> optPath "popList" 'p' "give a list with all population triples" 35 | -------------------------------------------------------------------------------- /test/SequenceTools/UtilsSpec.hs: -------------------------------------------------------------------------------- 1 | module SequenceTools.UtilsSpec (spec) where 2 | 3 | import SequenceTools.Utils (sampleWithoutReplacement) 4 | 5 | import Control.Monad (replicateM_) 6 | import Data.List (nub, sort, union) 7 | import Test.Hspec 8 | 9 | spec :: Spec 10 | spec = testSampleWithoutReplacement 11 | 12 | testSampleWithoutReplacement :: Spec 13 | testSampleWithoutReplacement = describe "sampleWithoutReplacement" $ do 14 | it "should return Nothing if sample 1 from empty list" $ 15 | sampleWithoutReplacement ([] :: [Char]) 1 `shouldReturn` Nothing 16 | it "should return one item if sample 1 from 1" $ 17 | sampleWithoutReplacement ['A'] 1 `shouldReturn` Just ['A'] 18 | it "should return an empty list of 0 sampled from 0" $ 19 | sampleWithoutReplacement ([] :: [Char]) 0 `shouldReturn` Just [] 20 | it "should return an empty list if 0 sampled from 1" $ 21 | sampleWithoutReplacement ['A'] 0 `shouldReturn` Just [] 22 | it "should return Nothing if 2 sampled from 1" $ 23 | sampleWithoutReplacement ['A'] 2 `shouldReturn` Nothing 24 | it "should return 2 items if 2 sampled from 2" $ do 25 | r <- sampleWithoutReplacement ['A', 'C'] 2 26 | fmap sort r `shouldBe` Just (sort ['A', 'C']) 27 | it "should return a non-duplicate subset of ABCDEFGHIJ if 4 are sampled" $ do 28 | replicateM_ 10 $ do 29 | Just r <- sampleWithoutReplacement "ABCDEFGHIJ" 4 30 | length (nub r) `shouldBe` 4 31 | length (union r "ABCDEFGHIJ") `shouldBe` 10 32 | 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Stephan Schiffels (c) 2024 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Author name here nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /scripts/qpDstat_wrapper.hs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env stack 2 | -- stack script --resolver lts-14.1 --package turtle 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | import Control.Applicative (optional) 6 | import Prelude hiding (FilePath) 7 | import Turtle 8 | 9 | data Options = Options { 10 | optGeno :: FilePath, 11 | optSnp :: FilePath, 12 | optInd :: FilePath, 13 | optPopList :: FilePath, 14 | optLow :: Maybe Int, 15 | optHigh :: Maybe Int 16 | } 17 | 18 | main = do 19 | args <- options "Admixtools qpDstat wrapper" parser 20 | runManaged $ do 21 | paramFile <- mktempfile "." "qpDstat_wrapper" 22 | let content = [(format ("genotypename:\t"%fp) (optGeno args)), 23 | (format ("snpname:\t"%fp) (optSnp args)), 24 | (format ("indivname:\t"%fp) (optInd args)), 25 | (format ("popfilename:\t"%fp) (optPopList args))] 26 | output paramFile . select . map unsafeTextToLine $ content 27 | let execParams = ["-p", format fp paramFile] ++ 28 | maybe [] (\low -> ["-l", format d low]) (optLow args) ++ 29 | maybe [] (\high -> ["-h", format d high]) (optHigh args) 30 | ec <- proc "qpDstat" execParams empty 31 | case ec of 32 | ExitSuccess -> return () 33 | ExitFailure n -> err . unsafeTextToLine $ 34 | format ("qpDstat failed with exit code "%d) n 35 | 36 | parser :: Parser Options 37 | parser = Options <$> optPath "geno" 'g' "Genotype File" 38 | <*> optPath "snp" 's' "Snp File" 39 | <*> optPath "ind" 'i' "Ind File" 40 | <*> optPath "popList" 'p' "give a list with all population triples" 41 | <*> optional (optInt "lower" 'l' "analyse population quadruples from this line in the popList") 42 | <*> optional (optInt "upper" 'u' "analyse population quadruples up to this line in the popList") 43 | -------------------------------------------------------------------------------- /scripts/unmaintained/preseqWizard.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import math 5 | 6 | parser = argparse.ArgumentParser(description="Calculate cost-efficient additional sequencing from Preseq output. Currently for Capture data only") 7 | parser.add_argument('-e', "--endogenous", metavar="", type=float, help="Fraction of reads (0-1 scale) that map to the reference", required=True) 8 | parser.add_argument('-m', "--mapped_reads", metavar="", type=int, help="Number of mapped reads after duplicate removal", required=True) 9 | parser.add_argument('-c', "--coverage", metavar="", help="Mean coverage on target SNPs", type=float, required=True) 10 | parser.add_argument('-s', "--snps", metavar="", help="Number of SNPs in the capture panel [Default=1240000]", type=int, default=1240000) 11 | parser.add_argument('-g', "--goal", metavar="", help="Goal, given as the number of new reads you are willing to produce to get one more SNP covered. [Default=100]", type=float, default=100) 12 | parser.add_argument("extrap_file", metavar="", type=open, help="Preseq extrap file") 13 | 14 | args = parser.parse_args() 15 | 16 | coverage_per_read = args.coverage / args.mapped_reads 17 | 18 | covered_snps = None 19 | tot_sequenced = None 20 | 21 | print("sequenced_reads", "SNPS covered", "Cost", sep="\t") 22 | 23 | next(args.extrap_file) 24 | for line in args.extrap_file: 25 | fields = line.strip().split() 26 | exp_tot_mapped = float(fields[0]) 27 | exp_unique = float(fields[1]) 28 | exp_tot_sequenced = exp_tot_mapped / args.endogenous 29 | exp_coverage = exp_unique * coverage_per_read 30 | exp_snps_covered = (1.0 - math.exp(-exp_coverage)) * args.snps 31 | if covered_snps is not None: 32 | new_snps = exp_snps_covered - covered_snps 33 | new_tot_sequenced = exp_tot_sequenced - tot_sequenced 34 | cost = new_tot_sequenced / new_snps 35 | print(int(exp_tot_sequenced), int(exp_snps_covered), int(cost), sep="\t") 36 | if cost > args.goal: 37 | break 38 | covered_snps = exp_snps_covered 39 | tot_sequenced = exp_tot_sequenced 40 | -------------------------------------------------------------------------------- /scripts/convertf_wrapper.hs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env stack 2 | -- stack script --resolver lts-14.1 --package turtle 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | import Control.Applicative (optional) 6 | import Prelude hiding (FilePath) 7 | import Turtle 8 | 9 | data Options = Options { 10 | optGeno :: FilePath, 11 | optSnp :: FilePath, 12 | optInd :: FilePath, 13 | optFormat :: FilePath, 14 | optPoplist :: Maybe FilePath, 15 | optOutGeno :: FilePath, 16 | optOutSnp :: FilePath, 17 | optOutInd :: FilePath 18 | } 19 | 20 | main = do 21 | args <- options "Eigensoft convertf wrapper" parser 22 | runManaged $ do 23 | paramFile <- mktempfile "." "convert_wrapper" 24 | let popListRow = case optPoplist args of 25 | Just popList -> return . unsafeTextToLine $ format ("poplistname:\t"%fp) popList 26 | Nothing -> empty 27 | let content = [(format ("genotypename:\t"%fp) (optGeno args)), 28 | (format ("snpname:\t"%fp) (optSnp args)), 29 | (format ("indivname:\t"%fp) (optInd args)), 30 | (format ("outputformat:\t"%fp) (optFormat args)), 31 | (format ("genotypeoutname:\t"%fp) (optOutGeno args)), 32 | (format ("snpoutname:\t"%fp) (optOutSnp args)), 33 | (format ("indivoutname:\t"%fp) (optOutInd args))] 34 | output paramFile $ select (map unsafeTextToLine content) <|> popListRow 35 | ec <- proc "convertf" ["-p", format fp paramFile] empty 36 | case ec of 37 | ExitSuccess -> return () 38 | ExitFailure n -> err . unsafeTextToLine $ format ("convertf failed with exit code "%d) n 39 | 40 | parser :: Parser Options 41 | parser = Options <$> optPath "geno" 'g' "Genotype File" 42 | <*> optPath "snp" 's' "Snp File" 43 | <*> optPath "ind" 'i' "Ind File" 44 | <*> optPath "outFormat" 'f' "output format" 45 | <*> optional (optPath "popList" 'p' "population list") 46 | <*> optPath "outGeno" 'G' "Output Genotype File" 47 | <*> optPath "outSnp" 'S' "Output Snp File" 48 | <*> optPath "outInd" 'I' "Output Ind File" 49 | -------------------------------------------------------------------------------- /scripts/smartpca_wrapper.hs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env stack 2 | -- stack script --resolver lts-14.1 --package turtle 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | import Control.Applicative (optional) 6 | import Prelude hiding (FilePath) 7 | import Turtle 8 | 9 | data Options = Options { 10 | optGeno :: FilePath, 11 | optSnp :: FilePath, 12 | optInd :: FilePath, 13 | optOutPrefix :: FilePath, 14 | optLSQproject :: Bool, 15 | optPopList :: Maybe FilePath 16 | } 17 | 18 | main = do 19 | args <- options "Eigensoft smartpca wrapper" parser 20 | runManaged $ do 21 | paramFile <- mktempfile "." "smartpca_wrapper" 22 | let content = [(format ("genotypename:\t"%fp) (optGeno args)), 23 | (format ("snpname:\t"%fp) (optSnp args)), 24 | (format ("indivname:\t"%fp) (optInd args)), 25 | (format ("evecoutname:\t"%fp%".evec.txt") (optOutPrefix args)), 26 | (format ("evaloutname:\t"%fp%".eval.txt") (optOutPrefix args))] 27 | lsqProjectLine = if (optLSQproject args) then return "lsqproject:\tYES" else empty 28 | popListLine = case optPopList args of 29 | Just popList -> return . unsafeTextToLine $ format ("poplistname:\t"%fp) popList 30 | Nothing -> empty 31 | output paramFile $ select (map unsafeTextToLine content) <|> lsqProjectLine <|> popListLine 32 | ec <- proc "smartpca" ["-p", format fp paramFile] empty 33 | case ec of 34 | ExitSuccess -> return () 35 | ExitFailure n -> err . unsafeTextToLine $ format ("mergeit failed with exit code "%d) n 36 | 37 | parser :: Parser Options 38 | parser = Options <$> optPath "geno" 'g' "Genotype File" 39 | <*> optPath "snp" 's' "Snp File" 40 | <*> optPath "ind" 'i' "Ind File" 41 | <*> optPath "outPrefix" 'o' "Output prefix for *.evec.txt and *.eval.txt output \ 42 | \files" 43 | <*> switch "lsqProject" 'l' "set lsqproject option to YES" 44 | <*> optional (optPath "popList" 'p' "give poplist file to restrict PCA to \ 45 | \populations listed.") 46 | -------------------------------------------------------------------------------- /Changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | - V 1.6.0.0: 4 | - pileupCaller now supports writing of VCF (Variant Call Format), via the standard output. See option --vcf and the Readme. 5 | - V 1.5.4.0: 6 | - updated sequence-formats dependency allows more lenient parsing of pileup-data, now also allowing for arbitrary reference alleles (not just ACTGN). This won't affect calling (reads that support an allele that is not in the SNP-file input are treated as before), but will be less disruptive when parsing pileup-input, for example without a bed-file in samtools. 7 | - improved error output for parsing problems with pileup-format data. Now only a small part of the problematic chunk is output, hopefully easing error interpretation in such cases 8 | - output a useful error message if the number of samples passed in --sampleNames is inconsistent with the pileup-input 9 | - `--samplePopName` now accepts multiple pop-names, separated by comma. The number of pop-names must then match the number of samples. 10 | - V 1.5.3.2: fixed a bug in vcf2eigenstrat that would fail on VCFs with missing Quality values. 11 | - V 1.5.3.1: updated to latest GHC pedantic compilation 12 | - V 1.5.3: Upgraded to sequence-formats 1.7.0 introducing an option for plink popName encoding, and improved pileup-Parsing to allow for skip-reference characters 13 | - V 1.5.2: Fixed a bug with --samplePopName having to be entered after -p or -e. Fixed a bug in the sequence-formats dependency. 14 | - V 1.5.1: Added automatic building 15 | - V 1.5.0: Added support for Plink output 16 | - V 1.4.0.4: 17 | * Fixed eigenstrat-output in pileupCaller to add a dot after the outputprefix before the file extensions. 18 | * Updated haskell-stack wrapper scripts for EIGENSOFT and ADMIXTOOLS. 19 | * Moved unmaintained scripts into unmaintained folder. 20 | - V 1.4.0.3: Updated to new sequence-formats version, now including reading of genetic position from eigenstrat files. 21 | - V 1.4.0.1: Improved README, fixed output bug in genoStats.hs 22 | - V 1.4.0: Added single strand mode, and new triallelic treatment. 23 | - V 1.3.1: Bumped dependency on sequence-formats to new sequence-formats-1.4.0, which includes strand-information in pileup data, as well as rsIds in freqSum to output the correct rsId, and an option to parse chromosomes X, Y and MT. 24 | - V 1.3.0: Lots of refactoring. Lots of testing. Removed some features in vcf2eigenstrat and in pileupCaller, including the option in pileupCaller to call without a SNP file. 25 | - V 1.2.4: normaliseBimWithVCF is ready. 26 | - V 1.2.3 : Adapted to newest sequence-formats. Had to change all the chromosome-related code to the newType Chrom datatype. Also started implementing normaliseBimWithVCF. 27 | 28 | -------------------------------------------------------------------------------- /old/gzip.d: -------------------------------------------------------------------------------- 1 | import std.zlib; 2 | import std.stdio; 3 | import std.range; 4 | import std.traits; 5 | 6 | class GzipInputRange { 7 | UnCompress uncompressObj; 8 | File f; 9 | auto CHUNKSIZE = 0x4000; 10 | ReturnType!(f.byChunk) chunkRange; 11 | bool exhausted; 12 | char[] uncompressedBuffer; 13 | size_t bufferIndex; 14 | 15 | this(string filename) { 16 | f = File(filename, "r"); 17 | chunkRange = f.byChunk(CHUNKSIZE); 18 | uncompressObj = new UnCompress(HeaderFormat.gzip); 19 | load(); 20 | } 21 | 22 | void load() { 23 | if(!chunkRange.empty) { 24 | auto raw = chunkRange.front.dup; 25 | chunkRange.popFront(); 26 | uncompressedBuffer = cast(char[])uncompressObj.uncompress(raw); 27 | // uncompressedBuffer = cast(char[])(uncompressObj.uncompress(raw).dup); 28 | 29 | bufferIndex = 0; 30 | } 31 | else { 32 | if(!exhausted) { 33 | uncompressedBuffer = cast(char[])uncompressObj.flush(); 34 | // uncompressedBuffer = cast(char[])(uncompressObj.flush().dup); 35 | exhausted = true; 36 | bufferIndex = 0; 37 | } 38 | else 39 | uncompressedBuffer.length = 0; 40 | } 41 | } 42 | 43 | @property char front() { 44 | return uncompressedBuffer[bufferIndex]; 45 | } 46 | 47 | void popFront() { 48 | bufferIndex += 1; 49 | if(bufferIndex >= uncompressedBuffer.length) { 50 | load(); 51 | bufferIndex = 0; 52 | } 53 | } 54 | 55 | @property bool empty() { 56 | return uncompressedBuffer.length == 0; 57 | } 58 | } 59 | 60 | class GzipByLine { 61 | GzipInputRange range; 62 | char[] buf; 63 | 64 | this(string filename) { 65 | this.range = new GzipInputRange(filename); 66 | popFront(); 67 | } 68 | 69 | @property bool empty() { 70 | return buf.length == 0; 71 | } 72 | 73 | void popFront() { 74 | buf.length = 0; 75 | while(!range.empty && range.front != '\n') { 76 | buf ~= range.front; 77 | range.popFront(); 78 | } 79 | range.popFront(); 80 | } 81 | 82 | string front() { 83 | return buf.idup; 84 | } 85 | } 86 | 87 | class GzipOut { 88 | Compress compressObj; 89 | File f; 90 | 91 | this(string filename) { 92 | f = File(filename, "w"); 93 | compressObj = new Compress(HeaderFormat.gzip); 94 | } 95 | 96 | void compress(string s) { 97 | try { 98 | auto compressed = compressObj.compress(s.dup); 99 | f.rawWrite(compressed); 100 | } 101 | catch (Exception e) { 102 | stderr.writeln("trying to compress: ", s); 103 | throw e; 104 | } 105 | } 106 | 107 | void finish() { 108 | auto compressed = compressObj.flush(); 109 | f.rawWrite(compressed); 110 | } 111 | } 112 | 113 | -------------------------------------------------------------------------------- /sequenceTools.cabal: -------------------------------------------------------------------------------- 1 | name: sequenceTools 2 | version: 1.6.0.0 3 | synopsis: A package with tools for processing DNA sequencing data 4 | description: The tools in this package process sequencing Data, in particular from ancient DNA sequencing libraries. Key tool in this package is pileupCaller, a tool to randomly sample genotypes from sequencing data. 5 | license: GPL-3 6 | license-file: LICENSE 7 | author: Stephan Schiffels 8 | maintainer: stephan.schiffels@mac.com 9 | category: Bioinformatics 10 | build-type: Simple 11 | cabal-version: >=1.10 12 | Homepage: https://github.com/stschiff/sequenceTools 13 | Bug-Reports: https://github.com/stschiff/sequenceTools/issues 14 | extra-source-files: README.md, 15 | Changelog.md 16 | 17 | library 18 | exposed-modules: SequenceTools.Utils, 19 | SequenceTools.PileupCaller 20 | hs-source-dirs: src 21 | build-depends: base >= 4.7 && < 5, optparse-applicative, random, 22 | sequence-formats, bytestring, vector, pipes 23 | other-modules: Paths_sequenceTools 24 | default-language: Haskell2010 25 | 26 | Test-Suite sequenceToolsTests 27 | type: exitcode-stdio-1.0 28 | main-is: Spec.hs 29 | hs-source-dirs: test 30 | build-depends: base, hspec, sequenceTools, sequence-formats, 31 | vector, bytestring, pipes 32 | other-modules: SequenceTools.UtilsSpec, 33 | SequenceTools.PileupCallerSpec 34 | default-language: Haskell2010 35 | 36 | executable pileupCaller 37 | main-is: pileupCaller.hs 38 | hs-source-dirs: src-executables 39 | build-depends: base, sequenceTools, sequence-formats, 40 | optparse-applicative, pipes, vector, random, bytestring, 41 | pipes-safe, pipes-ordered-zip, split, prettyprinter, transformers, 42 | text 43 | default-language: Haskell2010 44 | other-modules: Paths_sequenceTools 45 | ghc-options: -threaded -rtsopts -with-rtsopts=-N2 46 | 47 | executable vcf2eigenstrat 48 | main-is: vcf2eigenstrat.hs 49 | hs-source-dirs: src-executables 50 | build-depends: base, sequenceTools, pipes-ordered-zip, 51 | sequence-formats, bytestring, vector, 52 | optparse-applicative, pipes, pipes-safe 53 | default-language: Haskell2010 54 | 55 | executable genoStats 56 | main-is: genoStats.hs 57 | hs-source-dirs: src-executables 58 | build-depends: base, sequence-formats, sequenceTools, 59 | foldl, bytestring, vector, lens-family, optparse-applicative, 60 | pipes, pipes-group, pipes-safe 61 | default-language: Haskell2010 62 | -------------------------------------------------------------------------------- /src/SequenceTools/Utils.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module SequenceTools.Utils (versionInfoOpt, versionInfoText, sampleWithoutReplacement, 3 | freqSumToEigenstrat, dosageToEigenstratGeno, UserInputException(..)) where 4 | 5 | import SequenceFormats.Eigenstrat (EigenstratSnpEntry (..), 6 | GenoEntry (..), GenoLine) 7 | import SequenceFormats.FreqSum (FreqSumEntry (..)) 8 | import SequenceFormats.Utils (Chrom (..)) 9 | 10 | import Control.Exception (Exception) 11 | import qualified Data.ByteString.Char8 as B 12 | import Data.Vector (fromList) 13 | import Data.Version (showVersion) 14 | import qualified Options.Applicative as OP 15 | import Paths_sequenceTools (version) 16 | import System.Random (randomRIO) 17 | 18 | data UserInputException = UserInputException String deriving (Show) 19 | instance Exception UserInputException 20 | 21 | versionInfoOpt :: OP.Parser (a -> a) 22 | versionInfoOpt = OP.infoOption (showVersion version) (OP.long "version" <> OP.help "Print version and exit") 23 | 24 | versionInfoText :: String 25 | versionInfoText = "This tool is part of sequenceTools version " ++ showVersion version 26 | 27 | sampleWithoutReplacement :: [a] -> Int -> IO (Maybe [a]) 28 | sampleWithoutReplacement = go [] 29 | where 30 | go res _ 0 = return $ Just res 31 | go res xs n 32 | | n > length xs = return Nothing 33 | | n == length xs = return $ Just (xs ++ res) 34 | | otherwise = do 35 | rn <- randomRIO (0, length xs - 1) 36 | let a = xs !! rn 37 | xs' = remove rn xs 38 | go (a:res) xs' (n - 1) 39 | remove i xs = let (ys, zs) = splitAt i xs in ys ++ tail zs 40 | 41 | -- |convert a freqSum entry to an eigenstrat SNP entry 42 | freqSumToEigenstrat :: FreqSumEntry -> (EigenstratSnpEntry, GenoLine) 43 | freqSumToEigenstrat (FreqSumEntry chrom@(Chrom c) pos maybeSnpId maybeGeneticPos ref alt calls) = 44 | let snpId_ = case maybeSnpId of 45 | Just id_ -> id_ 46 | Nothing -> c <> "_" <> B.pack (show pos) 47 | geneticPos = case maybeGeneticPos of 48 | Just p -> p 49 | Nothing -> 0.0 50 | snpEntry = EigenstratSnpEntry chrom pos geneticPos snpId_ ref alt 51 | geno = fromList . map dosageToEigenstratGeno $ calls 52 | in (snpEntry, geno) 53 | 54 | -- |convert a Dosage to an eigenstrat-encoded genotype 55 | dosageToEigenstratGeno :: Maybe (Int, Int) -> GenoEntry 56 | dosageToEigenstratGeno Nothing = Missing 57 | dosageToEigenstratGeno (Just (0, 1)) = HomRef 58 | dosageToEigenstratGeno (Just (1, 1)) = HomAlt 59 | dosageToEigenstratGeno (Just (0, 2)) = HomRef 60 | dosageToEigenstratGeno (Just (1, 2)) = Het 61 | dosageToEigenstratGeno (Just (2, 2)) = HomAlt 62 | dosageToEigenstratGeno c = error ("unknown genotype " ++ show c) 63 | -------------------------------------------------------------------------------- /scripts/mergeit_wrapper.hs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env stack 2 | -- stack script --resolver lts-14.1 --package turtle 3 | {-# LANGUAGE OverloadedStrings #-} 4 | 5 | import Control.Applicative (optional) 6 | import Prelude hiding (FilePath) 7 | import Turtle 8 | 9 | data Options = Options { 10 | optGeno1 :: FilePath, 11 | optSnp1 :: FilePath, 12 | optInd1 :: FilePath, 13 | optGeno2 :: FilePath, 14 | optSnp2 :: FilePath, 15 | optInd2 :: FilePath, 16 | optOutPrefix :: FilePath, 17 | optAllowDups :: Bool, 18 | optStrandCheck :: Bool, 19 | optOutFormat :: Maybe OutFormat 20 | } 21 | 22 | data OutFormat = ANCESTRYMAP | EIGENSTRAT | PED | PACKEDPED | PACKEDANCESTRYMAP 23 | deriving (Show, Read) 24 | 25 | main = do 26 | args <- options "Eigensoft mergeit wrapper" parser 27 | runManaged $ do 28 | paramFile <- mktempfile "." "mergeit_wrapper" 29 | let content = [(format ("geno1:\t"%fp) (optGeno1 args)), 30 | (format ("snp1:\t"%fp) (optSnp1 args)), 31 | (format ("ind1:\t"%fp) (optInd1 args)), 32 | (format ("geno2:\t"%fp) (optGeno2 args)), 33 | (format ("snp2:\t"%fp) (optSnp2 args)), 34 | (format ("ind2:\t"%fp) (optInd2 args)), 35 | (format ("allowdups:\t"%s) 36 | (if optAllowDups args then "YES" else "NO")), 37 | (format ("strandcheck:\t"%s) 38 | (if optStrandCheck args then "YES" else "NO")), 39 | (format ("genooutfilename:\t"%fp%".geno") (optOutPrefix args)), 40 | (format ("snpoutfilename:\t"%fp%".snp") (optOutPrefix args)), 41 | (format ("indoutfilename:\t"%fp%".ind") (optOutPrefix args))] 42 | let outputFormatLine = case optOutFormat args of 43 | Just outFormat -> return . unsafeTextToLine $ format ("outputformat:\t"%w) outFormat 44 | Nothing -> empty 45 | output paramFile $ select (map unsafeTextToLine content) <|> outputFormatLine 46 | ec <- proc "mergeit" ["-p", format fp paramFile] empty 47 | case ec of 48 | ExitSuccess -> return () 49 | ExitFailure n -> err . unsafeTextToLine $ format ("mergeit failed with exit code "%d) n 50 | 51 | parser :: Parser Options 52 | parser = Options <$> optPath "geno1" 'g' "First Genotype File" 53 | <*> optPath "snp1" 's' "First Snp File" 54 | <*> optPath "ind1" 'i' "First Ind File" 55 | <*> optPath "geno2" 'G' "Second Genotype File" 56 | <*> optPath "snp2" 'S' "Second Snp File" 57 | <*> optPath "ind2" 'I' "Second Ind File" 58 | <*> optPath "outPrefix" 'o' "Output prefix for *.geno, *.snp and *.ind \ 59 | \output files" 60 | <*> switch "allowDups" 'd' "Allow duplicates, leading for any duplicate \ 61 | \individual in the second data set to be ignored" 62 | <*> switch "strandcheck" 'c' "Check for strand misalignment. Warning: If set, \ 63 | \removes all A/T and C/G SNPs" 64 | <*> optional (optRead "outFormat" 'f' "Output format. One of ANCESTRYMAP, \ 65 | \EIGENSTRAT, PED, PACKEDPED, PACKEDANCESTRYMAP. Default is \ 66 | \PACKEDANCESTRYMAP") 67 | -------------------------------------------------------------------------------- /scripts/unmaintained/normaliseBimWithVCF.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | import SequenceTools.Utils (versionInfoOpt, versionInfoText) 4 | 5 | import Control.Monad.IO.Class (MonadIO, liftIO) 6 | import qualified Data.ByteString.Char8 as B 7 | import Data.Maybe (fromJust, isJust) 8 | import qualified Options.Applicative as OP 9 | import Pipes (Pipe, cat, for, runEffect, yield, 10 | (>->)) 11 | import Pipes.OrderedZip (orderedZip) 12 | import qualified Pipes.Prelude as P 13 | import Pipes.Safe (runSafeT) 14 | import SequenceFormats.Eigenstrat (EigenstratSnpEntry (..), 15 | readBimFile, writeEigenstratSnp) 16 | import SequenceFormats.Utils (Chrom (..)) 17 | import SequenceFormats.VCF (VCFentry (..), isBiallelicSnp, 18 | readVCFfromFile) 19 | import System.IO (hPutStrLn, stderr, stdout) 20 | 21 | type ProgOpt = (FilePath, FilePath) 22 | 23 | main :: IO () 24 | main = OP.execParser optionSpec >>= runWithOptions 25 | 26 | optionSpec :: OP.ParserInfo ProgOpt 27 | optionSpec = OP.info (pure (.) <*> versionInfoOpt <*> OP.helper <*> optParser) ( 28 | OP.fullDesc <> 29 | OP.progDesc ("Program to flip all alleles in a BIM file with a refrence VCF file \ 30 | \into the correct REF and ALT order and the reference strand" <> versionInfoText)) 31 | 32 | optParser :: OP.Parser ProgOpt 33 | optParser = (,) <$> OP.strOption (OP.long "BIM-file" <> OP.metavar "FILE") <*> OP.strOption (OP.long "VCF-file" <> OP.metavar "FILE") 34 | 35 | runWithOptions :: ProgOpt -> IO () 36 | runWithOptions (bimFileName, vcfFileName) = do 37 | runSafeT $ do 38 | (_, vcfProd) <- readVCFfromFile vcfFileName 39 | let vcfProdFiltered = vcfProd >-> P.filter isValidSnp 40 | bimProd = readBimFile bimFileName 41 | mergedProd = orderedZip comp bimProd vcfProdFiltered >> return () 42 | _ <- runEffect $ mergedProd >-> processJointEntries >-> writeEigenstratSnp stdout 43 | return () 44 | where 45 | comp (EigenstratSnpEntry bimChrom bimPos _ _ _ _) vcfEntry = 46 | compare (bimChrom, bimPos) (vcfChrom vcfEntry, vcfPos vcfEntry) 47 | isValidSnp vcf = isBiallelicSnp (vcfRef vcf) (vcfAlt vcf) 48 | 49 | processJointEntries :: (MonadIO m) => 50 | Pipe (Maybe EigenstratSnpEntry, Maybe VCFentry) EigenstratSnpEntry m () 51 | processJointEntries = for cat (\(mes,mvcf) -> do 52 | case (mes, mvcf) of 53 | (Just es, Just vcf) -> 54 | if isJust (vcfId vcf) && fromJust (vcfId vcf) /= snpId es 55 | then 56 | liftIO $ hPutStrLn stderr ("SKIP_ID_MISMATCH: " <> B.unpack (snpId es) <> " <> " <> B.unpack (fromJust (vcfId vcf))) 57 | else do 58 | if snpRef es == B.head (vcfRef vcf) && snpAlt es == B.head (head (vcfAlt vcf)) 59 | then yield es 60 | else do 61 | liftIO $ hPutStrLn stderr ("WARN_ALLELE_CHANGE at " <> B.unpack (snpId es) <> " (" <> unChrom (snpChrom es) <> ":" <> show (snpPos es) <> 62 | "): (" <> [snpRef es] <> "," <> [snpAlt es] <> ") -> (" <> B.unpack (vcfRef vcf) <> "," <> (B.unpack . head) (vcfAlt vcf) <> ")") 63 | yield es {snpRef = B.head (vcfRef vcf), snpAlt = B.head (head (vcfAlt vcf))} 64 | (Just es, Nothing) -> 65 | liftIO $ hPutStrLn stderr ("SKIP_MISSING: Did not find position " <> B.unpack (snpId es) <> " (" <> unChrom (snpChrom es) <> ":" <> show (snpPos es) <> ") in VCF file") 66 | _ -> return ()) 67 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | # Trigger the workflow on the new 'v*' tag created 5 | push: 6 | tags: 7 | - "v*" 8 | 9 | jobs: 10 | create_release: 11 | name: Create Github Release 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Check out code 15 | uses: actions/checkout@v4 16 | 17 | - name: Create Release 18 | id: create_release 19 | uses: ncipollo/release-action@v1 20 | with: 21 | name: Release ${{ github.ref_name }} 22 | draft: true 23 | 24 | build_normal_artifacts: 25 | needs: [create_release] 26 | name: ${{ matrix.os }}/${{ github.ref }} 27 | runs-on: ${{ matrix.os }} 28 | strategy: 29 | matrix: 30 | os: [ubuntu-20.04, macOS-13, macOS-14, windows-latest] 31 | 32 | steps: 33 | - name: Check out code 34 | uses: actions/checkout@v4 35 | 36 | - name: Set tag name 37 | uses: olegtarasov/get-tag@v2.1 38 | id: tagName 39 | with: 40 | tagRegex: "v(.*)" 41 | tagRegexGroup: 1 42 | 43 | - name: Install stack on macOS, where it is not present (https://github.com/freckle/stack-action/issues/80) 44 | if: ${{ runner.os == 'macOS' }} 45 | run: curl -sSL https://get.haskellstack.org/ | sh 46 | 47 | - name: Build executable 48 | uses: freckle/stack-action@v5 49 | id: stack 50 | with: 51 | test: false 52 | stack-build-arguments: --copy-bins --ghc-options="-O2" 53 | 54 | - name: Set binary path name 55 | id: binarypath 56 | run: | 57 | if [ "$RUNNER_OS" == "Windows" ]; then 58 | newEXE="pileupCaller-$RUNNER_OS.exe" 59 | elif [ "$RUNNER_OS" == "macOS" ]; then 60 | newEXE="pileupCaller-$RUNNER_OS-$RUNNER_ARCH" 61 | else 62 | newEXE="pileupCaller-$RUNNER_OS" 63 | fi 64 | currentEXE="${{ steps.stack.outputs.local-bin }}/pileupCaller" 65 | mv $currentEXE $newEXE 66 | echo "BINARY_PATH=$newEXE" >> $GITHUB_OUTPUT 67 | shell: bash 68 | 69 | - name: Compress binary 70 | if: ${{ runner.os != 'macOS' }} # upx is crashing for macOS Ventura or above! 71 | uses: svenstaro/upx-action@v2 72 | with: 73 | files: ${{ steps.binarypath.outputs.BINARY_PATH }} 74 | 75 | - name: Upload Release Asset 76 | id: upload-release-asset 77 | uses: ncipollo/release-action@v1 78 | with: 79 | name: Release ${{ github.ref_name }} 80 | draft: true 81 | allowUpdates: true 82 | artifactErrorsFailBuild: true 83 | artifacts: ${{ steps.binarypath.outputs.BINARY_PATH }} 84 | artifactContentType: application/octet-stream 85 | 86 | build_centos_artifact: 87 | needs: [create_release] 88 | runs-on: ubuntu-latest 89 | 90 | steps: 91 | - name: Checkout repo 92 | uses: actions/checkout@v4 93 | 94 | - name: Build Docker image 95 | run: docker build -t linux -f .github/workflows/Dockerfile.centos . 96 | 97 | - name: Create container 98 | run: docker create --name linuxcontainer linux 99 | 100 | - name: Copy executable 101 | run: docker cp linuxcontainer:/root/.local/bin/pileupCaller pileupCaller-conda-linux 102 | 103 | - name: Upload Release Asset 104 | id: upload-release-asset 105 | uses: ncipollo/release-action@v1 106 | with: 107 | name: Release ${{ github.ref_name }} 108 | draft: true 109 | allowUpdates: true 110 | artifactErrorsFailBuild: true 111 | artifacts: pileupCaller-conda-linux 112 | artifactContentType: application/octet-stream -------------------------------------------------------------------------------- /scripts/unmaintained/makeAdmixtureDat.hs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env stack 2 | -- stack --resolver lts-7.0 --install-ghc runghc --package turtle --package foldl --package text 3 | 4 | {-# LANGUAGE OverloadedStrings #-} 5 | 6 | import Control.Foldl (list) 7 | import Control.Monad (forM_) 8 | import Data.List (groupBy, sortOn) 9 | import Data.Maybe (fromJust, fromMaybe) 10 | import qualified Data.Text as T 11 | import qualified Data.Text.IO as T 12 | import Prelude hiding (FilePath) 13 | import Turtle 14 | 15 | data Options = Options FilePath FilePath FilePath (Maybe Int) Bool (Maybe Int) 16 | 17 | main = do 18 | opts <- options "prepare Admixture data for DataGraph" optParser 19 | printData opts 20 | where 21 | optParser = Options <$> optPath "admixtureFile" 'a' "Input Admixture file (*.Q)" 22 | <*> optPath "indFile" 'i' "Input *.ind file. This should be formatted in \ 23 | \EigenStrat, which includes the individual name in the \ 24 | \first, and the population name in the third column." 25 | <*> optPath "popGroupFile" 'p' "A file with two columns. The first is the \ 26 | \population, the second population group, e.g. a continental group. \ 27 | \Note that the order of populations in this file also determines \ 28 | \the order in the output." 29 | <*> optional (optInt "blankLines" 'b' 30 | "Number of blank lines between populations") 31 | <*> switch "clusterSORT" 'c' "Sort by cluster, ignore order given in \ 32 | \popGroups file" 33 | <*> optional (optInt "padColumns" 'k' "Pad the total number of Q columns \ 34 | \to a given number. The values in padded columns is set \ 35 | \to zero. This is useful for using a single Datagraph file \ 36 | \to produce plots for multiple K") 37 | 38 | printData (Options admixtureF popF popGroupF maybeBlankLines clusterSort maybePad) = do 39 | popGroupDat <- readPopGroupDat popGroupF 40 | admixtureDat <- fold (readAdmixtureDat popGroupDat admixtureF popF) list 41 | let blankLines = fromMaybe 1 maybeBlankLines 42 | let (_, _, _, vals) = head admixtureDat 43 | k = length vals 44 | padLength = fromMaybe k maybePad 45 | sortedDat = if clusterSort 46 | then sortByCluster admixtureDat 47 | else sortByPopGroupFile admixtureDat popGroupDat 48 | legendedDat = putLegend sortedDat 49 | echo . T.intercalate "\t" $ ["Sample", "Pop", "PopGroup", "Label"] ++ 50 | [format ("Q"%d) i | i <- [1..padLength]] 51 | let padColumns = replicate (padLength - k) 0.0 52 | forM_ legendedDat $ \group -> do 53 | forM_ group $ \(sample, pop, popGroup, legend, vals) -> do 54 | echo . T.intercalate "\t" $ [sample, pop, popGroup, legend] ++ map (format g) vals ++ 55 | map (format g) padColumns 56 | replicateM_ blankLines (echo "") 57 | 58 | readPopGroupDat :: FilePath -> IO [(Text, Text)] 59 | readPopGroupDat popGroupF = do 60 | l <- fold (input popGroupF) list 61 | return [(p, pG) | [p, pG] <- map (cut (some space) . T.strip) l] 62 | 63 | readAdmixtureDat :: [(Text, Text)] -> FilePath -> FilePath -> Shell (Text, Text, Text, [Double]) 64 | readAdmixtureDat popGroupDat admixtureF popF = do 65 | (admixtureL, indL) <- paste (input admixtureF) (input popF) 66 | let vals = map (read . T.unpack) . cut (some space) $ admixtureL 67 | [sample, _, pop] = cut (some space) . T.strip $ indL 68 | Just popGroup <- return $ pop `lookup` popGroupDat 69 | return (sample, pop, popGroup, vals) 70 | 71 | sortByPopGroupFile :: [(Text, Text, Text, [Double])] -> [(Text, Text)] 72 | -> [(Text, Text, Text, [Double])] 73 | sortByPopGroupFile admixtureDat popGroupDat = 74 | sortOn (\(_, p, _, _) -> fromJust $ lookup p sortIndices) admixtureDat 75 | where 76 | sortIndices = [(p, i) | ((p, _), i) <- zip popGroupDat [0..]] 77 | 78 | sortByCluster :: [(Text, Text, Text, [Double])] -> [(Text, Text, Text, [Double])] 79 | sortByCluster admixtureDat = 80 | let groups = groupBy (\(_, p1, _, _) (_, p2, _, _) -> p1 == p2) . sortOn (\(_, p, _, _) -> p) $ 81 | admixtureDat 82 | groupClusterWeights = [getColumnAverage . map (\(_, _, _, vals) -> vals) $ g | g <- groups] 83 | internallySortedGroups = zipWith sortInternally groupClusterWeights groups 84 | in concat $ sortExternally groupClusterWeights internallySortedGroups 85 | where 86 | sortInternally weights group = sortOn (\(_, _, _, vals) -> negate (vals !! maxCluster)) group 87 | where 88 | maxCluster = last . map fst . sortOn snd . zip [0..] $ weights 89 | sortExternally weightMatrix groups = map snd . sortOn (maxIndex . fst) . sortOn (negate . maximum . fst) . zip weightMatrix $ groups 90 | where 91 | maxIndex = last . map fst . sortOn snd . zip [0..] 92 | 93 | 94 | getColumnAverage :: [[Double]] -> [Double] 95 | getColumnAverage mat = [(sum . map (!!i) $ mat) / fromIntegral n | i <- [0 .. (k - 1)]] 96 | where 97 | k = length . head $ mat 98 | n = length mat 99 | 100 | putLegend :: [(Text, Text, Text, [Double])] -> [[(Text, Text, Text, Text, [Double])]] 101 | putLegend admixtureDat = do 102 | group <- groups 103 | let l = length group 104 | (_, pop, _, _) = head group 105 | labels = [if i == l `div` 2 then pop else "" | i <- [0..(l - 1)]] 106 | return [(s, p, pg, l, v) | ((s, p, pg, v), l) <- zip group labels] 107 | where 108 | groups = groupBy (\(_, pop1, _, _) (_, pop2, _, _) -> pop1 == pop2) admixtureDat 109 | 110 | -------------------------------------------------------------------------------- /src-executables/vcf2eigenstrat.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | import Pipes.OrderedZip (orderedZip) 4 | import SequenceFormats.Eigenstrat (EigenstratIndEntry (..), 5 | EigenstratSnpEntry (..), Sex (..), 6 | readEigenstratSnpFile, 7 | writeEigenstrat) 8 | import SequenceFormats.FreqSum (FreqSumEntry (..)) 9 | import SequenceFormats.VCF (VCFentry (..), VCFheader (..), 10 | getDosages, isBiallelicSnp, 11 | readVCFfromStdIn, 12 | vcfToFreqSumEntry) 13 | 14 | import SequenceTools.Utils (freqSumToEigenstrat, 15 | versionInfoOpt, versionInfoText) 16 | 17 | import Control.Exception.Base (AssertionFailed (..), throwIO) 18 | import Control.Monad (when) 19 | import Control.Monad.IO.Class (MonadIO, liftIO) 20 | import qualified Data.ByteString.Char8 as B 21 | -- import Debug.Trace (trace) 22 | import qualified Options.Applicative as OP 23 | import Pipes (Pipe, Producer, cat, for, 24 | runEffect, yield, (>->)) 25 | import qualified Pipes.Prelude as P 26 | import Pipes.Safe (MonadSafe, runSafeT) 27 | 28 | -- snpPosFile outPrefix 29 | data ProgOpt = ProgOpt (Maybe FilePath) FilePath 30 | 31 | main :: IO () 32 | main = readOptions >>= runMain 33 | 34 | readOptions :: IO ProgOpt 35 | readOptions = OP.execParser parserInfo 36 | where 37 | parserInfo = OP.info (pure (.) <*> versionInfoOpt <*> OP.helper <*> argParser) 38 | (OP.progDesc ("A program to convert a VCF file (stdin) to Eigenstrat. " ++ versionInfoText)) 39 | 40 | argParser :: OP.Parser ProgOpt 41 | argParser = ProgOpt <$> parseSnpPosFile <*> parseOutPrefix 42 | where 43 | parseSnpPosFile = OP.option (Just <$> OP.str) 44 | (OP.long "snpFile" <> OP.short 'f' <> OP.value Nothing <> OP.metavar "" <> 45 | OP.help "specify an Eigenstrat SNP file with the positions and alleles of a \ 46 | \reference set. If this option is given, only positions that are both in the SNP file \ 47 | \and in the VCF will be output. Without this option, all sites in the VCF will be output") 48 | parseOutPrefix = OP.strOption (OP.long "outPrefix" <> OP.short 'e' <> 49 | OP.metavar "" <> 50 | OP.help "specify the prefix for the EigenStrat output files. Output files will then be \ 51 | \.geno, .snp and .ind") 52 | 53 | runMain :: ProgOpt -> IO () 54 | runMain (ProgOpt maybeSnpPosFile outPrefix) = 55 | runSafeT $ do 56 | (vcfHeader, vcfBody) <- readVCFfromStdIn 57 | let snpOut = outPrefix ++ ".snp" 58 | indOut = outPrefix ++ ".ind" 59 | genoOut = outPrefix ++ ".geno" 60 | VCFheader _ sampleNames = vcfHeader 61 | nrInds = length sampleNames 62 | indEntries = [EigenstratIndEntry n Unknown "Unknown" | n <- sampleNames] 63 | let vcfBodyBiAllelic = vcfBody >-> P.filter (\e -> isBiallelicSnp (vcfRef e) (vcfAlt e)) 64 | vcfProducer <- case maybeSnpPosFile of 65 | Just snpPosFile -> 66 | return $ runJointly vcfBodyBiAllelic nrInds snpPosFile 67 | Nothing -> return $ runSimple vcfBodyBiAllelic 68 | runEffect $ vcfProducer >-> P.map freqSumToEigenstrat >-> writeEigenstrat genoOut snpOut indOut indEntries 69 | 70 | runJointly :: (MonadIO m, MonadSafe m) => Producer VCFentry m r -> Int -> FilePath -> Producer FreqSumEntry m r 71 | runJointly vcfBody nrInds snpPosFile = 72 | let snpProd = readEigenstratSnpFile snpPosFile 73 | jointProd = snd <$> orderedZip cmp snpProd vcfBody 74 | in jointProd >-> processVcfWithSnpFile nrInds 75 | where 76 | cmp (EigenstratSnpEntry snpChrom' snpPos' _ _ _ _) vcfEntry = (snpChrom', snpPos') `compare` (vcfChrom vcfEntry, vcfPos vcfEntry) 77 | 78 | processVcfWithSnpFile :: (MonadIO m) => Int -> Pipe (Maybe EigenstratSnpEntry, Maybe VCFentry) FreqSumEntry m r 79 | processVcfWithSnpFile nrInds = for cat $ \jointEntry -> do 80 | case jointEntry of 81 | (Just (EigenstratSnpEntry snpChrom' snpPos' gpos snpId' snpRef' snpAlt'), Nothing) -> do 82 | let dosages = replicate nrInds Nothing 83 | yield $ FreqSumEntry snpChrom' snpPos' (Just snpId') (Just gpos) snpRef' snpAlt' dosages 84 | (Just (EigenstratSnpEntry snpChrom' snpPos' gpos snpId' snpRef' snpAlt'), Just vcfEntry) -> do 85 | dosages <- liftIO $ getDosages vcfEntry 86 | when (length dosages /= nrInds) $ (liftIO . throwIO) (AssertionFailed "inconsistent number of genotypes.") 87 | let normalizedDosages = 88 | case vcfAlt vcfEntry of 89 | [alt] -> if (vcfRef vcfEntry, alt) == (B.singleton snpRef', B.singleton snpAlt') 90 | then dosages 91 | else 92 | if (vcfRef vcfEntry, alt) == (B.singleton snpAlt', B.singleton snpRef') 93 | then map flipDosages dosages 94 | else replicate nrInds Nothing 95 | _ -> replicate nrInds Nothing 96 | yield $ FreqSumEntry snpChrom' snpPos' (Just snpId') (Just gpos) snpRef' snpAlt' normalizedDosages 97 | _ -> return () 98 | where 99 | flipDosages dos = case dos of 100 | Just (0, 2) -> Just (2, 2) 101 | Just (1, 2) -> Just (1, 2) 102 | Just (2, 2) -> Just (0, 2) 103 | Just (0, 1) -> Just (1, 1) 104 | Just (1, 1) -> Just (0, 1) 105 | _ -> Nothing 106 | 107 | runSimple :: (MonadIO m) => Producer VCFentry m r -> Producer FreqSumEntry m r 108 | runSimple vcfBody = for vcfBody $ \e -> do 109 | fs <- liftIO $ vcfToFreqSumEntry e 110 | yield fs 111 | -------------------------------------------------------------------------------- /src/SequenceTools/PileupCaller.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module SequenceTools.PileupCaller (callToDosage, Call(..), callGenotypeFromPileup, 3 | callMajorityAllele, findMajorityAlleles, callRandomAllele, 4 | callRandomDiploid, CallingMode(..), 5 | TransitionsMode(..), filterTransitions, cleanSSdamageAllSamples, 6 | computeAlleleFreq) where 7 | 8 | import SequenceFormats.FreqSum (FreqSumEntry (..)) 9 | import SequenceFormats.Pileup (PileupRow (..), Strand (..)) 10 | import SequenceTools.Utils (sampleWithoutReplacement) 11 | 12 | import Data.List (group, sort, sortOn) 13 | import Pipes (Pipe, cat) 14 | import qualified Pipes.Prelude as P 15 | 16 | -- |A datatype to represent a single genotype call 17 | data Call = HaploidCall Char | DiploidCall Char Char | MissingCall deriving (Show, Eq) 18 | 19 | -- |A datatype to specify the calling mode 20 | data CallingMode = MajorityCalling Bool | RandomCalling | RandomDiploidCalling deriving (Show) 21 | 22 | data TransitionsMode = TransitionsMissing | SkipTransitions | SingleStrandMode | AllSites deriving (Eq, Show) 23 | 24 | -- |a function to turn a call into the dosage of non-reference alleles 25 | callToDosage :: Char -> Char -> Call -> Maybe (Int, Int) 26 | callToDosage refA altA call = case call of 27 | HaploidCall a | a == refA -> Just (0, 1) 28 | | a == altA -> Just (1, 1) 29 | | otherwise -> Nothing 30 | DiploidCall a1 a2 | (a1, a2) == (refA, refA) -> Just (0, 2) 31 | | (a1, a2) == (refA, altA) -> Just (1, 2) 32 | | (a1, a2) == (altA, refA) -> Just (1, 2) 33 | | (a1, a2) == (altA, altA) -> Just (2, 2) 34 | | otherwise -> Nothing 35 | MissingCall -> Nothing 36 | 37 | -- |Make a call from alleles 38 | callGenotypeFromPileup :: CallingMode -> Int -> String -> IO Call 39 | callGenotypeFromPileup mode minDepth alleles = 40 | if length alleles < minDepth then return MissingCall else 41 | case mode of 42 | MajorityCalling withDownsampling -> callMajorityAllele withDownsampling minDepth alleles 43 | RandomCalling -> callRandomAllele alleles 44 | RandomDiploidCalling -> callRandomDiploid alleles 45 | 46 | -- |Sample the majority allele, or one of the majority alleles 47 | callMajorityAllele :: Bool -> Int-> String -> IO Call 48 | callMajorityAllele withDownsampling minDepth alleles = do 49 | maybeAlleles <- if withDownsampling 50 | then sampleWithoutReplacement alleles minDepth 51 | else return (Just alleles) 52 | case maybeAlleles of 53 | Nothing -> return MissingCall 54 | Just alleles' -> do 55 | a <- case findMajorityAlleles alleles' of 56 | [] -> error "should not happen" 57 | [a'] -> return a' 58 | listA -> do 59 | r <- sampleWithoutReplacement listA 1 60 | case r of 61 | Just [r'] -> return r' 62 | _ -> error "should not happen" 63 | return $ HaploidCall a 64 | 65 | -- |Find the majority allele(s) 66 | findMajorityAlleles :: String -> [Char] 67 | findMajorityAlleles alleles = 68 | let groupedAlleles = sortOn fst [(length g, head g) | g <- group . sort $ alleles] 69 | majorityCount = fst . last $ groupedAlleles 70 | in [a | (n, a) <- groupedAlleles, n == majorityCount] 71 | 72 | -- |call a random allele 73 | callRandomAllele :: String -> IO Call 74 | callRandomAllele alleles = do 75 | res <- sampleWithoutReplacement alleles 1 76 | case res of 77 | Nothing -> return MissingCall 78 | Just [a] -> return $ HaploidCall a 79 | _ -> error "should not happen" 80 | 81 | -- |call two random alleles 82 | callRandomDiploid :: String -> IO Call 83 | callRandomDiploid alleles = do 84 | res <- sampleWithoutReplacement alleles 2 85 | case res of 86 | Nothing -> return MissingCall 87 | Just [a1, a2] -> return $ DiploidCall a1 a2 88 | _ -> error "should not happen" 89 | 90 | -- the basic information stream is a tuple of a PileupRow (if data is present at a SNP), and a FreqSumEntry that contains the calls. 91 | -- For Eigenstrat and Plink we don't need the PileupRow, but for VCF, we can store additional information beyond the mere calls, 92 | -- that's why we're streaming both, to have an output-agnostic stream. 93 | filterTransitions :: (Monad m) => TransitionsMode -> Pipe (Maybe PileupRow, FreqSumEntry) (Maybe PileupRow, FreqSumEntry) m () 94 | filterTransitions transversionsMode = 95 | case transversionsMode of 96 | SkipTransitions -> 97 | P.filter (\(_, FreqSumEntry _ _ _ _ ref alt _) -> isTransversion ref alt) 98 | TransitionsMissing -> 99 | P.map (\(mp, FreqSumEntry chrom pos id_ gpos ref alt calls) -> 100 | let calls' = if isTransversion ref alt then calls else [Nothing | _ <- calls] 101 | in (mp, FreqSumEntry chrom pos id_ gpos ref alt calls')) 102 | _ -> cat 103 | where 104 | isTransversion ref alt = not $ isTransition ref alt 105 | isTransition ref alt = 106 | ((ref == 'A') && (alt == 'G')) || 107 | ((ref == 'G') && (alt == 'A')) || 108 | ((ref == 'C') && (alt == 'T')) || 109 | ((ref == 'T') && (alt == 'C')) 110 | 111 | cleanSSdamageAllSamples :: Char -> Char -> [String] -> [[Strand]] -> [String] 112 | cleanSSdamageAllSamples ref alt basesPerSample strandPerSample 113 | | (ref, alt) == ('C', 'T') || (ref, alt) == ('T', 'C') = 114 | [removeForwardBases bases strands | (bases, strands) <- zip basesPerSample strandPerSample] 115 | | (ref, alt) == ('G', 'A') || (ref, alt) == ('A', 'G') = 116 | [removeReverseBases bases strands | (bases, strands) <- zip basesPerSample strandPerSample] 117 | | otherwise = 118 | basesPerSample 119 | where 120 | removeForwardBases = removeReads ForwardStrand 121 | removeReverseBases = removeReads ReverseStrand 122 | 123 | removeReads :: Strand -> String -> [Strand] -> String 124 | removeReads strand bases strands = [b | (b, s) <- zip bases strands, s /= strand] 125 | 126 | computeAlleleFreq :: [Maybe (Int, Int)] -> Maybe Double 127 | computeAlleleFreq dosages = 128 | let nrTotalAlleles = sum . map (maybe 0 snd) $ dosages 129 | nrNonRefAlleles = sum . map (maybe 0 fst) $ dosages 130 | in if nrTotalAlleles == 0 then Nothing else 131 | Just (fromIntegral nrNonRefAlleles / fromIntegral nrTotalAlleles) 132 | -------------------------------------------------------------------------------- /src-executables/genoStats.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | import SequenceFormats.Eigenstrat (EigenstratIndEntry (..), 4 | EigenstratSnpEntry (..), 5 | GenoEntry (..), GenoLine, 6 | readEigenstrat) 7 | import SequenceFormats.FreqSum (FreqSumEntry (..), 8 | FreqSumHeader (..), 9 | readFreqSumFile, readFreqSumStdIn) 10 | import SequenceFormats.Utils (Chrom (..)) 11 | import SequenceTools.Utils (dosageToEigenstratGeno, 12 | versionInfoOpt, versionInfoText) 13 | 14 | import Control.Applicative ((<|>)) 15 | import Control.Foldl (Fold (..), purely) 16 | import Control.Monad (forM_) 17 | import Control.Monad.IO.Class (MonadIO, liftIO) 18 | import qualified Data.ByteString.Char8 as B 19 | import qualified Data.Vector as V 20 | import Lens.Family2 (view) 21 | import qualified Options.Applicative as OP 22 | import Pipes (Consumer, Producer, cat, for, 23 | (>->)) 24 | import Pipes.Group (folds, groupsBy) 25 | import qualified Pipes.Prelude as P 26 | import Pipes.Safe (MonadSafe, runSafeT) 27 | import System.IO (hPutStrLn, stderr) 28 | import Text.Printf (printf) 29 | 30 | data ProgOpt = ProgOpt InputOption 31 | 32 | data InputOption = FreqsumInput (Maybe FilePath) | EigenstratInput FilePath FilePath FilePath 33 | 34 | data InputEntry = InputEntry Chrom GenoLine deriving (Show) 35 | 36 | type StatsReportAllSamples = [StatsReport] 37 | 38 | data StatsReport = StatsReport { 39 | srNrSitesMissing :: Int, 40 | srNrSitesHomRef :: Int, 41 | srNrSitesHomAlt :: Int, 42 | srNrSitesHet :: Int 43 | } deriving (Show) 44 | 45 | main :: IO () 46 | main = OP.execParser optionSpec >>= runWithOpts 47 | 48 | optionSpec :: OP.ParserInfo ProgOpt 49 | optionSpec = OP.info (pure (.) <*> versionInfoOpt <*> OP.helper <*> argParser) ( 50 | OP.progDesc ("A program \ 51 | \to evaluate per-chromosome and total statistics \ 52 | \of genotyping data, read either as Eigenstrat (by specifying options -g, -s and -i) or FreqSum (default, or by specifying option -f). " ++ versionInfoText)) 53 | 54 | argParser :: OP.Parser ProgOpt 55 | argParser = ProgOpt <$> (parseFreqsumInput <|> parseEigenstratInput) 56 | 57 | parseFreqsumInput :: OP.Parser InputOption 58 | parseFreqsumInput = 59 | process <$> OP.strOption (OP.long "freqsum" <> OP.short 'f' <> OP.help "a freqsum file to read as input. Use - to read from stdin (the default)" <> 60 | OP.value "-" <> OP.showDefault <> OP.metavar "FILEPATH") 61 | where 62 | process p = 63 | if p == "-" 64 | then FreqsumInput Nothing 65 | else FreqsumInput (Just p) 66 | 67 | parseEigenstratInput :: OP.Parser InputOption 68 | parseEigenstratInput = EigenstratInput <$> parseGenoFile <*> parseSnpFile <*> parseIndFile 69 | where 70 | parseGenoFile = OP.strOption (OP.long "eigenstratGeno" <> OP.short 'g' <> OP.help "Eigenstrat Geno File" <> OP.metavar "FILEPATH") 71 | parseSnpFile = OP.strOption (OP.long "eigenstratSnp" <> OP.short 's' <> OP.help "Eigenstrat Snp File" <> OP.metavar "FILEPATH") 72 | parseIndFile = OP.strOption (OP.long "eigenstratInd" <> OP.short 'i' <> OP.help "Eigenstrat Ind File" <> OP.metavar "FILEPATH") 73 | 74 | runWithOpts :: ProgOpt -> IO () 75 | runWithOpts (ProgOpt inputOpt) = runSafeT $ do 76 | (names, entryProducer) <- case inputOpt of 77 | FreqsumInput fsFile -> runWithFreqSum fsFile 78 | EigenstratInput genoFile snpFile indFile -> runWithEigenstrat genoFile snpFile indFile 79 | liftIO $ hPutStrLn stderr ("processing samples: " <> show names) 80 | let p = runStats names entryProducer >-> P.tee (reportStats names) 81 | totalReport <- purely P.fold (accumulateAllChromStats names) p 82 | printReports names totalReport 83 | 84 | runWithFreqSum :: (MonadSafe m) => Maybe FilePath -> m ([String], Producer InputEntry m ()) 85 | runWithFreqSum fsFile = do 86 | (FreqSumHeader names _, fsProd) <- case fsFile of 87 | Nothing -> readFreqSumStdIn 88 | Just fn -> readFreqSumFile fn 89 | let prod = fsProd >-> P.map (\fs -> InputEntry (fsChrom fs) (V.fromList . map dosageToEigenstratGeno . fsCounts $ fs)) 90 | return (names, prod) 91 | 92 | runWithEigenstrat :: (MonadSafe m) => 93 | FilePath -> FilePath -> FilePath -> m ([String], Producer InputEntry m ()) 94 | runWithEigenstrat genoFile snpFile indFile = do 95 | (indEntries, genoStream) <- readEigenstrat genoFile snpFile indFile 96 | let names = [name | EigenstratIndEntry name _ _ <- indEntries] 97 | let prod = genoStream >-> P.map (\(EigenstratSnpEntry chrom _ _ _ _ _, genoLine) -> InputEntry chrom genoLine) 98 | return (map B.unpack names, prod) 99 | 100 | runStats :: (MonadIO m) => [String] -> Producer InputEntry m () -> 101 | Producer (Chrom, StatsReportAllSamples) m () 102 | runStats names entryProducer = 103 | let groupedProd = view (groupsBy (\(InputEntry c1 _) (InputEntry c2 _) -> c1 == c2)) 104 | entryProducer 105 | in purely folds (runStatsPerChrom (length names)) groupedProd 106 | 107 | runStatsPerChrom :: Int -> Fold InputEntry (Chrom, StatsReportAllSamples) 108 | runStatsPerChrom nrSamples = (,) <$> getChrom <*> 109 | traverse runStatsPerChromPerSample [0..(nrSamples - 1)] 110 | 111 | getChrom :: Fold InputEntry Chrom 112 | getChrom = Fold (\_ (InputEntry c _) -> c) (Chrom "") id 113 | 114 | runStatsPerChromPerSample :: Int -> Fold InputEntry StatsReport 115 | runStatsPerChromPerSample i = Fold step initial extract 116 | where 117 | step :: StatsReport -> InputEntry -> StatsReport 118 | step accum@(StatsReport miss homr homa het) (InputEntry _ line) = case (line V.! i) of 119 | Missing -> accum {srNrSitesMissing = miss + 1} 120 | HomRef -> accum {srNrSitesHomRef = homr + 1} 121 | HomAlt -> accum {srNrSitesHomAlt = homa + 1} 122 | Het -> accum {srNrSitesHet = het + 1} 123 | initial :: StatsReport 124 | initial = StatsReport 0 0 0 0 125 | extract :: StatsReport -> StatsReport 126 | extract = id 127 | 128 | reportStats :: (MonadIO m) => [String] -> Consumer (Chrom, StatsReportAllSamples) m () 129 | reportStats names = do 130 | liftIO . putStrLn $ "Chrom\tSample\tMissing\tHomRef\tHomAlt\tHet" 131 | for cat $ \(chrom, reports) -> printReports names (chrom, reports) 132 | 133 | printReports :: (MonadIO m) => [String] -> (Chrom, StatsReportAllSamples) -> m () 134 | printReports names (chrom, reports) = 135 | forM_ (zip names reports) $ \(n, StatsReport mis ref alt het) -> do 136 | let total = mis + ref + alt + het 137 | misPerc = round $ (fromIntegral mis / fromIntegral total) * (100.0 :: Double) :: Int 138 | liftIO . putStrLn $ printf "%s\t%s\t%d(%d%%)\t%d\t%d\t%d" (show chrom) n mis misPerc ref alt het 139 | 140 | accumulateAllChromStats :: [String] -> 141 | Fold (Chrom, StatsReportAllSamples) (Chrom, StatsReportAllSamples) 142 | accumulateAllChromStats names = Fold step initial extract 143 | where 144 | step :: StatsReportAllSamples -> (Chrom, StatsReportAllSamples) -> StatsReportAllSamples 145 | step sumReports (_, newReports) = do 146 | (StatsReport smiss shomr shoma shet, StatsReport miss homr homa het) <- 147 | zip sumReports newReports 148 | return $ StatsReport (smiss + miss) (shomr + homr) (shoma + homa) (shet + het) 149 | initial :: StatsReportAllSamples 150 | initial = [StatsReport 0 0 0 0 | _ <- names] 151 | extract :: StatsReportAllSamples -> (Chrom, StatsReportAllSamples) 152 | extract r = (Chrom "Total", r) 153 | 154 | 155 | -------------------------------------------------------------------------------- /scripts/unmaintained/barcodeDemulti.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import gzip 5 | from itertools import islice, chain 6 | import sys 7 | import os 8 | from operator import itemgetter 9 | 10 | class FastqEntry: 11 | def __init__(self, lines): 12 | [name, seq, _, quali] = lines 13 | self.name = name 14 | self.seq = seq 15 | self.quali = quali 16 | 17 | class FastQIterator: 18 | def __init__(self, fn): 19 | self.f = gzip.open(fn, "rt") 20 | 21 | def __iter__(self): 22 | return self 23 | 24 | def __next__(self): 25 | lines = [self.f.readline().strip() for _ in range(4)] 26 | if not lines[0]: 27 | raise StopIteration 28 | return FastqEntry(lines) 29 | 30 | def __del__(self): 31 | self.f.close() 32 | 33 | class Index: 34 | def __init__(self, seq, mismatches): 35 | self.sequence = seq 36 | self.mismatches = mismatches 37 | self.hits = {} 38 | 39 | def match(self, querySeq): 40 | if self.sequence == '-': 41 | return True 42 | nrM = sum(t != 'N' and s != t for s, t in zip(querySeq, self.sequence)) 43 | return nrM <= self.mismatches 44 | 45 | def recordHit(self, seq): 46 | clippedSeq = seq[:len(self.sequence)] 47 | if clippedSeq not in self.hits: 48 | self.hits[clippedSeq] = 0 49 | self.hits[clippedSeq] += 1 50 | 51 | def reportStats(self, indexName): 52 | if self.sequence != '-': 53 | p = (self.sequence, self.hits[self.sequence]) if self.sequence in self.hits else 0 54 | print("", "Perfect matches {}:".format(indexName), p, sep="\t") 55 | otherMatches = sorted([(k, v) for k, v in self.hits.items() if k != self.sequence], key=itemgetter(1), reverse=True) 56 | print("", "Other matches {}:".format(indexName), otherMatches, sep="\t") 57 | 58 | 59 | class QuadrupelIndex: 60 | def __init__(self, i1, i2, bc1, bc2, mismatches): 61 | self.i1 = Index(i1, mismatches) 62 | self.i2 = Index(i2, mismatches) 63 | self.bc1 = Index(bc1, mismatches) 64 | self.bc2 = Index(bc2, mismatches) 65 | 66 | def match(self, r1, r2, i1, i2): 67 | if self.i1.match(i1) and self.i2.match(i2) and self.bc1.match(r1) and self.bc2.match(r2): 68 | self.i1.recordHit(i1) 69 | self.i2.recordHit(i2) 70 | self.bc1.recordHit(r1) 71 | self.bc2.recordHit(r2) 72 | return True 73 | else: 74 | return False 75 | 76 | class SampleSheet: 77 | def __init__(self, sample_sheet_file, outputTemplate, mismatches, withUndetermined): 78 | self.samples = {} 79 | f = open(sample_sheet_file, "r") 80 | self.outputHandles = {} 81 | self.nameOrder = [] 82 | self.withUndetermined = withUndetermined 83 | self.undetermined = 0 84 | for line in f: 85 | [name, i1, i2, bc1, bc2] = line.strip().split() 86 | for letter in i1 + i2 + bc1 + bc2: 87 | assert (letter in "ACTGN-"), "illegal Index or Barcode sequence. Must contain only A, C, T, G, N or -" 88 | self.samples[name] = QuadrupelIndex(i1, i2, bc1, bc2, mismatches) 89 | self.nameOrder.append(name) 90 | allNames = chain(self.samples.keys(), ["Undetermined"]) if self.withUndetermined else self.samples.keys() 91 | for name in allNames: 92 | outputNameR1 = outputTemplate.replace("%name%", name).replace("%r%", "1") 93 | outputNameR2 = outputTemplate.replace("%name%", name).replace("%r%", "2") 94 | outputDir = os.path.dirname(outputNameR1) 95 | os.makedirs(outputDir, exist_ok=True) 96 | f1 = gzip.open(outputNameR1, "wt") 97 | f2 = gzip.open(outputNameR2, "wt") 98 | print("generated output file {}".format(outputNameR1), file=sys.stderr) 99 | print("generated output file {}".format(outputNameR2), file=sys.stderr) 100 | self.outputHandles[name] = (f1, f2) 101 | 102 | def __del__(self): 103 | for f1, f2 in self.outputHandles.values(): 104 | f1.close() 105 | f2.close() 106 | 107 | def process(self, fastqR1, fastqR2, fastqI1, fastqI2): 108 | for name, qIndex in self.samples.items(): 109 | if qIndex.match(fastqR1.seq, fastqR2.seq, fastqI1.seq, fastqI2.seq): 110 | queryBC1, queryBC2 = qIndex.bc1.sequence, qIndex.bc2.sequence 111 | clip1 = 0 if queryBC1 == '-' else len(queryBC1) 112 | clip2 = 0 if queryBC2 == '-' else len(queryBC2) 113 | fastqR1.seq = fastqR1.seq[clip1:] 114 | fastqR2.seq = fastqR2.seq[clip2:] 115 | fastqR1.quali = fastqR1.quali[clip1:] 116 | fastqR2.quali = fastqR2.quali[clip2:] 117 | self.writeFastQ(name, (fastqR1, fastqR2)) 118 | break 119 | else: 120 | self.undetermined += 1 121 | if self.withUndetermined: 122 | self.writeFastQ("Undetermined", (fastqR1, fastqR2)) 123 | 124 | def writeFastQ(self, name, fastqPair): 125 | for i in [0, 1]: 126 | lines = [fastqPair[i].name + "\n", fastqPair[i].seq + "\n", "+\n", fastqPair[i].quali + "\n"] 127 | self.outputHandles[name][i].writelines(lines) 128 | 129 | def reportStats(self): 130 | total = 0 131 | for name in self.nameOrder: 132 | print(name) 133 | qi = self.samples[name] 134 | total += sum(qi.i1.hits.values()) 135 | print("", "Total matches:", sum(qi.i1.hits.values()), sep="\t") 136 | qi.i1.reportStats("I1") 137 | qi.i2.reportStats("I2") 138 | qi.bc1.reportStats("BC1") 139 | qi.bc2.reportStats("BC2") 140 | print("Total reads demultiplexed:", total) 141 | print("Undetermined reads:", self.undetermined) 142 | 143 | def buildArgumentParser(): 144 | parser = argparse.ArgumentParser(description="Demultiplex Fastq files and trim barcodes") 145 | parser.add_argument('-s', "--sample_sheet", metavar="", help="The file containing the sample information. Tab separated file, where every sample gets a row. The columsn are: 1) Sample_name; 2) Index 1 (P5); 3) Index2 (P7); 4) Internal Barcode Read 1; 5) Internal Barcode Read2. You can use the special symbol '-' to denote the absence of a barcode.", required=True) 146 | parser.add_argument('-m', "--mismatches", metavar="", type=int, default=1, help="the number of mismatches allowed in the index- and barcode-recognition. Default=1") 147 | parser.add_argument("-o", "--output_template", metavar="", required=True, help="template for files to write. Can be arbitrary paths including magic place holders %%name%%, which will be replaced by the sample name, and %%r%%, which will be replaced by the read (1 or 2)") 148 | parser.add_argument("--endAfter", metavar="", type=int, help="if given, process only so many reads (for debug purposes mainly)") 149 | parser.add_argument("--fastqR1", required=True, metavar="", help="fastq file for read1") 150 | parser.add_argument("--fastqR2", required=True, metavar="", help="fastq file for read2") 151 | parser.add_argument("--fastqI1", required=True, metavar="", help="fastq file for index read1") 152 | parser.add_argument("--fastqI2", required=True, metavar="", help="fastq file for index read2") 153 | parser.add_argument("--withUndetermined", action="store_true", default=False, help="output also Undetermined reads") 154 | return parser 155 | 156 | if __name__ == "__main__": 157 | parser = buildArgumentParser() 158 | args = parser.parse_args() 159 | sampleSheet = SampleSheet(args.sample_sheet, args.output_template, args.mismatches, args.withUndetermined) 160 | nr = 1 161 | for (r1, r2, i1, i2) in zip(FastQIterator(args.fastqR1), FastQIterator(args.fastqR2), FastQIterator(args.fastqI1), FastQIterator(args.fastqI2)): 162 | if nr % 10000 == 0: 163 | print("processing line {}".format(nr), file=sys.stderr) 164 | sampleSheet.process(r1, r2, i1, i2) 165 | nr += 1 166 | if args.endAfter is not None: 167 | if nr > args.endAfter: 168 | break 169 | 170 | sampleSheet.reportStats() 171 | 172 | -------------------------------------------------------------------------------- /test/SequenceTools/PileupCallerSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module SequenceTools.PileupCallerSpec (spec) where 3 | 4 | import SequenceFormats.Pileup (PileupRow) 5 | import SequenceTools.PileupCaller (Call (..), CallingMode (..), 6 | TransitionsMode (..), 7 | callGenotypeFromPileup, 8 | callMajorityAllele, 9 | callRandomAllele, 10 | callRandomDiploid, callToDosage, 11 | cleanSSdamageAllSamples, 12 | filterTransitions, 13 | findMajorityAlleles) 14 | import SequenceTools.Utils (dosageToEigenstratGeno, 15 | freqSumToEigenstrat) 16 | 17 | import Control.Monad (forM_, replicateM) 18 | import qualified Data.ByteString.Char8 as B 19 | import Data.List (sort) 20 | import Data.Vector (fromList) 21 | import Pipes (each, (>->)) 22 | import qualified Pipes.Prelude as P 23 | import SequenceFormats.Eigenstrat (EigenstratSnpEntry (..), 24 | GenoEntry (..)) 25 | import SequenceFormats.FreqSum (FreqSumEntry (..)) 26 | import SequenceFormats.Pileup (Strand (..)) 27 | import SequenceFormats.Utils (Chrom (..)) 28 | import Test.Hspec 29 | 30 | spec :: Spec 31 | spec = do 32 | testCallToDosage 33 | testCallRandomDiploid 34 | testCallRandomAllele 35 | testCallMajorityAllele 36 | testFindMajorityAlleles 37 | testCallGenotypeFromPileup 38 | testDosageToEigenstratGeno 39 | testFreqSumToEigenstrat 40 | testFilterTransitions 41 | testCleanSSdamageAllSamples 42 | 43 | testCallToDosage :: Spec 44 | testCallToDosage = describe "callToDosage" $ do 45 | it "should return Nothing for missing call" $ 46 | callToDosage 'A' 'C' MissingCall `shouldBe` Nothing 47 | it "should return Nothing for haploid non-congruent call" $ 48 | callToDosage 'A' 'C' (HaploidCall 'G') `shouldBe` Nothing 49 | it "should return 0 for haploid ref call" $ 50 | callToDosage 'A' 'C' (HaploidCall 'A') `shouldBe` Just (0, 1) 51 | it "should return 1 for haploid alt call" $ 52 | callToDosage 'A' 'C' (HaploidCall 'C') `shouldBe` Just (1, 1) 53 | it "should return Nothing for diploid non-congruent call" $ 54 | callToDosage 'A' 'C' (DiploidCall 'A' 'G') `shouldBe` Nothing 55 | it "should return 0 for diploid hom-ref call" $ 56 | callToDosage 'A' 'C' (DiploidCall 'A' 'A') `shouldBe` Just (0, 2) 57 | it "should return 1 for diploid het call" $ do 58 | callToDosage 'A' 'C' (DiploidCall 'A' 'C') `shouldBe` Just (1, 2) 59 | callToDosage 'A' 'C' (DiploidCall 'C' 'A') `shouldBe` Just (1, 2) 60 | it "should return 2 for diploid hom-alt call" $ 61 | callToDosage 'A' 'C' (DiploidCall 'C' 'C') `shouldBe` Just (2, 2) 62 | 63 | 64 | testCallGenotypeFromPileup :: Spec 65 | testCallGenotypeFromPileup = describe "callGenotypeFromPileup" $ do 66 | it "should return missing if pileup below minDepth" $ 67 | callGenotypeFromPileup RandomCalling 3 "A" `shouldReturn` MissingCall 68 | it "should not return missing if pileup above minDepth" $ 69 | callGenotypeFromPileup RandomCalling 3 "AACCC" `shouldNotReturn` MissingCall 70 | 71 | 72 | testCallMajorityAllele :: Spec 73 | testCallMajorityAllele = describe "callMajorityAllele" $ do 74 | it "should call A from AAA" $ 75 | callMajorityAllele False 1 "AAA" `shouldReturn` HaploidCall 'A' 76 | it "should call A from AAAAA with ds" $ 77 | callMajorityAllele True 3 "AAAAA" `shouldReturn` HaploidCall 'A' 78 | it "should call Missing from AA with ds 3" $ 79 | callMajorityAllele True 3 "AA" `shouldReturn` MissingCall 80 | it "should call A from AAC" $ 81 | callMajorityAllele False 1 "AAC" `shouldReturn` HaploidCall 'A' 82 | it "should call C from ACC" $ 83 | callMajorityAllele False 1 "ACC" `shouldReturn` HaploidCall 'C' 84 | it "should call 50/50 from AACC" $ do 85 | r <- replicateM 1000 (callMajorityAllele False 1 "AACC") 86 | let c = [rr | rr <- r, rr == HaploidCall 'A'] 87 | length c `shouldSatisfy` (\c' -> c' >= 418 && c' <= 582) --p < 1e-7 88 | 89 | testFindMajorityAlleles :: Spec 90 | testFindMajorityAlleles = describe "findMajorityAllele" $ do 91 | it "should return A for AAC" $ 92 | findMajorityAlleles "AAC" `shouldBe` "A" 93 | it "should return C for ACC" $ 94 | findMajorityAlleles "ACC" `shouldBe` "C" 95 | it "should return AC for AACC" $ 96 | findMajorityAlleles "AACC" `shouldBe` "AC" 97 | 98 | testCallRandomAllele :: Spec 99 | testCallRandomAllele = describe "callRandomAllele" $ do 100 | it "should return A for AAA" $ 101 | callRandomAllele "AAA" `shouldReturn` HaploidCall 'A' 102 | it "should return C for C" $ 103 | callRandomAllele "C" `shouldReturn` HaploidCall 'C' 104 | it "should return A,C or G for ACG roughly with 30% each" $ do 105 | r <- replicateM 1000 (callRandomAllele "ACG") 106 | forM_ ['A', 'C', 'G'] $ \nuc -> do 107 | let n = length . filter (==HaploidCall nuc) $ r 108 | n `shouldSatisfy` (\nn -> nn >= 257 && nn <= 412) --p < 1e-7 109 | 110 | testCallRandomDiploid :: Spec 111 | testCallRandomDiploid = describe "callRandomDiploid" $ do 112 | it "should return Missing for A" $ 113 | callRandomDiploid "A" `shouldReturn` MissingCall 114 | it "should return AC for AC" $ do 115 | DiploidCall a1 a2 <- callRandomDiploid "AC" 116 | sort [a1, a2] `shouldBe` "AC" 117 | it "should return 50% het for AACC" $ do 118 | r <- replicateM 1000 (callRandomDiploid "AACC") 119 | let n = length ['A' | DiploidCall a1 a2 <- r, a1 /= a2] 120 | n `shouldSatisfy` (\nn -> nn >= 588 && nn < 743) --p < 1e-7 121 | 122 | testDosageToEigenstratGeno :: Spec 123 | testDosageToEigenstratGeno = describe "dosageToEigenstratGeno" $ do 124 | it "should give Hom-Ref for 0 pseudo-haploid" $ 125 | dosageToEigenstratGeno (Just (0, 1)) `shouldBe` HomRef 126 | it "should give Hom-Alt for 1 pseudo-haploid" $ 127 | dosageToEigenstratGeno (Just (1, 1)) `shouldBe` HomAlt 128 | it "should give Missing for Nothing pseudo-haploid" $ 129 | dosageToEigenstratGeno Nothing `shouldBe` Missing 130 | it "should give Hom-Ref for 0 diploid" $ 131 | dosageToEigenstratGeno (Just (0, 2)) `shouldBe` HomRef 132 | it "should give Het for 1 diploid" $ 133 | dosageToEigenstratGeno (Just (1, 2)) `shouldBe` Het 134 | it "should give Hom-Alt for 2 diploid" $ 135 | dosageToEigenstratGeno (Just (2, 2)) `shouldBe` HomAlt 136 | it "should give Missing for Nothing diploid" $ 137 | dosageToEigenstratGeno Nothing `shouldBe` Missing 138 | 139 | testFreqSumToEigenstrat :: Spec 140 | testFreqSumToEigenstrat = describe "freqSumtoEigenstrat" $ do 141 | let fs = FreqSumEntry (Chrom "1") 1000 Nothing Nothing 'A' 'C' [Just (0, 1), Just (1, 1), Just (1, 1), Nothing, Just (0, 1)] 142 | let es = EigenstratSnpEntry (Chrom "1") 1000 0.0 (B.pack "1_1000") 'A' 'C' 143 | genoLine = fromList [HomRef, HomAlt, HomAlt, Missing, HomRef] 144 | it "should convert a freqSum example correctly to eigenstrat" $ 145 | freqSumToEigenstrat fs `shouldBe` (es, genoLine) 146 | it "should convert a freqSum example with rsId correctly to eigenstrat" $ 147 | freqSumToEigenstrat (fs {fsSnpId = Just "rs123"}) `shouldBe` (es {snpId = "rs123"}, genoLine) 148 | 149 | 150 | mockFreqSumData :: [(Maybe PileupRow, FreqSumEntry)] 151 | mockFreqSumData = [ 152 | (Nothing, FreqSumEntry (Chrom "1") 1000 (Just "rs1") Nothing 'A' 'C' [Just (1, 2), Just (2, 2), Nothing, Just (0, 2), Just (0, 2)]), 153 | (Nothing, FreqSumEntry (Chrom "1") 2000 (Just "rs2") Nothing 'C' 'T' [Just (1, 2), Just (2, 2), Nothing, Just (0, 2), Just (0, 2)]), 154 | (Nothing, FreqSumEntry (Chrom "1") 3000 (Just "rs3") Nothing 'A' 'G' [Just (1, 2), Just (2, 2), Nothing, Just (0, 2), Just (0, 2)]), 155 | (Nothing, FreqSumEntry (Chrom "2") 1000 (Just "rs4") Nothing 'A' 'G' [Just (1, 2), Just (2, 2), Nothing, Just (0, 2), Just (0, 2)]), 156 | (Nothing, FreqSumEntry (Chrom "2") 2000 (Just "rs5") Nothing 'T' 'A' [Just (1, 2), Just (2, 2), Nothing, Just (0, 2), Just (0, 2)]), 157 | (Nothing, FreqSumEntry (Chrom "2") 3000 (Just "rs6") Nothing 'T' 'C' [Just (1, 2), Just (2, 2), Nothing, Just (0, 2), Just (0, 2)])] 158 | 159 | testFilterTransitions :: Spec 160 | testFilterTransitions = describe "filterTransitions" $ do 161 | it "should remove transitions with SkipTransitions" $ do 162 | let r = P.toList $ each mockFreqSumData >-> 163 | filterTransitions SkipTransitions 164 | map snd r `shouldBe` [ 165 | FreqSumEntry (Chrom "1") 1000 (Just "rs1") Nothing 'A' 'C' 166 | [Just (1, 2), Just (2, 2), Nothing, Just (0, 2), Just (0, 2)], 167 | FreqSumEntry (Chrom "2") 2000 (Just "rs5") Nothing 'T' 'A' 168 | [Just (1, 2), Just (2, 2), Nothing, Just (0, 2), Just (0, 2)]] 169 | it "should mark transitions as missing with TransitionsMissing" $ do 170 | let r = P.toList $ 171 | each mockFreqSumData >-> 172 | filterTransitions TransitionsMissing 173 | map snd r `shouldBe` [ 174 | FreqSumEntry (Chrom "1") 1000 (Just "rs1") Nothing 'A' 'C' 175 | [Just (1, 2), Just (2, 2), Nothing, Just (0, 2), Just (0, 2)], 176 | FreqSumEntry (Chrom "1") 2000 (Just "rs2") Nothing 'C' 'T' [Nothing, Nothing, Nothing, Nothing, Nothing], 177 | FreqSumEntry (Chrom "1") 3000 (Just "rs3") Nothing 'A' 'G' [Nothing, Nothing, Nothing, Nothing, Nothing], 178 | FreqSumEntry (Chrom "2") 1000 (Just "rs4") Nothing 'A' 'G' [Nothing, Nothing, Nothing, Nothing, Nothing], 179 | FreqSumEntry (Chrom "2") 2000 (Just "rs5") Nothing 'T' 'A' 180 | [Just (1, 2), Just (2, 2), Nothing, Just (0, 2), Just (0, 2)], 181 | FreqSumEntry (Chrom "2") 3000 (Just "rs6") Nothing 'T' 'C' [Nothing, Nothing, Nothing, Nothing, Nothing]] 182 | it "should output all sites with AllSites" $ do 183 | let r = P.toList $ each mockFreqSumData >-> filterTransitions AllSites 184 | r `shouldBe` mockFreqSumData 185 | it "should output all sites with SingleStrandMode" $ do 186 | let r = P.toList $ each mockFreqSumData >-> filterTransitions SingleStrandMode 187 | r `shouldBe` mockFreqSumData 188 | 189 | testCleanSSdamageAllSamples :: Spec 190 | testCleanSSdamageAllSamples = describe "cleanSSdamageAllSamples" $ do 191 | let bases = ["AACATG", "AACATT", "AACTTG"] 192 | strands = [[f, r, r, f, r, r], [r, f, r, f, f, r], [f, f, r, f, f, r]] 193 | it "should not remove anything if not C/T or G/A SNP" $ 194 | cleanSSdamageAllSamples 'C' 'A' bases strands `shouldBe` bases 195 | it "should remove forward reads from C/T SNPs" $ do 196 | cleanSSdamageAllSamples 'C' 'T' bases strands `shouldBe` ["ACTG", "ACT", "CG"] 197 | cleanSSdamageAllSamples 'T' 'C' bases strands `shouldBe` ["ACTG", "ACT", "CG"] 198 | it "should remove reverse reads from G/A SNPs" $ do 199 | cleanSSdamageAllSamples 'A' 'G' bases strands `shouldBe` ["AA", "AAT", "AATT"] 200 | cleanSSdamageAllSamples 'G' 'A' bases strands `shouldBe` ["AA", "AAT", "AATT"] 201 | where 202 | f = ForwardStrand 203 | r = ReverseStrand 204 | -------------------------------------------------------------------------------- /old/filterTrimFastq.d: -------------------------------------------------------------------------------- 1 | import std.stdio; 2 | import std.getopt; 3 | import std.range; 4 | import std.algorithm; 5 | import std.exception; 6 | import std.string; 7 | import std.random; 8 | import std.conv; 9 | // import gzip; 10 | 11 | string fwdTag, bwdTag; 12 | string fwdFilename, bwdFilename; 13 | string fakePrefix; 14 | string fakePrefixQual; 15 | string outPrefix; 16 | long startNr, endNr; 17 | int maxMismatch = 1; 18 | int overlapSize = 20; 19 | int minLength = 30; 20 | int phredQualASCIIOffset = 33; 21 | long[char[2]] mismatchDict; 22 | long totalMismatches; 23 | char[char] revNuc; 24 | long[int] lengthDist; 25 | 26 | 27 | void main(string[] args) { 28 | try { 29 | readArgs(args); 30 | } 31 | catch(Exception e) { 32 | stderr.writeln(e.msg); 33 | printHelp(); 34 | return; 35 | } 36 | run(); 37 | } 38 | 39 | void readArgs(string[] args) { 40 | getopt(args, std.getopt.config.caseSensitive, 41 | "fwdTag|f", &fwdTag, 42 | "bwdTag|b", &bwdTag, 43 | "startNr", &startNr, 44 | "endNr", &endNr, 45 | "outPrefix|o", &outPrefix, 46 | "fakePrefix|p", &fakePrefix, 47 | "minLength", &minLength, 48 | "maxMismatch", &maxMismatch, 49 | "minOverlapSize", &overlapSize); 50 | enforce(args.length == 3, "need two input files"); 51 | fwdFilename = args[1]; 52 | bwdFilename = args[2]; 53 | enforce(endNr >= startNr); 54 | if(fakePrefix.length > 0) { 55 | auto tmp = new char[fakePrefix.length]; 56 | tmp[] = makeQualChar(40); // highest Quality on Illumina machines 57 | fakePrefixQual = tmp.idup; 58 | } 59 | } 60 | 61 | void printHelp() { 62 | stderr.writeln("./filterTrim [options] 63 | Options: 64 | --fwdTag, -f: 5prime Barcode, capital letters 65 | --bwdTag, -b: 3prime Barcode, capital letters 66 | --fakePrefix, -p: Attach an artificial Barcode to the beginning of each forward read. This has been implemented to deal with data generated by a protocol with darkcycles to skip the first barcode. 67 | --outPrefix, -o: Prefix for the output filename. Four files are output: 68 | _1.fastq contains the forward reads that could not be merged. 69 | _2.fastq contains the backward reads that could not be merged. 70 | _merged.fastq contains the merged reads 71 | _stats.fastq contains some statistics 72 | --startNr: If given, start at given read number 73 | --endNr: If given, end at given read number 74 | --maxMismatch: Mismatch allowed between the two reads for merging (Default: 1) 75 | --minLength: Minimum length of the merged read (Default: 30). 76 | --minOverlapSize: Minimum overlap size between the two reads (relevant for long inserts. Default: 20) 77 | WARNING: filterTrimFastq currently does not support GZIP because of a bug. So input files must be uncompressed and output files will be uncompressed as well. Sorry!"); 78 | } 79 | 80 | void run() { 81 | revNuc['A'] = 'T'; 82 | revNuc['T'] = 'A'; 83 | revNuc['C'] = 'G'; 84 | revNuc['G'] = 'C'; 85 | revNuc['N'] = 'N'; 86 | 87 | auto fwdFr = File(fwdFilename).byLineCopy(); //new GzipByLine(fwdFilename); 88 | auto bwdFr = File(bwdFilename).byLineCopy(); //new GzipByLine(bwdFilename); 89 | 90 | auto fwdR = new ChunkRange!(typeof(fwdFr), string, 4)(fwdFr); 91 | auto bwdR = new ChunkRange!(typeof(bwdFr), string, 4)(bwdFr); 92 | 93 | long fwdTagFails, bwdTagFails, nrPassed, nrMerged, nrTotal; 94 | long fwdTagImperfects, bwdTagImperfects; 95 | // auto fwdOutF = new GzipOut(outPrefix ~ "_1.fastq.gz"); 96 | // auto bwdOutF = new GzipOut(outPrefix ~ "_2.fastq.gz"); 97 | // auto mergedOutF = new GzipOut(outPrefix ~ "_merged.fastq.gz"); 98 | auto fwdOutF = File(outPrefix ~ "_1.fastq", "w"); 99 | auto bwdOutF = File(outPrefix ~ "_2.fastq", "w"); 100 | auto mergedOutF = File(outPrefix ~ "_merged.fastq", "w"); 101 | auto statsF = File(outPrefix ~ "_stats.txt", "w"); 102 | 103 | auto cnt = 0; 104 | foreach(pair; zip(fwdR, bwdR)) { 105 | nrTotal += 1; 106 | cnt += 1; 107 | if(cnt % 100000 == 0) 108 | stderr.writeln("processing read nr. ", cnt); 109 | if(endNr > 0) { 110 | if(cnt < startNr) 111 | continue; 112 | if(cnt > endNr) 113 | break; 114 | } 115 | // enforce(pair[0][0][0 .. $ - 1] == pair[1][0][0 .. $ - 1], "read pairs not in equal order"); 116 | auto fwdSeq = fakePrefix ~ pair[0][1]; 117 | auto fwdQual = fakePrefixQual ~ pair[0][3]; 118 | auto bwdSeq = pair[1][1]; 119 | auto bwdQual = pair[1][3]; 120 | auto fwdTagSeq = fwdSeq[0 .. fwdTag.length]; 121 | auto bwdTagSeq = bwdSeq[0 .. bwdTag.length]; 122 | auto fwdTagFail = !seqMatch(fwdTagSeq, fwdTag); 123 | auto bwdTagFail = !seqMatch(bwdTagSeq, bwdTag); 124 | fwdTagFails += fwdTagFail; 125 | bwdTagFails += bwdTagFail; 126 | fwdTagImperfects += canFind(fwdTagSeq, 'N'); 127 | bwdTagImperfects += canFind(bwdTagSeq, 'N'); 128 | if(fwdTagFail || bwdTagFail) 129 | continue; 130 | nrPassed += 1; 131 | auto readEndDist = searchReadEndDist(fwdSeq, bwdSeq, overlapSize); 132 | if(readEndDist >= 0) { 133 | if(readEndDist <= fwdTag.length + bwdTag.length) 134 | continue; 135 | auto merged = mergeReads([fwdSeq, fwdQual], [bwdSeq, bwdQual], readEndDist); 136 | auto mergedSeq = merged[0]; 137 | auto mergedQual = merged[1]; 138 | auto l = to!int(mergedSeq.length); 139 | if(l !in lengthDist) 140 | lengthDist[l] = 0; 141 | lengthDist[l] += 1; 142 | if(l >= minLength) { 143 | nrMerged += 1; 144 | // mergedOutF.compress(pair[0][0][0 .. $ - 2] ~ "\n"); 145 | // mergedOutF.compress(mergedSeq ~ "\n"); 146 | // mergedOutF.compress("+\n"); 147 | // mergedOutF.compress(mergedQual ~ "\n"); 148 | mergedOutF.writeln(pair[0][0][0 .. $ - 2]); 149 | mergedOutF.writeln(mergedSeq); 150 | mergedOutF.writeln("+"); 151 | mergedOutF.writeln(mergedQual); 152 | } 153 | } 154 | else { 155 | // fwdOutF.compress(pair[0][0] ~ "\n"); 156 | // fwdOutF.compress(fwdSeq[fwdTag.length .. $] ~ "\n"); 157 | // fwdOutF.compress(pair[0][2] ~ "\n"); 158 | // fwdOutF.compress(fwdQual[fwdTag.length .. $] ~ "\n"); 159 | // bwdOutF.compress(pair[1][0] ~ "\n"); 160 | // bwdOutF.compress(bwdSeq[bwdTag.length .. $] ~ "\n"); 161 | // bwdOutF.compress(pair[1][2] ~ "\n"); 162 | // bwdOutF.compress(bwdQual[bwdTag.length .. $] ~ "\n"); 163 | fwdOutF.writeln(pair[0][0]); 164 | fwdOutF.writeln(fwdSeq[fwdTag.length .. $]); 165 | fwdOutF.writeln(pair[0][2]); 166 | fwdOutF.writeln(fwdQual[fwdTag.length .. $]); 167 | bwdOutF.writeln(pair[1][0]); 168 | bwdOutF.writeln(bwdSeq[bwdTag.length .. $]); 169 | bwdOutF.writeln(pair[1][2]); 170 | bwdOutF.writeln(bwdQual[bwdTag.length .. $]); 171 | } 172 | } 173 | // mergedOutF.finish(); 174 | // fwdOutF.finish(); 175 | // bwdOutF.finish(); 176 | mergedOutF.close(); 177 | fwdOutF.close(); 178 | bwdOutF.close(); 179 | logPrint(statsF, "Forward strand tag fails:\t%s", fwdTagFails); 180 | logPrint(statsF, "Backward strand tag fails:\t%s", bwdTagFails); 181 | logPrint(statsF, "Forward strand tag contains N:\t%s", fwdTagImperfects); 182 | logPrint(statsF, "Backward strand tag contains N:\t%s", bwdTagImperfects); 183 | logPrint(statsF, "Total reads:\t%s", nrTotal); 184 | logPrint(statsF, "Passed reads:\t%s", nrPassed); 185 | logPrint(statsF, "Merged reads:\t%s", nrMerged); 186 | logPrint(statsF, "Nr of mismatches:\t%s", totalMismatches); 187 | foreach(key, val; mismatchDict) 188 | logPrint(statsF, "Mismatch\t%s,%s\t%s", key[0], key[1], val); 189 | foreach(key; sort(lengthDist.keys())) 190 | logPrint(statsF, "Length\t%s\t%s", key, lengthDist[key]); 191 | 192 | } 193 | 194 | void logPrint(S...)(File f, S s) { 195 | f.writefln(s); 196 | stderr.writefln(s); 197 | } 198 | 199 | class ChunkRange(R, T, size_t L) { 200 | R input; 201 | T[L] frontElem; 202 | bool empty_; 203 | 204 | this(R range) { 205 | input = range; 206 | loadFront(); 207 | } 208 | 209 | void loadFront() { 210 | if(!input.empty) { 211 | foreach(i; 0 .. L) { 212 | frontElem[i] = input.front; 213 | input.popFront(); 214 | } 215 | } 216 | else { 217 | empty_ = true; 218 | } 219 | } 220 | 221 | @property T[L] front() { 222 | return frontElem; 223 | } 224 | 225 | @property bool empty() { 226 | return empty_; 227 | } 228 | 229 | void popFront() { 230 | loadFront(); 231 | } 232 | } 233 | 234 | bool seqMatch(in char[] seq1, in char[] seq2, int nrMismatches=0) { 235 | enforce(seq1.length == seq2.length); 236 | auto sum = 0UL; 237 | foreach(pair; zip(seq1, seq2)) { 238 | if(pair[0] != 'N' && pair[1] != 'N' && pair[0] != pair[1]) 239 | sum += 1; 240 | if(sum > nrMismatches) 241 | return false; 242 | } 243 | return true; 244 | } 245 | 246 | unittest { 247 | assert(seqMatch("AACCT", "ACCCT") == false); 248 | assert(seqMatch("AACCT", "ACCCT", 1) == true); 249 | } 250 | 251 | // the readEndDistance is defined as the distance from the start of the forward read to the end of backward read 252 | int searchReadEndDist(in char[] fwdSeq, in char[] bwdSeq, int overlapSize) { 253 | auto minDist = overlapSize; 254 | auto maxDist = cast(int)(fwdSeq.length + bwdSeq.length - overlapSize); 255 | int[] ret; 256 | foreach(dist; minDist .. maxDist + 1) { 257 | auto end = min(dist, cast(int)bwdSeq.length); 258 | auto offset = max(0, dist - cast(int)fwdSeq.length); 259 | auto cmpSeq = bwdSeq[offset .. end].reverseComplement(); 260 | auto fwdOffset = max(0, dist - cast(int)bwdSeq.length); 261 | // stderr.writeln(dist, " ", cmpSeq.length, " ", fwdOffset, " ", cmpSeq.length, " ", fwdSeq.length); 262 | if(seqMatch(fwdSeq[fwdOffset .. fwdOffset + cast(int)cmpSeq.length], cmpSeq, maxMismatch)) 263 | ret ~= dist; 264 | } 265 | if(ret.length == 0 || ret.length > 1) 266 | return -1; 267 | else 268 | return ret[0]; 269 | } 270 | 271 | unittest { 272 | assert(searchReadEndDist("ACCTGCCTGC", "GCTGGTAATC", 4) == 6); 273 | assert(searchReadEndDist("ACCTGCCTGC", "AGCCAGGTCC", 4) == 8); 274 | assert(searchReadEndDist("ACCTGCCTGC", "GCAGGCTGGT", 8) == 10); 275 | assert(searchReadEndDist("ACCTGCCTGC", "TTTGCAGGCT", 4) == 13); 276 | assert(searchReadEndDist("ACCTGCCTGC", "TTGGTTGGCC", 4) == -1); 277 | } 278 | 279 | string[2] mergeReads(string[2] fwd, string[2] bwd, int readEndDist) { 280 | char[] mergedSeq; 281 | char[] mergedQual; 282 | 283 | auto revcmp = bwd[0].reverseComplement(); 284 | auto revQual = bwd[1].retro.map!"a.to!char()"().array; 285 | foreach(pos; cast(int)fwdTag.length .. readEndDist - cast(int)bwdTag.length) { 286 | if(pos < readEndDist - cast(int)revcmp.length) { 287 | mergedSeq ~= fwd[0][pos]; 288 | mergedQual ~= fwd[1][pos]; 289 | } 290 | else { 291 | auto revOffset = pos - (readEndDist - cast(int)revcmp.length); 292 | if(pos >= cast(int)fwd[0].length) { 293 | mergedSeq ~= revcmp[revOffset]; 294 | mergedQual ~= revQual[revOffset]; 295 | } 296 | else { 297 | auto qual1 = getQualPhred(fwd[1][pos]); 298 | auto qual2 = getQualPhred(revQual[revOffset]); 299 | if(fwd[0][pos] != revcmp[revOffset]) { 300 | if(fwd[0][pos] != 'N' && revcmp[revOffset] != 'N') { 301 | totalMismatches += 1; 302 | char[2] pair = [fwd[0][pos], revcmp[revOffset]]; 303 | if(pair !in mismatchDict) 304 | mismatchDict[pair] = 1; 305 | else 306 | mismatchDict[pair] += 1; 307 | } 308 | if(qual1 > qual2) 309 | mergedSeq ~= fwd[0][pos]; 310 | if(qual1 < qual2) 311 | mergedSeq ~= revcmp[revOffset]; 312 | if(qual1 == qual2) { 313 | auto rn = uniform(0.0, 1.0); 314 | mergedSeq ~= rn < 0.5 ? fwd[0][pos] : revcmp[revOffset]; 315 | } 316 | mergedQual ~= makeQualChar(min(qual1, qual2)); 317 | } 318 | else { 319 | mergedSeq ~= fwd[0][pos]; 320 | mergedQual ~= makeQualChar(min(40,qual1 + qual2)); 321 | } 322 | } 323 | } 324 | } 325 | 326 | // printout for debug 327 | // if(readEndDist >= bwd[0].length) { 328 | // stderr.writeln("forward: ", fwd[0]); 329 | // auto pad = new char[max(0, readEndDist - cast(int)bwd[0].length)]; 330 | // pad[] = ' '; 331 | // stderr.writeln("backward: ", pad[] ~ revcmp); 332 | // pad.length = fwdTag.length; 333 | // pad[] = ' '; 334 | // stderr.writeln("merged: ", pad ~ mergedSeq); 335 | // stderr.writeln(""); 336 | // } 337 | // else { 338 | // auto pad = new char[bwd[0].length - readEndDist]; 339 | // pad[] = ' '; 340 | // stderr.writeln("forward: ", pad ~ fwd[0]); 341 | // stderr.writeln("backward: ", revcmp); 342 | // pad.length = bwd[0].length - readEndDist + fwdTag.length; 343 | // pad[] = ' '; 344 | // stderr.writeln("merged: ", pad ~ mergedSeq); 345 | // stderr.writeln(""); 346 | // } 347 | 348 | return [mergedSeq.idup, mergedQual.idup]; 349 | } 350 | 351 | char[] reverseComplement(in char[] seq) { 352 | auto ret = seq.dup; 353 | auto n = ret.length; 354 | foreach(i; 0 .. n) { 355 | ret[i] = revNuc[seq[n - 1 - i]]; 356 | } 357 | return ret; 358 | } 359 | 360 | unittest { 361 | assert(reverseComplement("ACCTT") == "AAGGT"); 362 | assert(reverseComplement("ACNTT") == "AANGT"); 363 | } 364 | 365 | int getQualPhred(char qualChar) { 366 | return cast(int)qualChar - phredQualASCIIOffset; 367 | } 368 | 369 | char makeQualChar(int qual) { 370 | return cast(char)(qual + phredQualASCIIOffset); 371 | } -------------------------------------------------------------------------------- /old/src-simpleBamCaller/simpleBamCaller.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | import SeqTools.OrderedZip (orderedZip) 4 | import SeqTools.VCF (VCFentry (..), VCFheader (..), 5 | readVCF) 6 | 7 | import Control.Error (headErr) 8 | import Control.Exception.Base (AssertionFailed (..), throwIO) 9 | import Control.Monad.Random (evalRandIO) 10 | import Control.Monad.Trans.Class (lift) 11 | import Control.Monad.Trans.Reader (ReaderT, asks, runReaderT) 12 | import qualified Data.Attoparsec.Text as A 13 | import Data.Char (isSpace) 14 | import Data.List (sortBy) 15 | import qualified Data.Text as T 16 | import qualified Data.Text.IO as T 17 | import Debug.Trace (trace) 18 | import qualified Options.Applicative as OP 19 | import Pipes (Pipe, Producer, cat, for, 20 | runEffect, yield, (>->)) 21 | import Pipes.Attoparsec (parsed) 22 | import Pipes.Cliff (CmdSpec (..), CreateProcess (..), 23 | NonPipe (..), pipeOutput) 24 | import Pipes.Cliff.Core (defaultHandler) 25 | import qualified Pipes.Prelude as P 26 | import Pipes.Safe (SafeT, runSafeT) 27 | import Pipes.Safe.Prelude (withFile) 28 | import Pipes.Text.Encoding (decodeUtf8) 29 | import qualified Pipes.Text.IO as PT 30 | import Prelude hiding (FilePath) 31 | import System.IO (IOMode (..)) 32 | import System.Random (mkStdGen, randomRIO, setStdGen) 33 | import System.Random.Shuffle (shuffleM) 34 | import Turtle hiding (cat, stderr, tab) 35 | 36 | data ProgOpt = ProgOpt { 37 | optCallingMode :: CallingMode, 38 | optSeed :: Maybe Int, 39 | optMinDepth :: Int, 40 | optTransversionsOnly :: Bool, 41 | optSnpFile :: Maybe FilePath, 42 | optRegion :: Text, 43 | optOutChrom :: Maybe Text, 44 | optReference :: FilePath, 45 | optOutFormat :: OutFormat, 46 | optEigenstratOutPrefix :: Maybe FilePath, 47 | optSamtoolsExe :: FilePath, 48 | optBcftoolsExe :: FilePath, 49 | optBamFiles :: [FilePath] 50 | } 51 | 52 | data CallingMode = MajorityCalling | RandomCalling | RareCalling deriving (Show, Read) 53 | data OutFormat = EigenStrat | FreqSumFormat deriving (Show, Read) 54 | data VCFentryReduced = VCFentryReduced T.Text Int [T.Text] [[Int]] deriving (Show) 55 | data FreqSumRow = FreqSumRow T.Text Int T.Text T.Text [Int] deriving (Show) 56 | data SnpEntry = SnpEntry T.Text Int T.Text T.Text deriving (Show)-- Chrom Pos Ref Alt 57 | 58 | main :: IO () 59 | main = OP.execParser parser >>= runSafeT . runReaderT runWithOpts 60 | where 61 | parser = OP.info (OP.helper <*> argParser) 62 | (OP.progDesc "A program to perform simple genotype calling directly from BAM") 63 | 64 | argParser :: OP.Parser ProgOpt 65 | argParser = ProgOpt <$> parseCallingMode <*> parseSeed <*> parseMinDepth <*> 66 | parseTransversionsOnly <*> parseSnpFile <*> parseChrom <*> 67 | parseOutChrom <*> parseRef <*> parseFormat <*> parseEigenstratOutPrefix <*> 68 | parseSamtoolsExe <*> parseBcftoolsExe <*> OP.some parseBams 69 | where 70 | parseCallingMode = OP.option OP.auto (OP.long "mode" <> OP.short 'm' <> 71 | OP.value RandomCalling <> OP.showDefault <> OP.metavar "" <> 72 | OP.help "specify the mode of calling: MajorityCalling, RandomCalling or \ 73 | \RareCalling. MajorityCalling: Pick the allele supported by the \ 74 | \most reads. If equal numbers of Alleles fulfil this, pick one at \ 75 | \random. RandomCalling: Pick one read at random. RareCalling: \ 76 | \Require a number of reads equal to the minDepth supporting the \ 77 | \alternative allele to call a heterozygote. Otherwise call \ 78 | \homozygous reference or missing depending on depth. For \ 79 | \RareCalling you should use --minDepth 2.") 80 | parseSeed = OP.option (Just <$> OP.auto) (OP.long "seed" <> OP.value Nothing <> 81 | OP.metavar "" <> OP.help "random seed used for random calling. If \ 82 | \not given, use system clock to seed the random number generator") 83 | parseMinDepth = OP.option OP.auto (OP.long "minDepth" <> OP.short 'd' <> OP.value 1 <> 84 | OP.showDefault <> 85 | OP.metavar "" <> 86 | OP.help "specify the minimum depth for a call. This has a \ 87 | \special meaning for RareCalling, see --mode") 88 | parseTransversionsOnly = OP.switch (OP.long "transversionsOnly" <> OP.short 't' <> 89 | OP.help "Remove transition SNPs from the output)") 90 | parseSnpFile = OP.option (Just . fromText . T.pack <$> OP.str) 91 | (OP.long "snpFile" <> OP.short 'f' <> OP.value Nothing <> OP.metavar "" <> 92 | OP.help "specify a SNP file for the positions and alleles to call. All \ 93 | \positions in the SNP file will be output, adding missing data where \ 94 | \necessary. The file should have three columns (space- or \ 95 | \tab-separated): Chromosome, position and \ 96 | \alleles, where the alleles should be one reference and one \ 97 | \alternative allele \ 98 | \separated by a comma. Note that this file needs to have a difference \ 99 | \format than \ 100 | \the snp files in Eigenstrat because of samtools specifications. Note \ 101 | \also that simpleBamCaller automatically checks whether alleles in \ 102 | \the SNP file are flipped with respect to the human reference. But \ 103 | \it assumes that the strand-orientation is the same.") 104 | parseChrom = OP.option (T.pack <$> OP.str) (OP.long "chrom" <> OP.short 'c' <> 105 | OP.metavar "" <> OP.help "specify the region in the BAM file to \ 106 | \call from. Can be just the chromosome, or a string of the form \ 107 | \CHROM:START-END") 108 | parseOutChrom = OP.option (Just . T.pack <$> OP.str) (OP.long "outChrom" <> 109 | OP.metavar "" <> 110 | OP.help "specify the output chromosome name" <> OP.value Nothing) 111 | parseRef = OP.option (fromText . T.pack <$> OP.str) (OP.long "reference" <> OP.short 'r' <> 112 | OP.metavar "" <> 113 | OP.help "the reference fasta file") 114 | parseBams = OP.argument (fromText . T.pack <$> OP.str) (OP.metavar "" <> 115 | OP.help "input file, give multiple files for multiple samples") 116 | parseFormat = OP.option OP.auto (OP.metavar "" <> OP.long "format" <> 117 | OP.short 'o' <> 118 | OP.value FreqSumFormat <> OP.showDefault <> 119 | OP.help "specify output format: EigenStrat or FreqSum") 120 | parseEigenstratOutPrefix = OP.option (Just . fromText . T.pack <$> OP.str) 121 | (OP.long "eigenstratOutPrefix" <> 122 | OP.short 'e' <> 123 | OP.value Nothing <> OP.metavar "" <> 124 | OP.help "specify the filenames for the EigenStrat SNP and IND \ 125 | \file outputs: .snp.txt and .ind.txt \ 126 | \Ignored if Output format is not Eigenstrat") 127 | parseSamtoolsExe = OP.option (fromText . T.pack <$> OP.str) (OP.long "samtools" <> 128 | OP.value "samtools" <> OP.showDefault <> 129 | OP.metavar "" <> 130 | OP.help "path to the samtools executable, version >= 1.2") 131 | parseBcftoolsExe = OP.option (fromText . T.pack <$> OP.str) (OP.long "bcftools" <> 132 | OP.value "bcftools" <> OP.showDefault <> 133 | OP.metavar "" <> OP.help "path to the \ 134 | \bcftools executable, version >= 1.2") 135 | 136 | runWithOpts :: ReaderT ProgOpt (SafeT IO) () 137 | runWithOpts = do 138 | seed <- asks optSeed 139 | region <- asks optRegion 140 | outChrom <- asks optOutChrom 141 | outFormat <- asks optOutFormat 142 | eigenStratOutPrefix <- asks optEigenstratOutPrefix 143 | transversionsOnly <- asks optTransversionsOnly 144 | case seed of 145 | Nothing -> return () 146 | Just seed_ -> liftIO . setStdGen $ mkStdGen seed_ 147 | let chrom = head (T.splitOn ":" region) 148 | let outputChrom = case outChrom of 149 | Nothing -> chrom 150 | Just label -> label 151 | (vcfHeader, freqSumProducer) <- runPileup 152 | -- liftIO $ print vcfHeader 153 | case outFormat of 154 | FreqSumFormat -> do 155 | let VCFheader _ n = vcfHeader 156 | echo $ format ("#CHROM\tPOS\tREF\tALT\t"%s) 157 | (T.intercalate "\t" . map (format (s%"(2)")) $ n) 158 | lift . runEffect $ freqSumProducer >-> filterTransitions transversionsOnly >-> 159 | P.map (showFreqSum outputChrom) >-> printToStdOut 160 | EigenStrat -> case eigenStratOutPrefix of 161 | Nothing -> liftIO . throwIO $ AssertionFailed "need an eigenstratPrefix for \ 162 | \EigenStratFormat" 163 | Just fn -> do 164 | let snpOut = fn <.> "snp.txt" 165 | indOut = fn <.> "ind.txt" 166 | lift . withFile (T.unpack . format fp $ indOut) WriteMode $ \indOutHandle -> do 167 | let VCFheader _ sampleNames = vcfHeader 168 | mapM_ (\n -> liftIO $ T.hPutStrLn indOutHandle (format (s%"\tU\tUnknown") n)) 169 | sampleNames 170 | lift . withFile (T.unpack . format fp $ snpOut) WriteMode $ \snpOutHandle -> do 171 | runEffect $ freqSumProducer >-> filterTransitions transversionsOnly >-> 172 | printEigenStrat outputChrom snpOutHandle >-> printToStdOut 173 | where 174 | printToStdOut = for cat (liftIO . T.putStrLn) 175 | filterTransitions transversionsOnly = 176 | if transversionsOnly 177 | then P.filter (\(FreqSumRow _ _ ref alt _) -> isTransversion ref alt) 178 | else cat 179 | isTransversion ref alt = not $ isTransition ref alt 180 | isTransition ref alt = ((ref == "A") && (alt == "G")) || ((ref == "G") && (alt == "A")) || 181 | ((ref == "C") && (alt == "T")) || ((ref == "T") && (alt == "C")) 182 | 183 | runPileup :: ReaderT ProgOpt (SafeT IO) (VCFheader, Producer FreqSumRow (SafeT IO) ()) 184 | runPileup = do 185 | snpFile <- asks optSnpFile 186 | case snpFile of 187 | Nothing -> runPileupSimple 188 | Just fn -> runPileupSnpFile fn 189 | 190 | runPileupSimple :: ReaderT ProgOpt (SafeT IO) (VCFheader, Producer FreqSumRow (SafeT IO) ()) 191 | runPileupSimple = do 192 | samtools <- asks optSamtoolsExe 193 | bcftools <- asks optBcftoolsExe 194 | reference <- asks optReference 195 | region <- asks optRegion 196 | bamFiles <- asks optBamFiles 197 | mode <- asks optCallingMode 198 | minDepth <- asks optMinDepth 199 | let bams = (T.intercalate " " (map (format fp) bamFiles)) 200 | let cmd = format (fp%" mpileup -q30 -Q30 -C50 -I -f "%fp%" -g -t DPR -r "%s%" "%s% 201 | " | "%fp%" view -v snps") samtools reference region bams bcftools 202 | vcfTextProd <- liftIO $ produceFromCommand cmd 203 | (vcfHeader_, vcfProd) <- lift $ readVCF vcfTextProd 204 | let vcfProdPipe = vcfProd >-> processVcfSimple (length bamFiles) mode minDepth 205 | return (vcfHeader_, vcfProdPipe) 206 | 207 | runPileupSnpFile :: FilePath -> 208 | ReaderT ProgOpt (SafeT IO) (VCFheader, Producer FreqSumRow (SafeT IO) ()) 209 | runPileupSnpFile fn = do 210 | samtools <- asks optSamtoolsExe 211 | bcftools <- asks optBcftoolsExe 212 | reference <- asks optReference 213 | region <- asks optRegion 214 | bamFiles <- asks optBamFiles 215 | mode <- asks optCallingMode 216 | minDepth <- asks optMinDepth 217 | let bams = (T.intercalate " " (map (format fp) bamFiles)) 218 | let chrom = head (T.splitOn ":" region) 219 | let cmd = format (fp%" mpileup -q30 -Q30 -C50 -I -f "%fp%" -g -t DPR -r "%s%" -l "%fp% 220 | " "%s%" | "%fp%" view") samtools reference region fn bams bcftools 221 | vcfTextProd <- liftIO $ produceFromCommand cmd 222 | let snpTextProd = PT.readFile ((T.unpack . format fp) fn) 223 | (vcfHeader_, vcfProd) <- lift $ readVCF vcfTextProd 224 | let snpProd = parsed snpParser snpTextProd >-> P.filter (\(SnpEntry c _ _ _) -> c == chrom) 225 | jointProd = orderedZip cmp snpProd vcfProd 226 | jointProdPipe = jointProd >-> processVcfWithSnpFile (length bamFiles) mode minDepth 227 | return (vcfHeader_, fmap snd jointProdPipe) 228 | where 229 | cmp (SnpEntry _ snpPos _ _) vcfEntry = snpPos `compare` (vcfPos vcfEntry) 230 | 231 | produceFromCommand :: Text -> IO (Producer Text (SafeT IO) ()) 232 | produceFromCommand cmd = do 233 | let createProcess = CreateProcess (ShellCommand (T.unpack cmd)) Nothing Nothing False False 234 | False False False False Nothing Nothing defaultHandler 235 | (p, _) <- pipeOutput Inherit Inherit createProcess 236 | return . void . decodeUtf8 $ p 237 | 238 | processVcfSimple :: Int -> CallingMode -> Int -> Pipe VCFentry FreqSumRow (SafeT IO) r 239 | processVcfSimple nrInds mode minDepth = for cat $ \vcfEntry -> do 240 | let Right (VCFentryReduced chrom pos alleles covNums) = makeReducedVCF vcfEntry 241 | -- trace (show vcfEntry) $ return () 242 | -- trace (show (VCFentryReduced chrom pos alleles covNums)) $ return () 243 | when (length covNums /= nrInds) $ (liftIO . throwIO) (AssertionFailed "inconsistent number \ 244 | \of genotypes. Check that bam files have different readgroup sample names") 245 | (normalizedAlleles, normalizedCovNums) <- case alleles of 246 | [_] -> liftIO . throwIO $ AssertionFailed "should not happen, need at least one \ 247 | \alternative allele" 248 | [ref, alt] -> return ([ref, alt], covNums) 249 | _ -> do 250 | let altNumPairs = 251 | [(alleles!!i, sum [c!!i | c <- covNums]) | i <- [1 .. (length alleles - 1)]] 252 | shuffledAltNumPairs <- liftIO $ shuffle altNumPairs 253 | let (alt, _) = head . sortBy (\a b -> snd b `compare` snd a) $ shuffledAltNumPairs 254 | let altIndex = snd . head . filter ((==alt) . fst) $ zip alleles [0..] 255 | when (altIndex == 0) $ (liftIO . throwIO) 256 | (AssertionFailed "should not happen, altIndex==0") 257 | return ([head alleles, alt], [[c !! 0, c !! altIndex] | c <- covNums]) 258 | let [ref, alt] = normalizedAlleles 259 | when (ref /= "N") $ do 260 | genotypes <- liftIO $ mapM (callGenotype mode minDepth) normalizedCovNums 261 | when (any (>0) genotypes) $ yield (FreqSumRow chrom pos ref alt genotypes) 262 | where 263 | shuffle list = evalRandIO (shuffleM list) 264 | 265 | makeReducedVCF :: VCFentry -> Either String VCFentryReduced 266 | makeReducedVCF (VCFentry chrom pos _ ref alt _ _ _ formatS genotypes) = do 267 | dprIndex <- fmap fst . headErr "Did not find DPR tag in format string" . 268 | filter ((=="DPR") . snd) . zip [0..] $ formatS 269 | let covNums = map (getCorrectCovNums . (!!dprIndex)) genotypes 270 | return $ VCFentryReduced chrom pos normalizedAlleles covNums 271 | where 272 | getCorrectCovNums covNumStr = 273 | (\v -> map (v!!) normalizedAlleleIndices) . map (read . T.unpack) . T.splitOn "," $ 274 | covNumStr 275 | normalizedAlleleIndexPairs = filter (\a -> snd a /= "" && snd a /= "<*>") . zip [0..] $ 276 | ref : alt 277 | normalizedAlleles = map snd normalizedAlleleIndexPairs 278 | normalizedAlleleIndices = map fst normalizedAlleleIndexPairs 279 | 280 | 281 | callGenotype :: CallingMode -> Int -> [Int] -> IO Int 282 | callGenotype mode minDepth covs = do 283 | if sum covs < minDepth then return (-1) else do 284 | case covs of 285 | [_] -> return 0 286 | [numRef, numAlt] -> do 287 | case mode of 288 | MajorityCalling -> case numRef `compare` numAlt of 289 | LT -> return 2 290 | GT -> return 0 291 | EQ -> do 292 | rn <- randomRIO (1, numRef + numAlt) 293 | if rn <= numRef then return 0 else return 2 294 | RandomCalling -> do 295 | rn <- randomRIO (1, numRef + numAlt) 296 | if rn <= numRef then return 0 else return 2 297 | RareCalling -> do 298 | if numAlt >= 2 then return 1 else return 0 299 | _ -> throwIO (AssertionFailed "should not happen. CallGenotype called with more \ 300 | \than two alleles") 301 | 302 | processVcfWithSnpFile :: Int -> CallingMode -> Int -> Pipe (Maybe SnpEntry, Maybe VCFentry) 303 | FreqSumRow (SafeT IO) r 304 | processVcfWithSnpFile nrInds mode minDepth = for cat $ \jointEntry -> do 305 | -- trace (show jointEntry) (return ()) 306 | case jointEntry of 307 | (Just (SnpEntry snpChrom snpPos snpRef snpAlt), Nothing) -> do 308 | yield $ FreqSumRow snpChrom snpPos snpRef snpAlt (replicate nrInds (-1)) 309 | (Just (SnpEntry snpChrom snpPos snpRef snpAlt), Just vcfEntry) -> do 310 | let Right (VCFentryReduced _ _ vcfAlleles vcfNums) = makeReducedVCF vcfEntry 311 | when (length vcfNums /= nrInds) $ (liftIO . throwIO) (AssertionFailed "inconsistent \ 312 | \number of genotypes. Check that bam files have different \ 313 | \readgroup sample names") 314 | let normalizedAlleleI = 315 | map snd . filter (\(a, _) -> a == snpRef || a == snpAlt) $ zip vcfAlleles [0..] 316 | normalizedVcfAlleles = map (vcfAlleles!!) normalizedAlleleI 317 | normalizedVcfNums = [map (v!!) normalizedAlleleI | v <- vcfNums] 318 | -- trace (show (VCFentry vcfChrom vcfPos vcfAlleles vcfNums)) (return ()) 319 | genotypes <- case normalizedVcfAlleles of 320 | [] -> return (replicate nrInds (-1)) 321 | [ref] -> if ref == snpRef 322 | then return [if sum c >= minDepth then 0 else (-1) | c <- normalizedVcfNums] 323 | else return [if sum c >= minDepth then 2 else (-1) | c <- normalizedVcfNums] 324 | [ref, alt] -> if [ref, alt] == [snpRef, snpAlt] 325 | then liftIO $ mapM (callGenotype mode minDepth) normalizedVcfNums 326 | else liftIO $ mapM (callGenotype mode minDepth) 327 | (map reverse normalizedVcfNums) 328 | _ -> liftIO . throwIO $ AssertionFailed ("should not happen, can only have \ 329 | \two alleles after normalization: " ++ show jointEntry) 330 | yield (FreqSumRow snpChrom snpPos snpRef snpAlt genotypes) 331 | _ -> return () 332 | 333 | snpParser :: A.Parser SnpEntry 334 | snpParser = do 335 | chrom <- word 336 | tab 337 | pos <- A.decimal 338 | tab 339 | ref <- A.satisfy (A.inClass "ACTG") 340 | _ <- A.char ',' 341 | alt <- A.satisfy (A.inClass "ACTG") 342 | _ <- A.satisfy (\c -> c == '\r' || c == '\n') 343 | let ret = SnpEntry chrom pos (T.singleton ref) (T.singleton alt) 344 | -- trace (show ret) $ return () 345 | return ret 346 | where 347 | word = T.pack <$> A.many1 (A.satisfy (not . isSpace)) 348 | tab = A.char '\t' >> return () 349 | 350 | showFreqSum :: Text -> FreqSumRow -> Text 351 | showFreqSum outChrom (FreqSumRow _ pos ref alt calls) = 352 | format (s%"\t"%d%"\t"%s%"\t"%s%"\t"%s) outChrom pos ref alt callsStr 353 | where 354 | callsStr = (T.intercalate "\t" . map (format d)) calls 355 | 356 | printEigenStrat :: Text -> Handle -> Pipe FreqSumRow Text (SafeT IO) r 357 | printEigenStrat outChrom snpOutHandle = for cat $ \(FreqSumRow _ pos ref alt calls) -> do 358 | let n = format (s%"_"%d) outChrom pos 359 | snpLine = format (s%"\t"%s%"\t0\t"%d%"\t"%s%"\t"%s) n outChrom pos ref alt 360 | liftIO . T.hPutStrLn snpOutHandle $ snpLine 361 | yield . T.concat . map (format d . toEigenStratNum) $ calls 362 | where 363 | toEigenStratNum c = case c of 364 | 0 -> 2 :: Int 365 | 1 -> 1 366 | 2 -> 0 367 | -1 -> 9 368 | _ -> error ("unknown genotype " ++ show c) 369 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SequenceTools 2 | 3 | [Install with Bioconda](https://anaconda.org/bioconda/sequencetools) 4 | 5 | This repository contains some programs that I use for processing sequencing data. 6 | 7 | # Installation 8 | 9 | ## Installation via precompiled executables 10 | 11 | * [Download the latest Executable](https://github.com/stschiff/sequenceTools/releases/latest) that best matches your platform. 12 | 13 | For example, to install `pileupCaller` in Linux, you can run the following commands to get started: 14 | 15 | ```bash 16 | # download the current stable release binary 17 | wget https://github.com/stschiff/sequenceTools/releases/latest/download/pileupCaller-linux 18 | # make it executable 19 | chmod +x pileupCaller-linux 20 | # run it 21 | ./trident-linux -h 22 | ``` 23 | 24 | ## Installation from source 25 | 26 | 1. Clone the repository: `git clone https://github.com/stschiff/sequenceTools.git` 27 | 2. Go into the repository: `cd sequenceTools` 28 | 3. Compile the executables in the repository using `stack`: `stack install` 29 | 30 | This last step will take a while, as it not only compiles the source, but also first downloads the correct version of the Haskell compiler. 31 | 32 | # PileupCaller 33 | 34 | ## Overview 35 | 36 | The main tool in this repository is the program `pileupCaller` to sample alleles from low coverage sequence data. The first step is to generate a “pileup” file at all positions you wish to genotype. To do that, here is a typical command line, which restricts to mapping and base quality of 30 and uses a predefined set of positions to generate the pileup for (optional, see below): 37 | 38 | samtools mpileup -R -B -q30 -Q30 -l \ 39 | -f \ 40 | Sample1.bam Sample2.bam Sample3.bam > pileup.txt 41 | 42 | Important Note: You should definitely use the `-B` flag, which disables base alignment quality recalibration. This mechanism is turned on by default and causes huge reference bias with low coverage ancient DNA data. This flag disables the mechanism. 43 | 44 | In the above command line, if you use a positions-file, it should either contain positions (0-based) or a bed file (see samtools manual for details). The output is a simple text file with all positions that could be genotyped in the three samples. 45 | 46 | Note that the `` file is strictly optional, as pileupCaller is happy to handle sites that it then discards if they are not in the list of alleles to be pulled down. However, it makes the processing much faster, if you already restrict the pileup to the required positions, so this is recommended. 47 | 48 | Next, you need to run pileupCaller, which you run like this: 49 | 50 | pileupCaller --randomHaploid --sampleNames Sample1,Sample2,Sample3 \ 51 | --samplePopName MyPop -f \ 52 | -e < pileup.txt 53 | 54 | Here, options `--sampleNames` gives the names of the samples that is output in the Eigenstrat `*.ind` file, and and `-–samplePopName` is optional to also give the population names in that file (defaults to `Unknown`, you can also change it later in the output). Then, (required) option `-f` needs an Eigenstrat positions file. This is required for pileupCaller to know what is the reference and which the alternative allele in your reference dataset that you want to call. An Eigenstrat positions file is a line-based file format, where each line denotes a SNP position, and there are exactly six required columns, denoting in order i) SNP ID, ii) chromosome, iii) genetic position (can be set to zero), iv) physical position, v) reference allele, vi) alternate allele. Here is an example: 55 | 56 | rs0000 11 0.000000 0 A C 57 | rs1111 11 0.001000 100000 A G 58 | rs2222 11 0.002000 200000 A T 59 | rs3333 11 0.003000 300000 C A 60 | rs4444 11 0.004000 400000 G A 61 | rs5555 11 0.005000 500000 T A 62 | rs6666 11 0.006000 600000 G T 63 | 64 | Finally, the `-e` option specifies Eigenstrat as output format and gives the prefix for the `*.ind`, `*.pos` and `*.geno` files. Without the `-e` option, pileupCaller will output in FreqSum format, described [here](https://rarecoal-docs.readthedocs.io/en/latest/rarecoal-tools.html#vcf2freqsum), which is useful for debugging your pipeline, since it's just a single file that is output into the terminal and can therefore easily be inspected. 65 | 66 | Note that you can also fuse the two steps above into one unix pipe: 67 | 68 | samtools mpileup -R -B -q30 -Q30 -l \ 69 | -f \ 70 | Sample1.bam Sample2.bam Sample3.bam | \ 71 | pileupCaller --randomHaploid --sampleNames Sample1,Sample2,Sample3 \ 72 | --samplePopName MyPop -f \ 73 | -e 74 | 75 | Note that `--randomHaploid` is only one way to call genotypes. If you need stricter calling, you may want to try `--majorityCall --downSampling --minDepth 3`, which calls genotypes only on sites with at least three reads, downsamples to three if there are more, and then calls whatever of the two alleles has the majority. This will reduce errors, but also yield less data in case of lower coverage. 76 | 77 | You will possibly encounter an issue: If you have aligned your read data to a version of the reference genome that uses `chr1`, `chr2` and so on as chromosome names, the resulting Eigenstrat file will be valid, but won't merge with other Eigenstrat datasets that use chromosome names `1`, `2` and so on. I would therefore recommend to strip the `chr` from your chromosome names if necessary. You can do that easily using a little UNIX filter using the `sed` tool. In the full pipeline, it looks like this: 78 | 79 | samtools mpileup -R -B -q30 -Q30 \ 80 | -f \ 81 | Sample1.bam Sample2.bam Sample3.bam | sed 's/chr//' | \ 82 | pileupCaller --sampleNames Sample1,Sample2,Sample3 \ 83 | --samplePopName MyPop -f \ 84 | -o EigenStrat -e 85 | 86 | ## Options 87 | 88 | You can see all options via `pileupCaller --help`, which outputs: 89 | 90 | ``` 91 | Usage: pileupCaller [--version] 92 | (--randomHaploid | --majorityCall [--downSampling] | 93 | --randomDiploid) [--keepIncongruentReads] 94 | [--seed ] [-d|--minDepth ] 95 | [--skipTransitions | --transitionsMissing | 96 | --singleStrandMode] (-f|--snpFile ) 97 | [(-e|--eigenstratOut ) [-z|----zip] | 98 | (-p|--plinkOut ) 99 | [--popNameAsPhenotype | --popNameAsBoth] [-z|----zip] | 100 | --vcf] 101 | (--sampleNames NAME1,NAME2,... | --sampleNameFile ) 102 | [--samplePopName POP(s)] 103 | 104 | PileupCaller is a tool to create genotype calls from bam files using 105 | read-sampling methods. To use this tool, you need to convert bam files into 106 | the mpileup-format, specified at http://www.htslib.org/doc/samtools.html 107 | (under "mpileup"). The recommended command line to create a multi-sample 108 | mpileup file to be processed with pileupCaller is 109 | 110 | samtools mpileup -B -q30 -Q30 -l -R -f 111 | Sample1.bam Sample2.bam Sample3.bam | pileupCaller ... 112 | 113 | You can lookup what these options do in the samtools documentation. Note that 114 | flag -B in samtools is very important to reduce reference bias in low coverage 115 | data. 116 | 117 | 118 | This tool is part of sequenceTools version 1.6.0.0 119 | 120 | Available options: 121 | --version Print version and exit 122 | -h,--help Show this help text 123 | --randomHaploid This method samples one read at random at each site, 124 | and uses the allele on that read as the one for the 125 | actual genotype. This results in a haploid call 126 | --majorityCall Pick the allele supported by the most reads at a 127 | site. If an equal numbers of alleles fulfil this, 128 | pick one at random. This results in a haploid call. 129 | See --downSampling for best practices for calling 130 | rare variants 131 | --downSampling When this switch is given, the MajorityCalling mode 132 | will downsample from the total number of reads a 133 | number of reads (without replacement) equal to the 134 | --minDepth given. This mitigates reference bias in 135 | the MajorityCalling model, which increases with 136 | higher coverage. The recommendation for rare-allele 137 | calling is --majorityCall --downsampling --minDepth 3 138 | --randomDiploid Sample two reads at random (without replacement) at 139 | each site and represent the individual by a diploid 140 | genotype constructed from those two random picks. 141 | This will always assign missing data to positions 142 | where only one read is present, even if minDepth=1. 143 | The main use case for this option is for estimating 144 | mean heterozygosity across sites. 145 | --keepIncongruentReads By default, pileupCaller now removes reads with 146 | tri-allelic alleles that are neither of the two 147 | alleles specified in the SNP file. To keep those 148 | reads for sampling, set this flag. With this option 149 | given, if the sampled read has a tri-allelic allele 150 | that is neither of the two given alleles in the SNP 151 | file, a missing genotype is generated. IMPORTANT 152 | NOTE: The default behaviour has changed in 153 | pileupCaller version 1.4.0. If you want to emulate 154 | the previous behaviour, use this flag. I recommend 155 | now to NOT set this flag and use the new behaviour. 156 | --seed random seed used for the random number generator. If 157 | not given, use system clock to seed the random number 158 | generator. 159 | -d,--minDepth specify the minimum depth for a call. For sites with 160 | fewer reads than this number, declare Missing 161 | (default: 1) 162 | --skipTransitions skip transition SNPs entirely in the output, 163 | resulting in a dataset with fewer sites. 164 | --transitionsMissing mark transitions as missing in the output, but do 165 | output the sites. 166 | --singleStrandMode [THIS IS CURRENTLY AN EXPERIMENTAL FEATURE]. At C/T 167 | polymorphisms, ignore reads aligning to the forward 168 | strand. At G/A polymorphisms, ignore reads aligning 169 | to the reverse strand. This should remove post-mortem 170 | damage in ancient DNA libraries prepared with the 171 | non-UDG single-stranded protocol. 172 | -f,--snpFile an Eigenstrat-formatted SNP list file for the 173 | positions and alleles to call. All positions in the 174 | SNP file will be output, adding missing data where 175 | there is no data. Note that pileupCaller 176 | automatically checks whether alleles in the SNP file 177 | are flipped with respect to the human reference, and 178 | in those cases flips the genotypes accordingly. But 179 | it assumes that the strand-orientation of the SNPs 180 | given in the SNP list is the one in the reference 181 | genome used in the BAM file underlying the pileup 182 | input. Note that both the SNP file and the incoming 183 | pileup data have to be ordered by chromosome and 184 | position, and this is checked. The chromosome order 185 | in humans is 1-22,X,Y,MT. Chromosome can generally 186 | begin with "chr". In case of non-human data with 187 | different chromosome names, you should convert all 188 | names to numbers. They will always considered to be 189 | numerically ordered, even beyond 22. Finally, I note 190 | that for internally, X is converted to 23, Y to 24 191 | and MT to 90. This is the most widely used encoding 192 | in Eigenstrat databases for human data, so using a 193 | SNP file with that encoding will automatically be 194 | correctly aligned to pileup data with actual 195 | chromosome names X, Y and MT (or chrX, chrY and 196 | chrMT, respectively). 197 | -e,--eigenstratOut 198 | Set Eigenstrat as output format. Specify the 199 | filenames for the EigenStrat SNP, IND and GENO file 200 | outputs: .snp, .ind and 201 | .geno. If not set, output will be 202 | FreqSum (Default). Note that freqSum format, 203 | described at 204 | https://rarecoal-docs.readthedocs.io/en/latest/rarecoal-tools.html#vcf2freqsum, 205 | is useful for testing your pipeline, since it's 206 | output to standard out 207 | -z,----zip GZip the output Eigenstrat or Plink genotype and SNP 208 | files. Filenames will be appended with '.gz'. To zip 209 | FreqSum or VCF output, just zip the standard output 210 | of this program, for example `pileupCaller ... --vcf 211 | | gzip -c > out.vcf.gz 212 | -p,--plinkOut 213 | Set Plink as output format. Specify the filenames for 214 | the Plink BIM, FAM and BED file outputs: 215 | .bim, .fam and 216 | .bed. If not set, output will be FreqSum 217 | (Default). Note that freqSum format, described at 218 | https://rarecoal-docs.readthedocs.io/en/latest/rarecoal-tools.html#vcf2freqsum, 219 | is useful for testing your pipeline, since it's 220 | output to standard out 221 | --popNameAsPhenotype Only valid for Plink Output: Write the population 222 | name into the last column of the fam file, as a 223 | Phenotype according to the Plink Spec. By default, 224 | the population name is specified as the first column 225 | only (family name in the Plink spec) 226 | --popNameAsBoth Only valid for Plink Output: Write the population 227 | name into both the first and last column of the fam 228 | file, so both as Family-ID and as a Phenotype 229 | according to the Plink Spec. By default, the 230 | population name is specified only as the first column 231 | (family name in the Plink spec) 232 | -z,----zip GZip the output Eigenstrat or Plink genotype and SNP 233 | files. Filenames will be appended with '.gz'. To zip 234 | FreqSum or VCF output, just zip the standard output 235 | of this program, for example `pileupCaller ... --vcf 236 | | gzip -c > out.vcf.gz 237 | --vcf output VCF format to stdout 238 | --sampleNames NAME1,NAME2,... 239 | give the names of the samples as comma-separated list 240 | (no spaces) 241 | --sampleNameFile give the names of the samples in a file with one name 242 | per line 243 | --samplePopName POP(s) specify the population name(s) of the samples, which 244 | are included in the output *.ind.txt file in 245 | Eigenstrat output. This will be ignored if the output 246 | format is not Eigenstrat. If a single name is given, 247 | it is applied to all samples, if multiple are given, 248 | their number must match the the number of samples 249 | (default: Left "Unknown") 250 | ``` 251 | 252 | ### SingleStrandMode 253 | 254 | pileupCaller supports a special calling mode (`--singleStrandMode`) for sequencing data generated from single-stranded libraries (Gansauge, Marie-Theres, and Matthias Meyer. 2013. “Single-Stranded DNA Library Preparation for the Sequencing of Ancient or Damaged DNA.” Nature Protocols 8 (4): 737–48.). The idea is that at C/T SNPs, forward mapping reads are discarded, and at G/A SNPs, reverse mapping reads are discarded. This will get rid of post-mortem ancient DNA damage in a conservative way, i.e. it will remove more than necessary and make sure that the remainder of the data is clean of DNA damage, improving the overall calling quality. 255 | 256 | There is an important catch: If you have data from paired-end sequencing, and you are using _unmerged_ reads, then this approach will fail, as it will then _not_ discard potentially damaged reads. 257 | 258 | So there are two options if you have Paired-end sequencing data: 259 | 1) Use only merged reads and `--singleStrandMode` 260 | 2) Use all reads but do _not_ use `--singleStrandMode`. Instead, in such cases I recommend to trim reads from both ends to remove ancient DNA damage. Depending on the details of the library construction, you may have UDG-treated data, in which case fewer basepairs would have to be trimmed. 261 | 262 | ### VCF output 263 | VCF output was added in version 1.6.0.0. The VCF format is specified in detail at https://samtools.github.io/hts-specs/VCFv4.5.pdf. I just mention two specifics. First, with calling modes `--randomHaploid` and `--majorityCall`, the output genotypes will be haploid. This means that instead of genotypes like `0/0`, `0/1`, `1/1` or `./.`, you will instead just see `0`, `1` or `.`. Second, I added some possibly useful filters and statistics to the output, which are described in the header of the VCF. Here is the beginning of an example output: 264 | 265 | ``` 266 | ##fileformat=VCFv4.2 267 | ##source=pileupCaller_v1.6.0.0 268 | ##command_line=pileupCaller --randomHaploid --sampleNames I1,I2,I3,I4 -f test/testDat/1240k_eigenstrat_snp_short.snp.txt --vcf 269 | ##group_names=Unknown,Unknown,Unknown,Unknown 270 | ##INFO= 271 | ##INFO= 272 | ##INFO= 273 | ##FILTER= 274 | ##FILTER= 275 | ##FORMAT= 276 | ##FORMAT= 277 | ##FORMAT= 278 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT I1 I2 I3 I4 279 | 1 752566 rs3094315 G A . PASS NS=3;DP=9;AF=1.0 GT:DP:DP8 1:2:1,0,0,0,0,0,1,0 1:4:2,0,0,0,2,0,0,0 1:3:1,0,0,0,2,0,0,0 .:0:0,0,0,0,0,0,0,0 280 | 1 776546 rs12124819 A G . PASS NS=3;DP=6;AF=0.0 GT:DP:DP8 0:1:0,0,0,0,1,0,0,0 0:4:2,0,0,0,2,0,0,0 0:1:0,0,0,0,1,0,0,0 .:0:0,0,0,0,0,0,0,0 281 | 1 832918 rs28765502 T C . PASS NS=4;DP=8;AF=0.25 GT:DP:DP8 1:1:0,1,0,0,0,0,0,0 0:4:0,0,0,2,0,0,0,2 0:2:0,0,0,0,0,0,0,2 0:1:0,0,0,1,0,0,0,0 282 | 1 842013 rs7419119 T G . PASS NS=3;DP=19;AF=0.0 GT:DP:DP8 0:2:0,0,0,1,0,0,0,1 0:11:0,0,0,5,0,0,0,6 0:6:0,0,0,1,0,0,0,5 .:0:0,0,0,0,0,0,0,0 283 | 1 846864 rs950122 G C . PASS NS=4;DP=20;AF=0.0 GT:DP:DP8 0:3:0,0,1,0,0,0,2,0 0:8:0,0,4,0,0,0,4,0 0:8:0,0,3,0,0,0,5,0 0:1:0,0,0,0,0,0,1,0 284 | 1 869303 rs113171913 C T . PASS NS=3;DP=5;AF=0.67 GT:DP:DP8 1:2:0,0,0,2,0,0,0,0 1:1:0,0,0,1,0,0,0,0 0:2:0,1,0,0,0,1,0,0 .:0:0,0,0,0,0,0,0,0 285 | 1 891021 rs13302957 G A . PASS NS=2;DP=14;AF=1.0 GT:DP:DP8 .:0:0,0,0,0,0,0,0,0 1:7:2,0,0,0,5,0,0,0 1:7:4,0,0,0,3,0,0,0 .:0:0,0,0,0,0,0,0,0 286 | 287 | ``` 288 | 289 | As you can see, Info fields NS, DP and AF are added and defined, as well as two filters which might come in handy. Also, beyond the required genotype `GT` tag, I added two per-sample tags `DP` and `DP8` as defined in the header. 290 | 291 | ### Summary Statistics 292 | 293 | PileupCaller automatically outputs a few lines of summary statistics, including the number of sites called for each sample, and the average read depth. These are output to the stderr, so do not affect stdout or file output. 294 | 295 | # vcf2eigenstrat 296 | 297 | Simple tool to convert a VCF file to an Eigenstrat file. Pretty self-explanatory. Please run `vcf2eigenstrat --help` to output some documentation. 298 | 299 | # genoStats 300 | 301 | A simple tool to get some per-individual statistics from an Eigenstrat or Freqsum-file. Run `genoStats --help` for documentation. 302 | 303 | # Scripts 304 | This package also contains several haskell wrapper scripts for the following [ADMIXTOOLS and EIGENSOFT](https://reich.hms.harvard.edu/software) commands: convertf, mergeit, qp3Pop, qpDstat and smartPCA. The original tools require parameter files as input, which I find tedious to use in bioinformatics pipelines. I wrote those wrapper scripts to be able to start the tools with a simple command line option interface. 305 | 306 | If you have `stack` installed your system (see above), you should be able to run those scripts on your machine without any difficult setup. Simply clone this repository, navigate to the `scripts` subfolder and invoke any script using standard bash execution, for example 307 | 308 | ./convertf_wrapper.hs 309 | 310 | If you start this the first time it may take a while, since `stack` downloads all dependencies and even the script interpreter for you, but after that it should start instantanious. If you want to use the scripts from your path, I suggest to put symbolic links into any folder that is already on your path (for example `~/.local/bin`). 311 | -------------------------------------------------------------------------------- /src-executables/pileupCaller.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | import SequenceFormats.Eigenstrat (EigenstratIndEntry (..), 4 | EigenstratSnpEntry (..), 5 | Sex (..), 6 | readEigenstratSnpFile, 7 | writeEigenstrat) 8 | import SequenceFormats.FreqSum (FreqSumEntry (..), 9 | FreqSumHeader (..), 10 | printFreqSumStdOut) 11 | import SequenceFormats.Pileup (PileupRow (..), 12 | readPileupFromStdIn, 13 | Strand(..)) 14 | import SequenceFormats.Plink (PlinkPopNameMode (..), 15 | eigenstratInd2PlinkFam, 16 | writePlink) 17 | import SequenceFormats.Utils (SeqFormatException (..)) 18 | import SequenceFormats.VCF (VCFentry (..), VCFheader (..), 19 | printVCFtoStdOut) 20 | import SequenceTools.PileupCaller (CallingMode (..), 21 | TransitionsMode (..), 22 | callGenotypeFromPileup, 23 | callToDosage, 24 | cleanSSdamageAllSamples, 25 | computeAlleleFreq, 26 | filterTransitions) 27 | import SequenceTools.Utils (UserInputException (..), 28 | freqSumToEigenstrat, 29 | versionInfoOpt, 30 | versionInfoText) 31 | 32 | import Control.Applicative ((<|>)) 33 | import Control.Exception (catch, throwIO) 34 | import Control.Monad (forM_, when) 35 | import Control.Monad.IO.Class (liftIO) 36 | import Control.Monad.Trans.Class (lift) 37 | import Control.Monad.Trans.Reader (ReaderT, asks, 38 | runReaderT) 39 | import qualified Data.ByteString.Char8 as B 40 | import Data.IORef (IORef, modifyIORef', newIORef, 41 | readIORef) 42 | import Data.List (intercalate) 43 | import Data.List.Split (splitOn) 44 | import qualified Data.Text as T 45 | import qualified Data.Vector.Unboxed.Mutable as V 46 | import Data.Version (Version, showVersion) 47 | import qualified Options.Applicative as OP 48 | import qualified Options.Applicative.Help.Pretty as PP 49 | import Paths_sequenceTools (version) 50 | import Pipes (Producer, for, runEffect, 51 | yield, (>->)) 52 | import Pipes.OrderedZip (orderCheckPipe, orderedZip) 53 | import qualified Pipes.Prelude as P 54 | import Pipes.Safe (SafeT, runSafeT) 55 | import qualified Prettyprinter.Util 56 | import System.Environment (getArgs, getProgName) 57 | import System.IO (hPutStrLn, stderr) 58 | import System.Random (mkStdGen, setStdGen) 59 | import Text.Printf (printf) 60 | 61 | data OutFormat = EigenstratFormat FilePath Bool 62 | | PlinkFormat FilePath PlinkPopNameMode Bool 63 | | VCFformat 64 | | FreqSumFormat deriving (Show) 65 | 66 | data ProgOpt = ProgOpt 67 | CallingMode --optCallingMode 68 | Bool --optKeepIncongruentReads 69 | (Maybe Int) --optSeed 70 | Int --optMinDepth 71 | TransitionsMode --optTransitionsMode 72 | FilePath --optSnpFile 73 | OutFormat --optOutFormat 74 | (Either [String] FilePath) --optSampleNames 75 | (Either String [String]) --optPopName 76 | 77 | data ReadStats = ReadStats { 78 | rsTotalSites :: IORef Int, 79 | rsNonMissingSites :: V.IOVector Int, 80 | rsRawReads :: V.IOVector Int, 81 | rsReadsCleanedSS :: V.IOVector Int, 82 | rsReadsCongruent :: V.IOVector Int 83 | } 84 | 85 | data Env = Env { 86 | envCallingMode :: CallingMode, 87 | envKeepInCongruentReads :: Bool, 88 | envMinDepth :: Int, 89 | envTransitionsMode :: TransitionsMode, 90 | envOutFormat :: OutFormat, 91 | envSnpFile :: FilePath, 92 | envSampleNames :: [String], 93 | envPopName :: Either String [String], 94 | envVersion :: Version, 95 | envStats :: ReadStats 96 | } 97 | 98 | instance Show Env where 99 | show e = show ( 100 | envCallingMode e, 101 | envKeepInCongruentReads e, 102 | envMinDepth e, 103 | envTransitionsMode e, 104 | envOutFormat e, 105 | envSnpFile e, 106 | envSampleNames e, 107 | envPopName e, 108 | envVersion e) 109 | 110 | type App = ReaderT Env (SafeT IO) 111 | 112 | main :: IO () 113 | main = do 114 | args <- OP.execParser parserInfo 115 | env <- initialiseEnvironment args 116 | let handler = \(SeqFormatException msg) -> do 117 | throwIO $ SeqFormatException (take 200 msg) 118 | catch (runSafeT $ runReaderT runMain env) handler 119 | 120 | parserInfo :: OP.ParserInfo ProgOpt 121 | parserInfo = OP.info (pure (.) <*> versionInfoOpt <*> OP.helper <*> argParser) 122 | (OP.progDescDoc (Just programHelpDoc)) 123 | 124 | argParser :: OP.Parser ProgOpt 125 | argParser = ProgOpt <$> parseCallingMode 126 | <*> parseKeepIncongruentReads 127 | <*> parseSeed 128 | <*> parseMinDepth 129 | <*> parseTransitionsMode 130 | <*> parseSnpFile 131 | <*> parseFormat 132 | <*> parseSampleNames 133 | <*> parsePopName 134 | where 135 | parseCallingMode = parseRandomCalling <|> parseMajorityCalling <|> parseRandomDiploidCalling 136 | parseRandomCalling = OP.flag' RandomCalling (OP.long "randomHaploid" <> 137 | OP.help "This method samples one read at random at each site, and uses the allele \ 138 | \on that read as the one for the actual genotype. This results in a haploid \ 139 | \call") 140 | parseRandomDiploidCalling = OP.flag' RandomDiploidCalling (OP.long "randomDiploid" <> 141 | OP.help "Sample two reads at random (without replacement) at each site and represent the \ 142 | \individual by a diploid genotype constructed from those two random \ 143 | \picks. This will always assign missing data to positions where only \ 144 | \one read is present, even if minDepth=1. The main use case for this \ 145 | \option is for estimating mean heterozygosity across sites.") 146 | parseMajorityCalling = MajorityCalling <$> (parseMajorityCallingFlag *> parseDownsamplingFlag) 147 | parseMajorityCallingFlag = OP.flag' True (OP.long "majorityCall" <> OP.help 148 | "Pick the allele supported by the \ 149 | \most reads at a site. If an equal numbers of alleles fulfil this, pick one at \ 150 | \random. This results in a haploid call. See --downSampling for best practices \ 151 | \for calling rare variants") 152 | parseDownsamplingFlag = OP.switch (OP.long "downSampling" <> OP.help "When this switch is given, \ 153 | \the MajorityCalling mode will downsample \ 154 | \from the total number of reads a number of reads \ 155 | \(without replacement) equal to the --minDepth given. This mitigates \ 156 | \reference bias in the MajorityCalling model, which increases with higher coverage. \ 157 | \The recommendation for rare-allele calling is --majorityCall --downsampling --minDepth 3") 158 | parseKeepIncongruentReads = OP.switch (OP.long "keepIncongruentReads" <> OP.help "By default, \ 159 | \pileupCaller now removes reads with tri-allelic alleles that are neither of the two alleles specified in the SNP file. \ 160 | \To keep those reads for sampling, set this flag. With this option given, if \ 161 | \the sampled read has a tri-allelic allele that is neither of the two given alleles in the SNP file, a missing genotype is generated. \ 162 | \IMPORTANT NOTE: The default behaviour has changed in pileupCaller version 1.4.0. If you want to emulate the previous \ 163 | \behaviour, use this flag. I recommend now to NOT set this flag and use the new behaviour.") 164 | parseSeed = OP.option (Just <$> OP.auto) (OP.long "seed" <> 165 | OP.value Nothing <> OP.metavar "" <> 166 | OP.help "random seed used for the random number generator. If not given, use \ 167 | \system clock to seed the random number generator.") 168 | parseMinDepth = OP.option OP.auto (OP.long "minDepth" <> OP.short 'd' <> 169 | OP.value 1 <> OP.showDefault <> OP.metavar "" <> 170 | OP.help "specify the minimum depth for a call. For sites with fewer \ 171 | \reads than this number, declare Missing") 172 | parseTransitionsMode = parseSkipTransitions <|> parseTransitionsMissing <|> parseSingleStrandMode <|> pure AllSites 173 | parseSkipTransitions = OP.flag' SkipTransitions (OP.long "skipTransitions" <> 174 | OP.help "skip transition SNPs entirely in the output, resulting in a dataset with fewer sites.") 175 | parseTransitionsMissing = OP.flag' TransitionsMissing (OP.long "transitionsMissing" <> 176 | OP.help "mark transitions as missing in the output, but do output the sites.") 177 | parseSingleStrandMode = OP.flag' SingleStrandMode (OP.long "singleStrandMode" <> 178 | OP.help "[THIS IS CURRENTLY AN EXPERIMENTAL FEATURE]. At C/T polymorphisms, ignore reads aligning to the forward strand. \ 179 | \At G/A polymorphisms, ignore reads aligning to the reverse strand. This should \ 180 | \remove post-mortem damage in ancient DNA libraries prepared with the non-UDG single-stranded protocol.") 181 | parseSnpFile = OP.strOption (OP.long "snpFile" <> OP.short 'f' <> 182 | OP.metavar "" <> OP.help "an Eigenstrat-formatted SNP list file for \ 183 | \the positions and alleles to call. All \ 184 | \positions in the SNP file will be output, adding missing data where \ 185 | \there is no data. Note that pileupCaller automatically checks whether \ 186 | \alleles in the SNP file are flipped with respect to the human \ 187 | \reference, and in those cases flips the genotypes accordingly. \ 188 | \But it assumes that the strand-orientation of the SNPs given in the SNP list is the one \ 189 | \in the reference genome used in the BAM file underlying the pileup input. \ 190 | \Note that both the SNP file and the incoming pileup data have to be \ 191 | \ordered by chromosome and position, and this is checked. The chromosome order in humans is 1-22,X,Y,MT. \ 192 | \Chromosome can generally begin with \"chr\". In case of non-human data with different chromosome \ 193 | \names, you should convert all names to numbers. They will always considered to \ 194 | \be numerically ordered, even beyond 22. Finally, I note that for internally, \ 195 | \X is converted to 23, Y to 24 and MT to 90. This is the most widely used encoding in Eigenstrat \ 196 | \databases for human data, so using a SNP file with that encoding will automatically be correctly aligned \ 197 | \to pileup data with actual chromosome names X, Y and MT (or chrX, chrY and chrMT, respectively).") 198 | parseFormat = (EigenstratFormat <$> parseEigenstratPrefix <*> parseZipOut) <|> 199 | (PlinkFormat <$> parsePlinkPrefix <*> parsePlinkPopMode <*> parseZipOut) <|> 200 | parseVCFformat <|> pure FreqSumFormat 201 | parseZipOut = OP.switch (OP.long "--zip" <> OP.short 'z' <> 202 | OP.help "GZip the output Eigenstrat or Plink genotype and SNP files. \ 203 | \Filenames will be appended with '.gz'. To zip FreqSum or VCF output, just zip the standard output of this\ 204 | \ program, for example `pileupCaller ... --vcf | gzip -c > out.vcf.gz") 205 | parseEigenstratPrefix = OP.strOption (OP.long "eigenstratOut" <> OP.short 'e' <> 206 | OP.metavar "" <> 207 | OP.help "Set Eigenstrat as output format. Specify the filenames for the EigenStrat \ 208 | \SNP, IND and GENO file outputs: .snp, .ind and .geno. \ 209 | \If not set, output will be FreqSum (Default). Note that freqSum format, described at \ 210 | \https://rarecoal-docs.readthedocs.io/en/latest/rarecoal-tools.html#vcf2freqsum, \ 211 | \is useful for testing your pipeline, since it's output to standard out") 212 | parsePlinkPrefix = OP.strOption (OP.long "plinkOut" <> OP.short 'p' <> 213 | OP.metavar "" <> 214 | OP.help "Set Plink as output format. Specify the filenames for the Plink \ 215 | \BIM, FAM and BED file outputs: .bim, .fam and .bed. \ 216 | \If not set, output will be FreqSum (Default). Note that freqSum format, described at \ 217 | \https://rarecoal-docs.readthedocs.io/en/latest/rarecoal-tools.html#vcf2freqsum, \ 218 | \is useful for testing your pipeline, since it's output to standard out") 219 | parsePlinkPopMode = parsePlinkPopPhenotype <|> parsePlinkPopBoth <|> pure PlinkPopNameAsFamily 220 | parsePlinkPopPhenotype = OP.flag' PlinkPopNameAsPhenotype (OP.long "popNameAsPhenotype" <> OP.help "Only valid for Plink Output: \ 221 | \Write the population name into the last column of the fam file, as a Phenotype according to the Plink Spec. \ 222 | \By default, the population name is specified as the first column only (family name in the Plink spec)") 223 | parsePlinkPopBoth = OP.flag' PlinkPopNameAsBoth (OP.long "popNameAsBoth" <> OP.help "Only valid for Plink Output: \ 224 | \Write the population name into both the first and last column of the fam file, so both as Family-ID and as a \ 225 | \Phenotype according to the Plink Spec. By default, the population name is specified only as the first column (family name in the Plink spec)") 226 | parseVCFformat = OP.flag' VCFformat (OP.long "vcf" <> OP.help "output VCF format to stdout") 227 | parseSampleNames = parseSampleNameList <|> parseSampleNameFile 228 | parseSampleNameList = OP.option (Left . splitOn "," <$> OP.str) 229 | (OP.long "sampleNames" <> OP.metavar "NAME1,NAME2,..." <> 230 | OP.help "give the names of the samples as comma-separated list (no spaces)") 231 | parseSampleNameFile = OP.option (Right <$> OP.str) (OP.long "sampleNameFile" <> OP.metavar "" <> 232 | OP.help "give the names of the samples in a file with one name per \ 233 | \line") 234 | parsePopName = OP.option (processPopNames . splitOn "," <$> OP.str) (OP.long "samplePopName" <> OP.value (Left "Unknown") <> OP.showDefault <> 235 | OP.metavar "POP(s)" <> 236 | OP.help "specify the population name(s) of the samples, which are included\ 237 | \ in the output *.ind.txt file in Eigenstrat output. This will be ignored if the output \ 238 | \format is not Eigenstrat. If a single name is given, it is applied to all samples, if multiple are given, their number must match the \ 239 | \the number of samples") 240 | processPopNames names = if length names == 1 then Left (head names) else Right names 241 | 242 | programHelpDoc :: PP.Doc 243 | programHelpDoc = PP.vsep [part1, PP.enclose PP.line PP.line (PP.indent 4 samtoolsExample), 244 | part2, PP.line, PP.fillSep . Prettyprinter.Util.words . T.pack $ versionInfoText] 245 | where 246 | part1 = PP.fillSep . Prettyprinter.Util.words $ 247 | "PileupCaller is a tool to create genotype calls from bam files using read-sampling methods. \ 248 | \To use this tool, you need to convert bam files into the mpileup-format, specified at \ 249 | \http://www.htslib.org/doc/samtools.html (under \"mpileup\"). The recommended command line \ 250 | \to create a multi-sample mpileup file to be processed with pileupCaller is" 251 | samtoolsExample = PP.hang 4 . PP.fillSep . Prettyprinter.Util.words $ 252 | "samtools mpileup -B -q30 -Q30 -l -R -f \ 253 | \Sample1.bam Sample2.bam Sample3.bam | pileupCaller ..." 254 | part2 = PP.fillSep . Prettyprinter.Util.words $ 255 | "You can lookup what these options do in the samtools documentation. \ 256 | \Note that flag -B in samtools is very important to reduce reference \ 257 | \bias in low coverage data." 258 | 259 | initialiseEnvironment :: ProgOpt -> IO Env 260 | initialiseEnvironment args = do 261 | let (ProgOpt callingMode keepInCongruentReads seed minDepth 262 | transitionsMode snpFile outFormat sampleNames popName) = args 263 | case seed of 264 | Nothing -> return () 265 | Just seed_ -> liftIO . setStdGen $ mkStdGen seed_ 266 | sampleNamesList <- case sampleNames of 267 | Left list -> return list 268 | Right fn -> lines <$> readFile fn 269 | let n = length sampleNamesList 270 | readStats <- ReadStats <$> newIORef 0 <*> makeVec n <*> makeVec n <*> makeVec n <*> makeVec n 271 | return $ Env callingMode keepInCongruentReads minDepth transitionsMode 272 | outFormat snpFile sampleNamesList popName version readStats 273 | where 274 | makeVec n = do 275 | v <- V.new n 276 | V.set v 0 277 | return v 278 | 279 | runMain :: App () 280 | runMain = do 281 | let pileupProducer = readPileupFromStdIn 282 | outFormat <- asks envOutFormat 283 | popNameSpec <- asks envPopName 284 | n <- length <$> asks envSampleNames 285 | let popNames = case popNameSpec of 286 | Left singlePopName -> replicate n singlePopName 287 | Right p -> if length p /= n then error "number of specified populations must equal sample size" else p 288 | case outFormat of 289 | FreqSumFormat -> do 290 | freqSumProducer <- pileupToFreqSum pileupProducer 291 | outputFreqSum (freqSumProducer >-> P.map snd) 292 | EigenstratFormat outPrefix zipOut -> do 293 | freqSumProducer <- pileupToFreqSum pileupProducer 294 | outputEigenStratOrPlink outPrefix zipOut popNames Nothing (freqSumProducer >-> P.map snd) 295 | PlinkFormat outPrefix popNameMode zipOut -> do 296 | freqSumProducer <- pileupToFreqSum pileupProducer 297 | outputEigenStratOrPlink outPrefix zipOut popNames (Just popNameMode) (freqSumProducer >-> P.map snd) 298 | VCFformat -> do 299 | freqSumProducer <- pileupToFreqSum pileupProducer 300 | outputVCF popNames freqSumProducer 301 | outputStats 302 | 303 | pileupToFreqSum :: Producer PileupRow (SafeT IO) () -> App (Producer (Maybe PileupRow, FreqSumEntry) (SafeT IO) ()) 304 | pileupToFreqSum pileupProducer = do 305 | snpFileName <- asks envSnpFile 306 | nrSamples <- length <$> asks envSampleNames 307 | let snpProdOrderChecked = 308 | readEigenstratSnpFile snpFileName >-> orderCheckPipe cmpSnpPos 309 | pileupProdOrderChecked = 310 | pileupProducer >-> orderCheckPipe cmpPileupPos 311 | jointProd = 312 | orderedZip cmpSnpToPileupPos snpProdOrderChecked pileupProdOrderChecked 313 | mode <- asks envCallingMode 314 | keepInCongruentReads <- asks envKeepInCongruentReads 315 | transitionsMode <- asks envTransitionsMode 316 | let singleStrandMode = (transitionsMode == SingleStrandMode) 317 | minDepth <- asks envMinDepth 318 | readStats <- asks envStats 319 | let ret = Pipes.for jointProd $ \jointEntry -> 320 | case jointEntry of 321 | (Just esEntry, Nothing) -> do 322 | let (EigenstratSnpEntry chr pos gpos id_ ref alt) = esEntry 323 | dosages = (replicate nrSamples Nothing) 324 | liftIO $ addOneSite readStats 325 | yield $ (Nothing, FreqSumEntry chr pos (Just id_) (Just gpos) ref alt dosages) 326 | (Just esEntry, Just pRow) -> do 327 | let (EigenstratSnpEntry chr pos gpos id_ ref alt) = esEntry 328 | (PileupRow _ _ _ rawPileupBasesPerSample rawStrandInfoPerSample) = pRow 329 | let cleanBasesPerSample = 330 | if singleStrandMode 331 | then cleanSSdamageAllSamples ref alt rawPileupBasesPerSample rawStrandInfoPerSample 332 | else rawPileupBasesPerSample 333 | let congruentBasesPerSample = 334 | if keepInCongruentReads 335 | then cleanBasesPerSample 336 | else map (filter (\c -> c == ref || c == alt)) cleanBasesPerSample 337 | liftIO $ addOneSite readStats 338 | liftIO $ updateStatsAllSamples readStats (map length rawPileupBasesPerSample) 339 | (map length cleanBasesPerSample) (map length congruentBasesPerSample) 340 | calls <- liftIO $ mapM (callGenotypeFromPileup mode minDepth) congruentBasesPerSample 341 | let genotypes = map (callToDosage ref alt) calls 342 | yield $ (Just pRow, FreqSumEntry chr pos (Just id_) (Just gpos) ref alt genotypes) 343 | _ -> return () 344 | return $ (fst <$> ret) >-> filterTransitions transitionsMode 345 | where 346 | cmpSnpPos :: EigenstratSnpEntry -> EigenstratSnpEntry -> Ordering 347 | cmpSnpPos es1 es2 = (snpChrom es1, snpPos es1) `compare` (snpChrom es2, snpPos es2) 348 | cmpPileupPos :: PileupRow -> PileupRow -> Ordering 349 | cmpPileupPos pr1 pr2 = (pileupChrom pr1, pileupPos pr1) `compare` (pileupChrom pr2, pileupPos pr2) 350 | cmpSnpToPileupPos :: EigenstratSnpEntry -> PileupRow -> Ordering 351 | cmpSnpToPileupPos es pr = (snpChrom es, snpPos es) `compare` (pileupChrom pr, pileupPos pr) 352 | 353 | addOneSite :: ReadStats -> IO () 354 | addOneSite readStats = modifyIORef' (rsTotalSites readStats) (+1) 355 | 356 | updateStatsAllSamples :: ReadStats -> [Int] -> [Int] -> [Int] -> IO () 357 | updateStatsAllSamples readStats rawBaseCounts damageCleanedBaseCounts congruencyCleanedBaseCounts = do 358 | let nSamples = V.length (rsRawReads readStats) 359 | when (length rawBaseCounts /= nSamples) . throwIO . UserInputException $ 360 | "number of individuals specified (" ++ show nSamples ++ 361 | ") differs from number of individuals in the pileup input (" ++ 362 | show (length rawBaseCounts) ++ ")" 363 | sequence_ [V.modify (rsRawReads readStats) (+n) i | (i, n) <- zip [0..] rawBaseCounts] 364 | sequence_ [V.modify (rsReadsCleanedSS readStats) (+n) i | (i, n) <- zip [0..] damageCleanedBaseCounts] 365 | sequence_ [V.modify (rsReadsCongruent readStats) (+n) i | (i, n) <- zip [0..] congruencyCleanedBaseCounts] 366 | let nonMissingSites = [if n > 0 then 1 else 0 | n <- congruencyCleanedBaseCounts] 367 | sequence_ [V.modify (rsNonMissingSites readStats) (+n) i | (i, n) <- zip [0..] nonMissingSites] 368 | 369 | outputFreqSum :: Producer FreqSumEntry (SafeT IO) () -> App () 370 | outputFreqSum freqSumProducer = do 371 | callingMode <- asks envCallingMode 372 | sampleNames <- asks envSampleNames 373 | let nrHaplotypes = case callingMode of 374 | MajorityCalling _ -> 1 :: Int 375 | RandomCalling -> 1 376 | RandomDiploidCalling -> 2 377 | let header' = FreqSumHeader sampleNames [nrHaplotypes | _ <- sampleNames] 378 | lift . runEffect $ freqSumProducer >-> printFreqSumStdOut header' 379 | 380 | outputEigenStratOrPlink :: FilePath -> Bool -> [String] -> Maybe PlinkPopNameMode -> Producer FreqSumEntry (SafeT IO) () -> App () 381 | outputEigenStratOrPlink outPrefix zipOut popNames maybePlinkPopMode freqSumProducer = do 382 | sampleNames <- asks envSampleNames 383 | let (snpOut, indOut, genoOut) = case (maybePlinkPopMode, zipOut) of 384 | (Just _, False) -> (outPrefix <> ".bim", outPrefix <> ".fam", outPrefix <> ".bed") 385 | (Just _, True) -> (outPrefix <> ".bim.gz", outPrefix <> ".fam", outPrefix <> ".bed.gz") 386 | (Nothing, False) -> (outPrefix <> ".snp", outPrefix <> ".ind", outPrefix <> ".geno") 387 | (Nothing, True) -> (outPrefix <> ".snp.gz", outPrefix <> ".ind", outPrefix <> ".geno.gz") 388 | let indEntries = [EigenstratIndEntry (B.pack n) Unknown (B.pack p) | (n, p) <- zip sampleNames popNames] 389 | let writeFunc = case maybePlinkPopMode of 390 | Nothing -> (\g s i -> writeEigenstrat g s i indEntries) 391 | Just popMode -> 392 | let famEntries = map (eigenstratInd2PlinkFam popMode) indEntries 393 | in (\g s i -> writePlink g s i famEntries) 394 | lift . runEffect $ freqSumProducer >-> P.map freqSumToEigenstrat >-> writeFunc genoOut snpOut indOut 395 | 396 | outputVCF :: [String] -> Producer (Maybe PileupRow, FreqSumEntry) (SafeT IO) () -> App () 397 | outputVCF popNames freqSumProd = do 398 | sampleNames <- map B.pack <$> asks envSampleNames 399 | ver <- asks envVersion 400 | prog_name <- liftIO getProgName 401 | prog_args <- liftIO getArgs 402 | let command_line = prog_name ++ " " ++ intercalate " " prog_args 403 | let metaInfoLines = map B.pack [ 404 | "##fileformat=VCFv4.2", 405 | "##source=pileupCaller_v" ++ showVersion ver, 406 | "##command_line=" ++ command_line, 407 | "##group_names=" ++ intercalate "," popNames, 408 | "##INFO=", 409 | "##INFO=", 410 | "##INFO=", 411 | "##FILTER=", 412 | "##FILTER=", 413 | "##FORMAT=", 414 | "##FORMAT=", 415 | "##FORMAT="] 416 | vcfh = VCFheader metaInfoLines sampleNames 417 | lift . runEffect $ freqSumProd >-> P.map (\(mpr, fse) -> createVcfEntry mpr fse) >-> printVCFtoStdOut vcfh 418 | 419 | createVcfEntry :: Maybe PileupRow -> FreqSumEntry -> VCFentry 420 | createVcfEntry maybePileupRow (FreqSumEntry chrom pos maybeSnpId _ ref alt calls) = 421 | VCFentry chrom pos maybeSnpId (B.pack [ref]) [B.pack [alt]] Nothing (Just filterString) infoFields genotypeInfos 422 | where 423 | nrMissing = length . filter (==Nothing) $ calls 424 | nrSamples = length calls 425 | filterString = 426 | if nrMissing * 10 > 9 * nrSamples then "s10;s50" 427 | else if nrMissing * 2 > nrSamples then "s50" 428 | else "PASS" 429 | totalDepth = case maybePileupRow of 430 | Nothing -> 0 431 | Just pr -> sum . map length . pileupBases $ pr 432 | nrSamplesWithData = nrSamples - nrMissing 433 | alleleFreq = computeAlleleFreq calls 434 | infoFields = [ 435 | B.pack $ "NS=" ++ show nrSamplesWithData, 436 | B.pack $ "DP=" ++ show totalDepth] ++ 437 | case alleleFreq of 438 | Just f -> 439 | let roundedFreq = fromIntegral (round (f * 100.0) :: Int) / 100.0 :: Double 440 | in [B.pack $ "AF=" ++ show roundedFreq] 441 | Nothing -> [] 442 | formatField = case maybePileupRow of 443 | Nothing -> ["GT"] 444 | Just _ -> ["GT", "DP", "DP8"] 445 | genotypeFields = do -- list monad over samples 446 | i <- [0 .. (nrSamples - 1)] 447 | let ca = calls !! i 448 | let gt = case ca of 449 | Nothing -> "." 450 | Just (0, 1) -> "0" 451 | Just (1, 1) -> "1" 452 | Just (0, 2) -> "0/0" 453 | Just (1, 2) -> "0/1" 454 | Just (2, 2) -> "1/1" 455 | _ -> error "should never happen" 456 | case maybePileupRow of 457 | Nothing -> return [gt] 458 | Just pr -> do 459 | let bases = pileupBases pr !! i 460 | let strands = pileupStrandInfo pr !! i 461 | let dp = length bases 462 | let dp8 = do -- list monad 463 | strand <- [ForwardStrand, ReverseStrand] -- outer loop 464 | allele <- ['A', 'C', 'G', 'T'] -- inner loop 465 | return . show . length . filter (\(a, s) -> a == allele && s == strand) $ zip bases strands 466 | return [gt, B.pack $ show dp, B.pack $ intercalate "," dp8] 467 | genotypeInfos = Just (formatField, genotypeFields) 468 | 469 | 470 | outputStats :: App () 471 | outputStats = do 472 | ReadStats totalSites nonMissingSitesVec rawReadsVec damageCleanedReadsVec congruentReadsVec <- asks envStats 473 | sampleNames <- asks envSampleNames 474 | liftIO $ hPutStrLn stderr 475 | "# Summary Statistics per sample \n\ 476 | \# SampleName: Name of the sample as given by the user \n\ 477 | \# TotalSites: Total number of sites in the given Snp file (before transition filtering) \n\ 478 | \# NonMissingCalls: Total number of sites output with a non-Missing call (before transition filtering) \n\ 479 | \# avgRawReads: mean coverage of raw pileup input data across total sites (incl. missing sites) \n\ 480 | \# avgDamageCleanedReads: mean coverage of pileup after single-stranded damage removal \n\ 481 | \# avgSampledFrom: mean coverage of pileup after removing reads with tri-allelic alleles \n\ 482 | \SampleName\tTotalSites\tNonMissingCalls\tavgRawReads\tavgDamageCleanedReads\tavgSampledFrom" 483 | forM_ (zip [0..] sampleNames) $ \(i, name) -> do 484 | totalS <- liftIO $ readIORef totalSites 485 | nonMissingSites <- V.read nonMissingSitesVec i 486 | rawReads <- V.read rawReadsVec i 487 | damageCleanedReads <- V.read damageCleanedReadsVec i 488 | congruentReads <- V.read congruentReadsVec i 489 | let avgRawReads = (fromIntegral rawReads / fromIntegral totalS) :: Double 490 | avgDamageCleanedReads = (fromIntegral damageCleanedReads / fromIntegral totalS) :: Double 491 | avgCongruentReads = (fromIntegral congruentReads / fromIntegral totalS) :: Double 492 | liftIO . hPutStrLn stderr $ printf "%s\t%d\t%d\t%g\t%g\t%g" name totalS nonMissingSites 493 | avgRawReads avgDamageCleanedReads avgCongruentReads 494 | -------------------------------------------------------------------------------- /test/testDat/1240k_eigenstrat_snp_short.pos.txt: -------------------------------------------------------------------------------- 1 | 1 752566 2 | 1 776546 3 | 1 832918 4 | 1 842013 5 | 1 846864 6 | 1 869303 7 | 1 891021 8 | 1 893462 9 | 1 896271 10 | 1 903426 11 | 1 914852 12 | 1 949654 13 | 1 1003629 14 | 1 1005806 15 | 1 1018704 16 | 1 1021415 17 | 1 1021658 18 | 1 1021695 19 | 1 1030565 20 | 1 1031540 21 | 1 1045331 22 | 1 1048955 23 | 1 1049950 24 | 1 1060235 25 | 1 1061166 26 | 1 1062638 27 | 1 1064979 28 | 1 1066029 29 | 1 1077064 30 | 1 1079198 31 | 1 1087683 32 | 1 1090557 33 | 1 1094738 34 | 1 1099342 35 | 1 1106112 36 | 1 1106473 37 | 1 1108637 38 | 1 1119858 39 | 1 1120431 40 | 1 1121794 41 | 1 1129122 42 | 1 1135242 43 | 1 1143657 44 | 1 1151300 45 | 1 1152631 46 | 1 1156131 47 | 1 1157547 48 | 1 1158277 49 | 1 1161780 50 | 1 1162435 51 | 1 1163804 52 | 1 1170587 53 | 1 1171683 54 | 1 1172907 55 | 1 1176597 56 | 1 1192515 57 | 1 1194804 58 | 1 1197591 59 | 1 1205155 60 | 1 1206343 61 | 1 1211292 62 | 1 1222519 63 | 1 1235792 64 | 1 1235845 65 | 1 1237604 66 | 1 1242084 67 | 1 1247494 68 | 1 1249187 69 | 1 1254255 70 | 1 1268505 71 | 1 1287562 72 | 1 1297422 73 | 1 1310924 74 | 1 1314015 75 | 1 1314245 76 | 1 1316887 77 | 1 1329803 78 | 1 1330931 79 | 1 1336006 80 | 1 1340241 81 | 1 1342612 82 | 1 1366274 83 | 1 1367428 84 | 1 1373373 85 | 1 1406874 86 | 1 1425700 87 | 1 1462766 88 | 1 1470161 89 | 1 1477108 90 | 1 1477244 91 | 1 1478180 92 | 1 1485984 93 | 1 1486834 94 | 1 1486903 95 | 1 1487059 96 | 1 1493727 97 | 1 1497008 98 | 1 1497824 99 | 1 1500941 100 | 1 1505255 101 | 2 21822 102 | 2 28328 103 | 2 29443 104 | 2 31703 105 | 2 33012 106 | 2 34289 107 | 2 34503 108 | 2 36787 109 | 2 38938 110 | 2 39174 111 | 2 43092 112 | 2 47011 113 | 2 47648 114 | 2 47808 115 | 2 54036 116 | 2 63495 117 | 2 70074 118 | 2 72260 119 | 2 72735 120 | 2 79636 121 | 2 80959 122 | 2 81254 123 | 2 84392 124 | 2 85793 125 | 2 86142 126 | 2 89910 127 | 2 90138 128 | 2 100373 129 | 2 104979 130 | 2 106692 131 | 2 107140 132 | 2 111964 133 | 2 112496 134 | 2 114650 135 | 2 115035 136 | 2 118913 137 | 2 118923 138 | 2 121222 139 | 2 128038 140 | 2 128345 141 | 2 131700 142 | 2 139346 143 | 2 141004 144 | 2 141540 145 | 2 142137 146 | 2 142245 147 | 2 149546 148 | 2 151464 149 | 2 153279 150 | 2 153302 151 | 2 167187 152 | 2 167739 153 | 2 170576 154 | 2 170616 155 | 2 171112 156 | 2 181415 157 | 2 184390 158 | 2 186517 159 | 2 188223 160 | 2 189800 161 | 2 189818 162 | 2 189959 163 | 2 189972 164 | 2 193882 165 | 2 200477 166 | 2 202993 167 | 2 203169 168 | 2 206704 169 | 2 210482 170 | 2 211069 171 | 2 212106 172 | 2 217334 173 | 2 224192 174 | 2 224919 175 | 2 226933 176 | 2 226999 177 | 2 228310 178 | 2 230442 179 | 2 238975 180 | 2 239416 181 | 2 239597 182 | 2 240630 183 | 2 242800 184 | 2 251673 185 | 2 256278 186 | 2 261744 187 | 2 262553 188 | 2 268191 189 | 2 268293 190 | 2 272051 191 | 2 272926 192 | 2 276942 193 | 2 280819 194 | 2 282137 195 | 2 282736 196 | 2 285471 197 | 2 295255 198 | 2 296812 199 | 2 298269 200 | 2 301751 201 | 3 63411 202 | 3 66894 203 | 3 76317 204 | 3 82010 205 | 3 95973 206 | 3 104972 207 | 3 105365 208 | 3 107626 209 | 3 108226 210 | 3 108804 211 | 3 108993 212 | 3 110508 213 | 3 114133 214 | 3 115040 215 | 3 117152 216 | 3 118567 217 | 3 119892 218 | 3 120321 219 | 3 121226 220 | 3 121958 221 | 3 125808 222 | 3 126158 223 | 3 127677 224 | 3 133412 225 | 3 133748 226 | 3 134139 227 | 3 135757 228 | 3 135806 229 | 3 135900 230 | 3 140137 231 | 3 140194 232 | 3 140277 233 | 3 140727 234 | 3 141016 235 | 3 141149 236 | 3 142020 237 | 3 142050 238 | 3 142263 239 | 3 142304 240 | 3 143073 241 | 3 143982 242 | 3 144244 243 | 3 147115 244 | 3 148725 245 | 3 149179 246 | 3 149197 247 | 3 149986 248 | 3 151056 249 | 3 151789 250 | 3 152592 251 | 3 156570 252 | 3 157698 253 | 3 159288 254 | 3 160100 255 | 3 162246 256 | 3 163913 257 | 3 168731 258 | 3 170010 259 | 3 170444 260 | 3 170968 261 | 3 171308 262 | 3 171350 263 | 3 172477 264 | 3 172542 265 | 3 173003 266 | 3 174514 267 | 3 174816 268 | 3 175348 269 | 3 178525 270 | 3 179527 271 | 3 181610 272 | 3 182674 273 | 3 182820 274 | 3 185281 275 | 3 188832 276 | 3 188865 277 | 3 189315 278 | 3 189404 279 | 3 189621 280 | 3 190290 281 | 3 191047 282 | 3 191260 283 | 3 191529 284 | 3 193230 285 | 3 194959 286 | 3 195315 287 | 3 195923 288 | 3 196163 289 | 3 197462 290 | 3 198906 291 | 3 201364 292 | 3 201658 293 | 3 201830 294 | 3 201998 295 | 3 206117 296 | 3 206775 297 | 3 207651 298 | 3 211745 299 | 3 216035 300 | 3 220531 301 | 4 51441 302 | 4 71566 303 | 4 73508 304 | 4 75102 305 | 4 90883 306 | 4 94380 307 | 4 95344 308 | 4 99926 309 | 4 102571 310 | 4 104407 311 | 4 109596 312 | 4 110646 313 | 4 111487 314 | 4 112283 315 | 4 117451 316 | 4 117932 317 | 4 121620 318 | 4 121652 319 | 4 122323 320 | 4 123582 321 | 4 125490 322 | 4 127452 323 | 4 134851 324 | 4 136837 325 | 4 142550 326 | 4 150683 327 | 4 166811 328 | 4 167596 329 | 4 171241 330 | 4 174234 331 | 4 179131 332 | 4 181476 333 | 4 195709 334 | 4 198608 335 | 4 200631 336 | 4 200941 337 | 4 214909 338 | 4 217627 339 | 4 219538 340 | 4 225449 341 | 4 231940 342 | 4 232680 343 | 4 233475 344 | 4 243556 345 | 4 245965 346 | 4 252804 347 | 4 258627 348 | 4 276983 349 | 4 279396 350 | 4 282006 351 | 4 282499 352 | 4 292486 353 | 4 302934 354 | 4 303736 355 | 4 308010 356 | 4 309903 357 | 4 321902 358 | 4 331090 359 | 4 331740 360 | 4 344685 361 | 4 352955 362 | 4 367927 363 | 4 367956 364 | 4 384375 365 | 4 395206 366 | 4 403303 367 | 4 408952 368 | 4 415311 369 | 4 419565 370 | 4 420688 371 | 4 429720 372 | 4 434079 373 | 4 452106 374 | 4 469347 375 | 4 470103 376 | 4 470866 377 | 4 473935 378 | 4 473952 379 | 4 475468 380 | 4 495935 381 | 4 497138 382 | 4 500244 383 | 4 500404 384 | 4 500500 385 | 4 500631 386 | 4 502879 387 | 4 502911 388 | 4 504012 389 | 4 504664 390 | 4 507001 391 | 4 509178 392 | 4 511339 393 | 4 514041 394 | 4 514201 395 | 4 515243 396 | 4 516586 397 | 4 516647 398 | 4 517461 399 | 4 520853 400 | 4 521480 401 | 5 26366 402 | 5 27564 403 | 5 38139 404 | 5 58298 405 | 5 75949 406 | 5 81437 407 | 5 90199 408 | 5 93580 409 | 5 99784 410 | 5 103072 411 | 5 103109 412 | 5 105738 413 | 5 107190 414 | 5 111130 415 | 5 112712 416 | 5 113577 417 | 5 114071 418 | 5 118163 419 | 5 118406 420 | 5 129032 421 | 5 141659 422 | 5 143197 423 | 5 143534 424 | 5 157750 425 | 5 161292 426 | 5 163769 427 | 5 163896 428 | 5 169902 429 | 5 173200 430 | 5 174106 431 | 5 174940 432 | 5 175071 433 | 5 177264 434 | 5 180646 435 | 5 182864 436 | 5 183134 437 | 5 183401 438 | 5 183516 439 | 5 184393 440 | 5 184702 441 | 5 185498 442 | 5 187717 443 | 5 188747 444 | 5 188918 445 | 5 189799 446 | 5 190639 447 | 5 191093 448 | 5 192757 449 | 5 192842 450 | 5 192972 451 | 5 195603 452 | 5 196258 453 | 5 196333 454 | 5 199003 455 | 5 199415 456 | 5 200747 457 | 5 204215 458 | 5 204952 459 | 5 207504 460 | 5 207618 461 | 5 207868 462 | 5 208214 463 | 5 208959 464 | 5 214450 465 | 5 214454 466 | 5 223117 467 | 5 225768 468 | 5 230980 469 | 5 231908 470 | 5 233517 471 | 5 239545 472 | 5 264036 473 | 5 267658 474 | 5 268187 475 | 5 269234 476 | 5 275101 477 | 5 282420 478 | 5 299621 479 | 5 302222 480 | 5 306032 481 | 5 306981 482 | 5 308352 483 | 5 309096 484 | 5 309483 485 | 5 311256 486 | 5 311478 487 | 5 314011 488 | 5 317670 489 | 5 327522 490 | 5 330764 491 | 5 336952 492 | 5 337232 493 | 5 337985 494 | 5 339014 495 | 5 342195 496 | 5 345968 497 | 5 346610 498 | 5 346636 499 | 5 347131 500 | 5 348598 501 | 6 165391 502 | 6 183900 503 | 6 184034 504 | 6 187837 505 | 6 191331 506 | 6 192106 507 | 6 192300 508 | 6 196027 509 | 6 199964 510 | 6 203909 511 | 6 204072 512 | 6 204484 513 | 6 204909 514 | 6 205800 515 | 6 205920 516 | 6 205947 517 | 6 206313 518 | 6 206599 519 | 6 208822 520 | 6 209462 521 | 6 209980 522 | 6 211765 523 | 6 211941 524 | 6 212006 525 | 6 212171 526 | 6 212595 527 | 6 213426 528 | 6 213983 529 | 6 214859 530 | 6 215423 531 | 6 220549 532 | 6 221146 533 | 6 223584 534 | 6 224755 535 | 6 225044 536 | 6 226015 537 | 6 227205 538 | 6 227752 539 | 6 227920 540 | 6 227936 541 | 6 228345 542 | 6 229191 543 | 6 230007 544 | 6 232716 545 | 6 233750 546 | 6 236425 547 | 6 239373 548 | 6 240232 549 | 6 240402 550 | 6 240484 551 | 6 241477 552 | 6 242049 553 | 6 245274 554 | 6 245466 555 | 6 246178 556 | 6 247899 557 | 6 249597 558 | 6 252005 559 | 6 252145 560 | 6 252772 561 | 6 253861 562 | 6 254337 563 | 6 254424 564 | 6 256281 565 | 6 274847 566 | 6 284613 567 | 6 285695 568 | 6 298700 569 | 6 299296 570 | 6 299635 571 | 6 304078 572 | 6 305185 573 | 6 305816 574 | 6 308149 575 | 6 311938 576 | 6 312023 577 | 6 312247 578 | 6 335424 579 | 6 340036 580 | 6 340273 581 | 6 341187 582 | 6 344878 583 | 6 348723 584 | 6 350371 585 | 6 358985 586 | 6 362354 587 | 6 362457 588 | 6 363858 589 | 6 365158 590 | 6 372939 591 | 6 374664 592 | 6 378246 593 | 6 381217 594 | 6 383546 595 | 6 383624 596 | 6 384230 597 | 6 387659 598 | 6 389630 599 | 6 390105 600 | 6 394293 601 | 7 42289 602 | 7 45653 603 | 7 46239 604 | 7 53998 605 | 7 54183 606 | 7 57191 607 | 7 60728 608 | 7 63494 609 | 7 65254 610 | 7 67365 611 | 7 67820 612 | 7 85111 613 | 7 93811 614 | 7 94119 615 | 7 95673 616 | 7 97305 617 | 7 98795 618 | 7 98977 619 | 7 108274 620 | 7 112828 621 | 7 113319 622 | 7 123250 623 | 7 123956 624 | 7 128206 625 | 7 139113 626 | 7 145226 627 | 7 148300 628 | 7 149765 629 | 7 152640 630 | 7 155066 631 | 7 168027 632 | 7 173599 633 | 7 175574 634 | 7 176270 635 | 7 179866 636 | 7 186119 637 | 7 187132 638 | 7 197267 639 | 7 198401 640 | 7 198440 641 | 7 198903 642 | 7 199624 643 | 7 200682 644 | 7 208845 645 | 7 220374 646 | 7 227931 647 | 7 618938 648 | 7 620328 649 | 7 626763 650 | 7 630286 651 | 7 726597 652 | 7 730196 653 | 7 731534 654 | 7 733136 655 | 7 734146 656 | 7 735851 657 | 7 736629 658 | 7 739977 659 | 7 746706 660 | 7 746917 661 | 7 760111 662 | 7 765461 663 | 7 769995 664 | 7 786833 665 | 7 788923 666 | 7 796430 667 | 7 803244 668 | 7 805204 669 | 7 816845 670 | 7 828493 671 | 7 831062 672 | 7 833003 673 | 7 833878 674 | 7 848197 675 | 7 848242 676 | 7 848272 677 | 7 857837 678 | 7 861094 679 | 7 864389 680 | 7 869846 681 | 7 871728 682 | 7 872658 683 | 7 872777 684 | 7 872848 685 | 7 876525 686 | 7 881668 687 | 7 884665 688 | 7 890284 689 | 7 892344 690 | 7 892902 691 | 7 893407 692 | 7 893517 693 | 7 894027 694 | 7 901574 695 | 7 905229 696 | 7 906739 697 | 7 909350 698 | 7 912755 699 | 7 917828 700 | 7 925949 701 | 8 164984 702 | 8 170692 703 | 8 170920 704 | 8 176818 705 | 8 187938 706 | 8 189030 707 | 8 190568 708 | 8 191072 709 | 8 193585 710 | 8 194947 711 | 8 197042 712 | 8 198327 713 | 8 198583 714 | 8 198770 715 | 8 200898 716 | 8 205505 717 | 8 208834 718 | 8 209152 719 | 8 212894 720 | 8 212995 721 | 8 219797 722 | 8 219868 723 | 8 220412 724 | 8 221039 725 | 8 221671 726 | 8 221922 727 | 8 222949 728 | 8 223682 729 | 8 224321 730 | 8 224666 731 | 8 226670 732 | 8 226672 733 | 8 227598 734 | 8 227689 735 | 8 227843 736 | 8 229694 737 | 8 230112 738 | 8 230745 739 | 8 230847 740 | 8 231272 741 | 8 231287 742 | 8 236656 743 | 8 237333 744 | 8 238574 745 | 8 238648 746 | 8 239189 747 | 8 239299 748 | 8 240578 749 | 8 260716 750 | 8 263427 751 | 8 280645 752 | 8 281022 753 | 8 281572 754 | 8 285026 755 | 8 285124 756 | 8 286941 757 | 8 287831 758 | 8 293008 759 | 8 293221 760 | 8 293565 761 | 8 296848 762 | 8 297118 763 | 8 298201 764 | 8 298274 765 | 8 298510 766 | 8 300058 767 | 8 304062 768 | 8 304569 769 | 8 305133 770 | 8 308929 771 | 8 311009 772 | 8 323462 773 | 8 323502 774 | 8 323858 775 | 8 330328 776 | 8 330846 777 | 8 331969 778 | 8 332993 779 | 8 333018 780 | 8 333797 781 | 8 334288 782 | 8 334709 783 | 8 335646 784 | 8 336284 785 | 8 337263 786 | 8 344543 787 | 8 346346 788 | 8 354843 789 | 8 365789 790 | 8 372796 791 | 8 373007 792 | 8 400600 793 | 8 400916 794 | 8 401493 795 | 8 401635 796 | 8 401659 797 | 8 402692 798 | 8 403304 799 | 8 404593 800 | 8 405192 801 | 9 185016 802 | 9 204201 803 | 9 204485 804 | 9 205964 805 | 9 206132 806 | 9 206255 807 | 9 208549 808 | 9 209325 809 | 9 209403 810 | 9 212486 811 | 9 212761 812 | 9 212908 813 | 9 214706 814 | 9 215269 815 | 9 215534 816 | 9 216124 817 | 9 217247 818 | 9 217269 819 | 9 217597 820 | 9 219528 821 | 9 223332 822 | 9 223979 823 | 9 224227 824 | 9 224742 825 | 9 224781 826 | 9 224976 827 | 9 225268 828 | 9 227980 829 | 9 228301 830 | 9 228848 831 | 9 229826 832 | 9 236827 833 | 9 236828 834 | 9 237136 835 | 9 237988 836 | 9 238389 837 | 9 240321 838 | 9 240571 839 | 9 244457 840 | 9 244592 841 | 9 245116 842 | 9 245706 843 | 9 245752 844 | 9 245838 845 | 9 247174 846 | 9 248030 847 | 9 248039 848 | 9 248749 849 | 9 249281 850 | 9 249391 851 | 9 249992 852 | 9 250522 853 | 9 250609 854 | 9 250889 855 | 9 251568 856 | 9 251664 857 | 9 253594 858 | 9 256995 859 | 9 257034 860 | 9 257349 861 | 9 257973 862 | 9 257985 863 | 9 258186 864 | 9 258372 865 | 9 261460 866 | 9 264347 867 | 9 264519 868 | 9 266505 869 | 9 266708 870 | 9 266999 871 | 9 268740 872 | 9 269008 873 | 9 269510 874 | 9 270321 875 | 9 270422 876 | 9 271091 877 | 9 271132 878 | 9 271228 879 | 9 271455 880 | 9 271467 881 | 9 272191 882 | 9 272325 883 | 9 273160 884 | 9 274108 885 | 9 274433 886 | 9 274641 887 | 9 279149 888 | 9 279753 889 | 9 280318 890 | 9 281845 891 | 9 281976 892 | 9 282738 893 | 9 283138 894 | 9 284370 895 | 9 286102 896 | 9 286291 897 | 9 286491 898 | 9 286593 899 | 9 289061 900 | 9 290670 901 | 10 113934 902 | 10 126070 903 | 10 135656 904 | 10 135853 905 | 10 148325 906 | 10 151997 907 | 10 158202 908 | 10 158946 909 | 10 159076 910 | 10 159404 911 | 10 162102 912 | 10 175653 913 | 10 177393 914 | 10 201183 915 | 10 203471 916 | 10 230625 917 | 10 231092 918 | 10 236452 919 | 10 237750 920 | 10 244561 921 | 10 252693 922 | 10 255222 923 | 10 269695 924 | 10 273344 925 | 10 273915 926 | 10 277232 927 | 10 279248 928 | 10 291533 929 | 10 291868 930 | 10 291893 931 | 10 291999 932 | 10 297633 933 | 10 309526 934 | 10 313304 935 | 10 315644 936 | 10 323283 937 | 10 325841 938 | 10 326725 939 | 10 326894 940 | 10 327832 941 | 10 329493 942 | 10 331398 943 | 10 331763 944 | 10 332207 945 | 10 338553 946 | 10 343758 947 | 10 353306 948 | 10 353589 949 | 10 354301 950 | 10 356784 951 | 10 357849 952 | 10 359403 953 | 10 364383 954 | 10 372537 955 | 10 373048 956 | 10 384248 957 | 10 384588 958 | 10 387060 959 | 10 394714 960 | 10 395463 961 | 10 396882 962 | 10 406229 963 | 10 407584 964 | 10 408134 965 | 10 412449 966 | 10 413010 967 | 10 413440 968 | 10 423211 969 | 10 423555 970 | 10 426966 971 | 10 429977 972 | 10 432087 973 | 10 432288 974 | 10 437193 975 | 10 451700 976 | 10 453567 977 | 10 458477 978 | 10 462628 979 | 10 462885 980 | 10 464557 981 | 10 466286 982 | 10 466761 983 | 10 467785 984 | 10 468179 985 | 10 468183 986 | 10 468599 987 | 10 470646 988 | 10 470698 989 | 10 471641 990 | 10 476090 991 | 10 480774 992 | 10 487973 993 | 10 489656 994 | 10 491349 995 | 10 495452 996 | 10 499994 997 | 10 500193 998 | 10 514032 999 | 10 514138 1000 | 10 518156 1001 | 11 198510 1002 | 11 199256 1003 | 11 202812 1004 | 11 202856 1005 | 11 203788 1006 | 11 203892 1007 | 11 204062 1008 | 11 204147 1009 | 11 204228 1010 | 11 205198 1011 | 11 206767 1012 | 11 210627 1013 | 11 211482 1014 | 11 212202 1015 | 11 217508 1016 | 11 218613 1017 | 11 219089 1018 | 11 219538 1019 | 11 221584 1020 | 11 221659 1021 | 11 221824 1022 | 11 223272 1023 | 11 224393 1024 | 11 224676 1025 | 11 224832 1026 | 11 224981 1027 | 11 225823 1028 | 11 226186 1029 | 11 228077 1030 | 11 232598 1031 | 11 233067 1032 | 11 233739 1033 | 11 235658 1034 | 11 236871 1035 | 11 237087 1036 | 11 237648 1037 | 11 240022 1038 | 11 242112 1039 | 11 243268 1040 | 11 244106 1041 | 11 244141 1042 | 11 244552 1043 | 11 247986 1044 | 11 248002 1045 | 11 248016 1046 | 11 248181 1047 | 11 249097 1048 | 11 249105 1049 | 11 249131 1050 | 11 252318 1051 | 11 252649 1052 | 11 253841 1053 | 11 254256 1054 | 11 254672 1055 | 11 255014 1056 | 11 257030 1057 | 11 257230 1058 | 11 260466 1059 | 11 264391 1060 | 11 266921 1061 | 11 269147 1062 | 11 269856 1063 | 11 270715 1064 | 11 272010 1065 | 11 273757 1066 | 11 274780 1067 | 11 276012 1068 | 11 283928 1069 | 11 285936 1070 | 11 288505 1071 | 11 290143 1072 | 11 290233 1073 | 11 295343 1074 | 11 295670 1075 | 11 295876 1076 | 11 300364 1077 | 11 306791 1078 | 11 306920 1079 | 11 307244 1080 | 11 308180 1081 | 11 312929 1082 | 11 324699 1083 | 11 329896 1084 | 11 330061 1085 | 11 330630 1086 | 11 331326 1087 | 11 335017 1088 | 11 336332 1089 | 11 345515 1090 | 11 345595 1091 | 11 346015 1092 | 11 350917 1093 | 11 353757 1094 | 11 356090 1095 | 11 358027 1096 | 11 358069 1097 | 11 362668 1098 | 11 364390 1099 | 11 364753 1100 | 11 366251 1101 | 12 174122 1102 | 12 180775 1103 | 12 184095 1104 | 12 192220 1105 | 12 193818 1106 | 12 193920 1107 | 12 195901 1108 | 12 195914 1109 | 12 196610 1110 | 12 197841 1111 | 12 199532 1112 | 12 207231 1113 | 12 207691 1114 | 12 207857 1115 | 12 207886 1116 | 12 209354 1117 | 12 210509 1118 | 12 215121 1119 | 12 216039 1120 | 12 216210 1121 | 12 217028 1122 | 12 217764 1123 | 12 219191 1124 | 12 219988 1125 | 12 221203 1126 | 12 223193 1127 | 12 223258 1128 | 12 223422 1129 | 12 223656 1130 | 12 224000 1131 | 12 224328 1132 | 12 224337 1133 | 12 225315 1134 | 12 225655 1135 | 12 225699 1136 | 12 229541 1137 | 12 232123 1138 | 12 234295 1139 | 12 235962 1140 | 12 236115 1141 | 12 237186 1142 | 12 237521 1143 | 12 238648 1144 | 12 239143 1145 | 12 240777 1146 | 12 243389 1147 | 12 244627 1148 | 12 245335 1149 | 12 245828 1150 | 12 249007 1151 | 12 250239 1152 | 12 250605 1153 | 12 250987 1154 | 12 255195 1155 | 12 257633 1156 | 12 257695 1157 | 12 257987 1158 | 12 258585 1159 | 12 258807 1160 | 12 260682 1161 | 12 260870 1162 | 12 261676 1163 | 12 261784 1164 | 12 261824 1165 | 12 261929 1166 | 12 262041 1167 | 12 263839 1168 | 12 263954 1169 | 12 264136 1170 | 12 265928 1171 | 12 269531 1172 | 12 270682 1173 | 12 270896 1174 | 12 276019 1175 | 12 277464 1176 | 12 278376 1177 | 12 279458 1178 | 12 279586 1179 | 12 280349 1180 | 12 288149 1181 | 12 294544 1182 | 12 295546 1183 | 12 297943 1184 | 12 298877 1185 | 12 299043 1186 | 12 299542 1187 | 12 299715 1188 | 12 300541 1189 | 12 301428 1190 | 12 303143 1191 | 12 303320 1192 | 12 304004 1193 | 12 309753 1194 | 12 310113 1195 | 12 310334 1196 | 12 310419 1197 | 12 310571 1198 | 12 311949 1199 | 12 318366 1200 | 12 318746 1201 | 13 19125669 1202 | 13 19210426 1203 | 13 19234747 1204 | 13 19240775 1205 | 13 19303482 1206 | 13 19311241 1207 | 13 19311780 1208 | 13 19451443 1209 | 13 19451794 1210 | 13 19451890 1211 | 13 19455957 1212 | 13 19468896 1213 | 13 19496434 1214 | 13 19509431 1215 | 13 19513514 1216 | 13 19514214 1217 | 13 19523899 1218 | 13 19526947 1219 | 13 19527206 1220 | 13 19529644 1221 | 13 19534013 1222 | 13 19536546 1223 | 13 19538302 1224 | 13 19541910 1225 | 13 19542317 1226 | 13 19545037 1227 | 13 19548826 1228 | 13 19550892 1229 | 13 19550924 1230 | 13 19552050 1231 | 13 19554059 1232 | 13 19555372 1233 | 13 19555872 1234 | 13 19557010 1235 | 13 19557336 1236 | 13 19564689 1237 | 13 19564715 1238 | 13 19569428 1239 | 13 19571844 1240 | 13 19580972 1241 | 13 19581975 1242 | 13 19584385 1243 | 13 19586168 1244 | 13 19586194 1245 | 13 19588497 1246 | 13 19590336 1247 | 13 19591424 1248 | 13 19591981 1249 | 13 19592334 1250 | 13 19593241 1251 | 13 19593711 1252 | 13 19594024 1253 | 13 19596245 1254 | 13 19596332 1255 | 13 19598001 1256 | 13 19598030 1257 | 13 19611008 1258 | 13 19612262 1259 | 13 19616221 1260 | 13 19618079 1261 | 13 19618097 1262 | 13 19618304 1263 | 13 19620621 1264 | 13 19620986 1265 | 13 19622143 1266 | 13 19626818 1267 | 13 19626928 1268 | 13 19627565 1269 | 13 19628140 1270 | 13 19629479 1271 | 13 19629697 1272 | 13 19631157 1273 | 13 19631178 1274 | 13 19633215 1275 | 13 19633907 1276 | 13 19633920 1277 | 13 19636940 1278 | 13 19640445 1279 | 13 19641229 1280 | 13 19644672 1281 | 13 19645637 1282 | 13 19650222 1283 | 13 19650695 1284 | 13 19653454 1285 | 13 19655333 1286 | 13 19658933 1287 | 13 19659081 1288 | 13 19659780 1289 | 13 19660271 1290 | 13 19660641 1291 | 13 19662154 1292 | 13 19665640 1293 | 13 19666481 1294 | 13 19668347 1295 | 13 19669152 1296 | 13 19670142 1297 | 13 19675536 1298 | 13 19675630 1299 | 13 19686558 1300 | 13 19690836 1301 | 14 20147295 1302 | 14 20213937 1303 | 14 20215448 1304 | 14 20227924 1305 | 14 20295769 1306 | 14 20302379 1307 | 14 20302443 1308 | 14 20305431 1309 | 14 20331117 1310 | 14 20337395 1311 | 14 20343549 1312 | 14 20343911 1313 | 14 20362820 1314 | 14 20389241 1315 | 14 20402139 1316 | 14 20404091 1317 | 14 20409499 1318 | 14 20420338 1319 | 14 20423865 1320 | 14 20424912 1321 | 14 20425051 1322 | 14 20425220 1323 | 14 20426742 1324 | 14 20435805 1325 | 14 20445370 1326 | 14 20445618 1327 | 14 20445905 1328 | 14 20452460 1329 | 14 20463103 1330 | 14 20465757 1331 | 14 20466824 1332 | 14 20467330 1333 | 14 20469792 1334 | 14 20475563 1335 | 14 20480547 1336 | 14 20481594 1337 | 14 20482471 1338 | 14 20484740 1339 | 14 20484786 1340 | 14 20487364 1341 | 14 20488191 1342 | 14 20488658 1343 | 14 20489363 1344 | 14 20489393 1345 | 14 20489478 1346 | 14 20489558 1347 | 14 20489653 1348 | 14 20490465 1349 | 14 20490612 1350 | 14 20490645 1351 | 14 20492492 1352 | 14 20495913 1353 | 14 20497718 1354 | 14 20497737 1355 | 14 20498170 1356 | 14 20499341 1357 | 14 20500762 1358 | 14 20501312 1359 | 14 20501368 1360 | 14 20503541 1361 | 14 20503568 1362 | 14 20504435 1363 | 14 20505191 1364 | 14 20512610 1365 | 14 20512716 1366 | 14 20512772 1367 | 14 20512826 1368 | 14 20512828 1369 | 14 20512898 1370 | 14 20515023 1371 | 14 20515436 1372 | 14 20516012 1373 | 14 20517501 1374 | 14 20517540 1375 | 14 20518391 1376 | 14 20518618 1377 | 14 20518931 1378 | 14 20520979 1379 | 14 20521060 1380 | 14 20522692 1381 | 14 20522721 1382 | 14 20523406 1383 | 14 20524255 1384 | 14 20526831 1385 | 14 20527034 1386 | 14 20528207 1387 | 14 20528321 1388 | 14 20528528 1389 | 14 20529652 1390 | 14 20533188 1391 | 14 20533275 1392 | 14 20533586 1393 | 14 20536377 1394 | 14 20538394 1395 | 14 20539305 1396 | 14 20545088 1397 | 14 20561753 1398 | 14 20561802 1399 | 14 20562080 1400 | 14 20568700 1401 | 15 20071673 1402 | 15 20168817 1403 | 15 20170687 1404 | 15 20174657 1405 | 15 20176300 1406 | 15 20177092 1407 | 15 20192011 1408 | 15 22351239 1409 | 15 22414580 1410 | 15 22449059 1411 | 15 22486763 1412 | 15 22487672 1413 | 15 22755185 1414 | 15 22763396 1415 | 15 22763479 1416 | 15 22765683 1417 | 15 22769771 1418 | 15 22778880 1419 | 15 22782175 1420 | 15 22784095 1421 | 15 22787103 1422 | 15 22789651 1423 | 15 22791431 1424 | 15 22796596 1425 | 15 22799908 1426 | 15 22808289 1427 | 15 22816713 1428 | 15 22834161 1429 | 15 22835646 1430 | 15 22837143 1431 | 15 22839971 1432 | 15 22840279 1433 | 15 22845128 1434 | 15 22850389 1435 | 15 22850512 1436 | 15 22853997 1437 | 15 22855425 1438 | 15 22863001 1439 | 15 22863262 1440 | 15 22863817 1441 | 15 22866498 1442 | 15 22866621 1443 | 15 22868262 1444 | 15 22869547 1445 | 15 22869764 1446 | 15 22869860 1447 | 15 22870276 1448 | 15 22870800 1449 | 15 22872798 1450 | 15 22873784 1451 | 15 22876052 1452 | 15 22876889 1453 | 15 22883542 1454 | 15 22883815 1455 | 15 22883979 1456 | 15 22886225 1457 | 15 22891872 1458 | 15 22904312 1459 | 15 22905570 1460 | 15 22907319 1461 | 15 22908279 1462 | 15 22908392 1463 | 15 22908858 1464 | 15 22909207 1465 | 15 22909421 1466 | 15 22910324 1467 | 15 22914428 1468 | 15 22916162 1469 | 15 22918657 1470 | 15 22918700 1471 | 15 22919415 1472 | 15 22920025 1473 | 15 22920718 1474 | 15 22922064 1475 | 15 22922449 1476 | 15 22922576 1477 | 15 22924097 1478 | 15 22924144 1479 | 15 22924484 1480 | 15 22925034 1481 | 15 22926229 1482 | 15 22927167 1483 | 15 22927952 1484 | 15 22930067 1485 | 15 22930184 1486 | 15 22931008 1487 | 15 22931445 1488 | 15 22932511 1489 | 15 22933317 1490 | 15 22935635 1491 | 15 22937188 1492 | 15 22937838 1493 | 15 22940670 1494 | 15 22946061 1495 | 15 22947667 1496 | 15 22948004 1497 | 15 22948024 1498 | 15 22949828 1499 | 15 22951065 1500 | 15 22953581 1501 | 16 83887 1502 | 16 84170 1503 | 16 87217 1504 | 16 88165 1505 | 16 88245 1506 | 16 88552 1507 | 16 94535 1508 | 16 97354 1509 | 16 101263 1510 | 16 105320 1511 | 16 105444 1512 | 16 107275 1513 | 16 108955 1514 | 16 110165 1515 | 16 113970 1516 | 16 115657 1517 | 16 116923 1518 | 16 117194 1519 | 16 119019 1520 | 16 119729 1521 | 16 128054 1522 | 16 129277 1523 | 16 133946 1524 | 16 143503 1525 | 16 155320 1526 | 16 155344 1527 | 16 161938 1528 | 16 162998 1529 | 16 163598 1530 | 16 163626 1531 | 16 173145 1532 | 16 173183 1533 | 16 176743 1534 | 16 184390 1535 | 16 185123 1536 | 16 187429 1537 | 16 188606 1538 | 16 191306 1539 | 16 192314 1540 | 16 193586 1541 | 16 198093 1542 | 16 198747 1543 | 16 205035 1544 | 16 212649 1545 | 16 223640 1546 | 16 223675 1547 | 16 224619 1548 | 16 225653 1549 | 16 227456 1550 | 16 228306 1551 | 16 235490 1552 | 16 240280 1553 | 16 241210 1554 | 16 247888 1555 | 16 252479 1556 | 16 257548 1557 | 16 269033 1558 | 16 271021 1559 | 16 276187 1560 | 16 277458 1561 | 16 278050 1562 | 16 279684 1563 | 16 286982 1564 | 16 293562 1565 | 16 293915 1566 | 16 295795 1567 | 16 298588 1568 | 16 304514 1569 | 16 304803 1570 | 16 309155 1571 | 16 310380 1572 | 16 314780 1573 | 16 319511 1574 | 16 329671 1575 | 16 332148 1576 | 16 333146 1577 | 16 334890 1578 | 16 335373 1579 | 16 336396 1580 | 16 337336 1581 | 16 339415 1582 | 16 341079 1583 | 16 347326 1584 | 16 349221 1585 | 16 349293 1586 | 16 349331 1587 | 16 351673 1588 | 16 352736 1589 | 16 356689 1590 | 16 358161 1591 | 16 359567 1592 | 16 359953 1593 | 16 362638 1594 | 16 368432 1595 | 16 369562 1596 | 16 370484 1597 | 16 372597 1598 | 16 375782 1599 | 16 376781 1600 | 16 377794 1601 | 17 6157 1602 | 17 6689 1603 | 17 19688 1604 | 17 24036 1605 | 17 24203 1606 | 17 27409 1607 | 17 33974 1608 | 17 36718 1609 | 17 38924 1610 | 17 49733 1611 | 17 51088 1612 | 17 52368 1613 | 17 53011 1614 | 17 58980 1615 | 17 60225 1616 | 17 60532 1617 | 17 65351 1618 | 17 66675 1619 | 17 71870 1620 | 17 73263 1621 | 17 78176 1622 | 17 81136 1623 | 17 81461 1624 | 17 82081 1625 | 17 82969 1626 | 17 83173 1627 | 17 84782 1628 | 17 86350 1629 | 17 86802 1630 | 17 87761 1631 | 17 88525 1632 | 17 88813 1633 | 17 88988 1634 | 17 90429 1635 | 17 96613 1636 | 17 108420 1637 | 17 109700 1638 | 17 109995 1639 | 17 110025 1640 | 17 110245 1641 | 17 110579 1642 | 17 111030 1643 | 17 113034 1644 | 17 113390 1645 | 17 114669 1646 | 17 119382 1647 | 17 126701 1648 | 17 128389 1649 | 17 132712 1650 | 17 135310 1651 | 17 146428 1652 | 17 152362 1653 | 17 155494 1654 | 17 158754 1655 | 17 159851 1656 | 17 163278 1657 | 17 172591 1658 | 17 172875 1659 | 17 173297 1660 | 17 173580 1661 | 17 173906 1662 | 17 174121 1663 | 17 176042 1664 | 17 177014 1665 | 17 177359 1666 | 17 179396 1667 | 17 188593 1668 | 17 198698 1669 | 17 199250 1670 | 17 200541 1671 | 17 202208 1672 | 17 203442 1673 | 17 203531 1674 | 17 204759 1675 | 17 208004 1676 | 17 208573 1677 | 17 208848 1678 | 17 210103 1679 | 17 210420 1680 | 17 212731 1681 | 17 214402 1682 | 17 216763 1683 | 17 217829 1684 | 17 218145 1685 | 17 219559 1686 | 17 222910 1687 | 17 226749 1688 | 17 228978 1689 | 17 237667 1690 | 17 241835 1691 | 17 245185 1692 | 17 245211 1693 | 17 245312 1694 | 17 251503 1695 | 17 257557 1696 | 17 259648 1697 | 17 260142 1698 | 17 260182 1699 | 17 262201 1700 | 17 262335 1701 | 18 69836 1702 | 18 139767 1703 | 18 143973 1704 | 18 158917 1705 | 18 159281 1706 | 18 159382 1707 | 18 159885 1708 | 18 165453 1709 | 18 165503 1710 | 18 166277 1711 | 18 195644 1712 | 18 196829 1713 | 18 204232 1714 | 18 208998 1715 | 18 209527 1716 | 18 213827 1717 | 18 218695 1718 | 18 220658 1719 | 18 225051 1720 | 18 225964 1721 | 18 226485 1722 | 18 226497 1723 | 18 226509 1724 | 18 243219 1725 | 18 244652 1726 | 18 244768 1727 | 18 248266 1728 | 18 248279 1729 | 18 249939 1730 | 18 250047 1731 | 18 252038 1732 | 18 252559 1733 | 18 261903 1734 | 18 264012 1735 | 18 264431 1736 | 18 274013 1737 | 18 276848 1738 | 18 277265 1739 | 18 277763 1740 | 18 278796 1741 | 18 285953 1742 | 18 286176 1743 | 18 286971 1744 | 18 287648 1745 | 18 287705 1746 | 18 289153 1747 | 18 289209 1748 | 18 293519 1749 | 18 294021 1750 | 18 294495 1751 | 18 297712 1752 | 18 297805 1753 | 18 298620 1754 | 18 302224 1755 | 18 304298 1756 | 18 307648 1757 | 18 308555 1758 | 18 311204 1759 | 18 312717 1760 | 18 313108 1761 | 18 313250 1762 | 18 314592 1763 | 18 315138 1764 | 18 316349 1765 | 18 319176 1766 | 18 322183 1767 | 18 322522 1768 | 18 328726 1769 | 18 331506 1770 | 18 332469 1771 | 18 332483 1772 | 18 332528 1773 | 18 333739 1774 | 18 335363 1775 | 18 337271 1776 | 18 338113 1777 | 18 338132 1778 | 18 338297 1779 | 18 339386 1780 | 18 339594 1781 | 18 339671 1782 | 18 339727 1783 | 18 340963 1784 | 18 341151 1785 | 18 342544 1786 | 18 342971 1787 | 18 343982 1788 | 18 344459 1789 | 18 346821 1790 | 18 351004 1791 | 18 351114 1792 | 18 352032 1793 | 18 352577 1794 | 18 354264 1795 | 18 354439 1796 | 18 355410 1797 | 18 355568 1798 | 18 355944 1799 | 18 356095 1800 | 18 357207 1801 | 19 266034 1802 | 19 267039 1803 | 19 267614 1804 | 19 271994 1805 | 19 275688 1806 | 19 276245 1807 | 19 276663 1808 | 19 278419 1809 | 19 280299 1810 | 19 282753 1811 | 19 283037 1812 | 19 288246 1813 | 19 289244 1814 | 19 292082 1815 | 19 292901 1816 | 19 293671 1817 | 19 293913 1818 | 19 294526 1819 | 19 301619 1820 | 19 304499 1821 | 19 315788 1822 | 19 316040 1823 | 19 318586 1824 | 19 319760 1825 | 19 319822 1826 | 19 320982 1827 | 19 321048 1828 | 19 327323 1829 | 19 328855 1830 | 19 331028 1831 | 19 334474 1832 | 19 338412 1833 | 19 338693 1834 | 19 339812 1835 | 19 340700 1836 | 19 342556 1837 | 19 344053 1838 | 19 346307 1839 | 19 348743 1840 | 19 352341 1841 | 19 354070 1842 | 19 354248 1843 | 19 363127 1844 | 19 364695 1845 | 19 366451 1846 | 19 366774 1847 | 19 366804 1848 | 19 366840 1849 | 19 366846 1850 | 19 367313 1851 | 19 372661 1852 | 19 374016 1853 | 19 376382 1854 | 19 379003 1855 | 19 384644 1856 | 19 388413 1857 | 19 389873 1858 | 19 390341 1859 | 19 390627 1860 | 19 391296 1861 | 19 391569 1862 | 19 401714 1863 | 19 405905 1864 | 19 405929 1865 | 19 406370 1866 | 19 406934 1867 | 19 411849 1868 | 19 416834 1869 | 19 416963 1870 | 19 417396 1871 | 19 417714 1872 | 19 417737 1873 | 19 418178 1874 | 19 418436 1875 | 19 419407 1876 | 19 420755 1877 | 19 431187 1878 | 19 450515 1879 | 19 454430 1880 | 19 464612 1881 | 19 468457 1882 | 19 472149 1883 | 19 482984 1884 | 19 483488 1885 | 19 495722 1886 | 19 499978 1887 | 19 501900 1888 | 19 508626 1889 | 19 509717 1890 | 19 510818 1891 | 19 518686 1892 | 19 525739 1893 | 19 533133 1894 | 19 535004 1895 | 19 538092 1896 | 19 538531 1897 | 19 538994 1898 | 19 539266 1899 | 19 539730 1900 | 19 539871 1901 | 20 63244 1902 | 20 63799 1903 | 20 68749 1904 | 20 69094 1905 | 20 74347 1906 | 20 76962 1907 | 20 80655 1908 | 20 87416 1909 | 20 90984 1910 | 20 91508 1911 | 20 97122 1912 | 20 98930 1913 | 20 100505 1914 | 20 100699 1915 | 20 101362 1916 | 20 102181 1917 | 20 108328 1918 | 20 124302 1919 | 20 125121 1920 | 20 126417 1921 | 20 126529 1922 | 20 126614 1923 | 20 126678 1924 | 20 126914 1925 | 20 126923 1926 | 20 127194 1927 | 20 129063 1928 | 20 129613 1929 | 20 131362 1930 | 20 134931 1931 | 20 136568 1932 | 20 139456 1933 | 20 140086 1934 | 20 140280 1935 | 20 147685 1936 | 20 161502 1937 | 20 164083 1938 | 20 165832 1939 | 20 168020 1940 | 20 169995 1941 | 20 170642 1942 | 20 171189 1943 | 20 181638 1944 | 20 181967 1945 | 20 182013 1946 | 20 186405 1947 | 20 188107 1948 | 20 188318 1949 | 20 191797 1950 | 20 192069 1951 | 20 193500 1952 | 20 197162 1953 | 20 206402 1954 | 20 207903 1955 | 20 208550 1956 | 20 208685 1957 | 20 210539 1958 | 20 210634 1959 | 20 211431 1960 | 20 213014 1961 | 20 213423 1962 | 20 213523 1963 | 20 213842 1964 | 20 216811 1965 | 20 217281 1966 | 20 221018 1967 | 20 222632 1968 | 20 222836 1969 | 20 224128 1970 | 20 225058 1971 | 20 227967 1972 | 20 230983 1973 | 20 236802 1974 | 20 237497 1975 | 20 238507 1976 | 20 239294 1977 | 20 239688 1978 | 20 240849 1979 | 20 248926 1980 | 20 250076 1981 | 20 250844 1982 | 20 250983 1983 | 20 251494 1984 | 20 252165 1985 | 20 252721 1986 | 20 254013 1987 | 20 255763 1988 | 20 256573 1989 | 20 256992 1990 | 20 258711 1991 | 20 258865 1992 | 20 264272 1993 | 20 264997 1994 | 20 265365 1995 | 20 265800 1996 | 20 265968 1997 | 20 269006 1998 | 20 272717 1999 | 20 274333 2000 | 20 277832 2001 | 21 10205629 2002 | 21 10971951 2003 | 21 11002011 2004 | 21 14601415 2005 | 21 14652908 2006 | 21 14771811 2007 | 21 14773232 2008 | 21 14847294 2009 | 21 14901920 2010 | 21 15137578 2011 | 21 15181242 2012 | 21 15206684 2013 | 21 15214708 2014 | 21 15231699 2015 | 21 15270083 2016 | 21 15275981 2017 | 21 15330476 2018 | 21 15331478 2019 | 21 15340098 2020 | 21 15375507 2021 | 21 15390510 2022 | 21 15400618 2023 | 21 15412399 2024 | 21 15412831 2025 | 21 15443935 2026 | 21 15445468 2027 | 21 15445694 2028 | 21 15448658 2029 | 21 15450929 2030 | 21 15454659 2031 | 21 15459127 2032 | 21 15463956 2033 | 21 15464776 2034 | 21 15467704 2035 | 21 15469264 2036 | 21 15474153 2037 | 21 15476890 2038 | 21 15478931 2039 | 21 15478975 2040 | 21 15479537 2041 | 21 15481365 2042 | 21 15482605 2043 | 21 15491250 2044 | 21 15491294 2045 | 21 15491560 2046 | 21 15491773 2047 | 21 15491871 2048 | 21 15494621 2049 | 21 15500079 2050 | 21 15501285 2051 | 21 15501522 2052 | 21 15502332 2053 | 21 15502904 2054 | 21 15504006 2055 | 21 15511035 2056 | 21 15512010 2057 | 21 15512083 2058 | 21 15514013 2059 | 21 15515843 2060 | 21 15516012 2061 | 21 15516330 2062 | 21 15518103 2063 | 21 15523797 2064 | 21 15527617 2065 | 21 15531192 2066 | 21 15534637 2067 | 21 15536548 2068 | 21 15545350 2069 | 21 15546137 2070 | 21 15546312 2071 | 21 15546445 2072 | 21 15548739 2073 | 21 15550435 2074 | 21 15551473 2075 | 21 15552993 2076 | 21 15553112 2077 | 21 15554030 2078 | 21 15559454 2079 | 21 15559681 2080 | 21 15576536 2081 | 21 15580827 2082 | 21 15584096 2083 | 21 15588213 2084 | 21 15591689 2085 | 21 15592343 2086 | 21 15593860 2087 | 21 15594371 2088 | 21 15594465 2089 | 21 15594495 2090 | 21 15594829 2091 | 21 15595195 2092 | 21 15595654 2093 | 21 15595743 2094 | 21 15596772 2095 | 21 15597894 2096 | 21 15599765 2097 | 21 15599963 2098 | 21 15601205 2099 | 21 15603589 2100 | 21 15603999 2101 | 22 16888577 2102 | 22 16918335 2103 | 22 17032698 2104 | 22 17054720 2105 | 22 17057138 2106 | 22 17075353 2107 | 22 17087656 2108 | 22 17099107 2109 | 22 17104369 2110 | 22 17152611 2111 | 22 17154984 2112 | 22 17178213 2113 | 22 17178230 2114 | 22 17200597 2115 | 22 17202602 2116 | 22 17212553 2117 | 22 17219625 2118 | 22 17227403 2119 | 22 17256231 2120 | 22 17264565 2121 | 22 17265194 2122 | 22 17273178 2123 | 22 17278762 2124 | 22 17280822 2125 | 22 17281931 2126 | 22 17285949 2127 | 22 17288988 2128 | 22 17289118 2129 | 22 17294251 2130 | 22 17301217 2131 | 22 17301492 2132 | 22 17301843 2133 | 22 17303596 2134 | 22 17304246 2135 | 22 17304887 2136 | 22 17306104 2137 | 22 17306184 2138 | 22 17306270 2139 | 22 17310741 2140 | 22 17311027 2141 | 22 17312596 2142 | 22 17315102 2143 | 22 17315503 2144 | 22 17317233 2145 | 22 17318150 2146 | 22 17319156 2147 | 22 17326181 2148 | 22 17326432 2149 | 22 17326668 2150 | 22 17327181 2151 | 22 17331656 2152 | 22 17339003 2153 | 22 17347294 2154 | 22 17393239 2155 | 22 17393876 2156 | 22 17394989 2157 | 22 17395610 2158 | 22 17396612 2159 | 22 17397875 2160 | 22 17398508 2161 | 22 17398800 2162 | 22 17398812 2163 | 22 17399211 2164 | 22 17405173 2165 | 22 17407349 2166 | 22 17407566 2167 | 22 17409897 2168 | 22 17410373 2169 | 22 17410790 2170 | 22 17412168 2171 | 22 17412216 2172 | 22 17412806 2173 | 22 17413554 2174 | 22 17414103 2175 | 22 17414640 2176 | 22 17415572 2177 | 22 17421298 2178 | 22 17426401 2179 | 22 17427037 2180 | 22 17427455 2181 | 22 17429384 2182 | 22 17429434 2183 | 22 17430040 2184 | 22 17433210 2185 | 22 17433888 2186 | 22 17434084 2187 | 22 17434253 2188 | 22 17435074 2189 | 22 17436283 2190 | 22 17436846 2191 | 22 17437310 2192 | 22 17437500 2193 | 22 17441524 2194 | 22 17442154 2195 | 22 17442987 2196 | 22 17443131 2197 | 22 17444748 2198 | 22 17445502 2199 | 22 17446157 2200 | 22 17446914 2201 | X 990180 2202 | X 1501471 2203 | X 2700157 2204 | X 2709331 2205 | X 2710887 2206 | X 2711961 2207 | X 2713211 2208 | X 2719111 2209 | X 2729625 2210 | X 2732096 2211 | X 2732166 2212 | X 2743286 2213 | X 2743627 2214 | X 2746489 2215 | X 2746835 2216 | X 2759615 2217 | X 2765370 2218 | X 2765925 2219 | X 2767020 2220 | X 2767269 2221 | X 2767366 2222 | X 2774700 2223 | X 2775998 2224 | X 2777107 2225 | X 2777560 2226 | X 2779345 2227 | X 2783126 2228 | X 2783555 2229 | X 2785740 2230 | X 2786596 2231 | X 2786608 2232 | X 2789848 2233 | X 2790762 2234 | X 2791113 2235 | X 2791278 2236 | X 2791604 2237 | X 2792617 2238 | X 2793443 2239 | X 2794217 2240 | X 2800295 2241 | X 2800624 2242 | X 2817304 2243 | X 2820455 2244 | X 2821978 2245 | X 2822601 2246 | X 2823658 2247 | X 2823683 2248 | X 2825363 2249 | X 2825403 2250 | X 2835863 2251 | X 2836037 2252 | X 2836041 2253 | X 2836084 2254 | X 2836181 2255 | X 2836349 2256 | X 2847017 2257 | X 2847133 2258 | X 2850901 2259 | X 2865296 2260 | X 2873356 2261 | X 2876682 2262 | X 2876683 2263 | X 2881200 2264 | X 2885257 2265 | X 2885262 2266 | X 2885723 2267 | X 2886144 2268 | X 2891545 2269 | X 2892237 2270 | X 2893167 2271 | X 2893255 2272 | X 2896017 2273 | X 2897195 2274 | X 2908602 2275 | X 2910100 2276 | X 2910684 2277 | X 2911463 2278 | X 2918142 2279 | X 2920459 2280 | X 2924934 2281 | X 2931530 2282 | X 2931940 2283 | X 2933955 2284 | X 2936675 2285 | X 2947274 2286 | X 2951434 2287 | X 2961605 2288 | X 2962707 2289 | X 2996976 2290 | X 3005242 2291 | X 3007480 2292 | X 3010500 2293 | X 3010694 2294 | X 3011751 2295 | X 3012405 2296 | X 3019465 2297 | X 3025174 2298 | X 3028385 2299 | X 3030426 2300 | X 3030760 2301 | Y 2649694 2302 | Y 2649696 2303 | Y 2650701 2304 | Y 2650749 2305 | Y 2651325 2306 | Y 2651356 2307 | Y 2652676 2308 | Y 2653037 2309 | Y 2653069 2310 | Y 2653141 2311 | Y 2654333 2312 | Y 2655180 2313 | Y 2656127 2314 | Y 2656959 2315 | Y 2657176 2316 | Y 2657349 2317 | Y 2657411 2318 | Y 2657780 2319 | Y 2657915 2320 | Y 2658271 2321 | Y 2658285 2322 | Y 2658341 2323 | Y 2658357 2324 | Y 2658869 2325 | Y 2659191 2326 | Y 2659347 2327 | Y 2659355 2328 | Y 2659661 2329 | Y 2660712 2330 | Y 2660764 2331 | Y 2660891 2332 | Y 2661236 2333 | Y 2661367 2334 | Y 2661405 2335 | Y 2661694 2336 | Y 2661813 2337 | Y 2661836 2338 | Y 2661873 2339 | Y 2661881 2340 | Y 2661906 2341 | Y 2662100 2342 | Y 2662121 2343 | Y 2662692 2344 | Y 2662740 2345 | Y 2662745 2346 | Y 2662805 2347 | Y 2663160 2348 | Y 2663241 2349 | Y 2663707 2350 | Y 2663889 2351 | Y 2663920 2352 | Y 2663943 2353 | Y 2665280 2354 | Y 2665537 2355 | Y 2665570 2356 | Y 2666847 2357 | Y 2667538 2358 | Y 2667652 2359 | Y 2667783 2360 | Y 2667926 2361 | Y 2668007 2362 | Y 2668224 2363 | Y 2668395 2364 | Y 2668456 2365 | Y 2668533 2366 | Y 2668809 2367 | Y 2669416 2368 | Y 2669716 2369 | Y 2670114 2370 | Y 2671852 2371 | Y 2672087 2372 | Y 2672414 2373 | Y 2672425 2374 | Y 2672454 2375 | Y 2673440 2376 | Y 2673442 2377 | Y 2673443 2378 | Y 2673446 2379 | Y 2678179 2380 | Y 2680720 2381 | Y 2681272 2382 | Y 2681367 2383 | Y 2681740 2384 | Y 2683227 2385 | Y 2685228 2386 | Y 2686422 2387 | Y 2686642 2388 | Y 2686727 2389 | Y 2687198 2390 | Y 2688442 2391 | Y 2688711 2392 | Y 2689022 2393 | Y 2690677 2394 | Y 2691002 2395 | Y 2691258 2396 | Y 2691796 2397 | Y 2692142 2398 | Y 2693892 2399 | Y 2693921 2400 | Y 2693957 2401 | --------------------------------------------------------------------------------