├── .gitignore ├── example_vcfs ├── testsamples.vcf └── test.vcf ├── doc.go ├── CONTRIBUTING.md ├── svtype_string.go ├── LICENSE ├── example_test.go ├── README.md ├── info.go ├── vcf.go ├── vcf_whitebox_test.go └── vcf_blackbox_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | -------------------------------------------------------------------------------- /example_vcfs/testsamples.vcf: -------------------------------------------------------------------------------- 1 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 111222 2 | 1 762589 . G C 40 PASS 3 | -------------------------------------------------------------------------------- /example_vcfs/test.vcf: -------------------------------------------------------------------------------- 1 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 111222 2 | 1 762589 . G C 40 PASS AC=2;AF=1.00;AN=2;DP=5;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=43.32;MQ0=0;QD=29.99;VQSLOD=1.18;culprit=MQ;set=variant GT:AD:GQ:PL 1/1:0,5:15:470,15,0 3 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package vcf provides an API for parsing genomic data compliant with the Variant Call Format 4.2 Specification 2 | // 3 | // This API is built with channels, assuming asynchronous computation. Variants parsed successfully are sent 4 | // immediately to the consumer of the API through a channel, as well as variants that fail to be processed. 5 | package vcf 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are extremely welcome. Feel free to add issues and pull requests. 4 | 5 | Before contributing code changes, keep the following in mind: 6 | 7 | * Make sure to write tests for new code. 
TDD is encouraged. 8 | 9 | * The `vcf` package uses [testify](https://github.com/stretchr/testify) to write its tests. To keep consistency in the code base, new tests should also be written with it. 10 | 11 | * All tests must pass, both existing and new. 12 | -------------------------------------------------------------------------------- /svtype_string.go: -------------------------------------------------------------------------------- 1 | // generated by stringer -type=SVType; DO NOT EDIT 2 | 3 | package vcf 4 | 5 | import "fmt" 6 | 7 | const _SVType_name = "DeletionDuplicationInsertionInversionCopyNumberVariationTandemDuplicationDeletionMobileElementInsertionMobileElementBreakend" 8 | 9 | var _SVType_index = [...]uint8{0, 8, 19, 28, 37, 56, 73, 94, 116, 124} 10 | 11 | func (i SVType) String() string { 12 | if i < 0 || i >= SVType(len(_SVType_index)-1) { 13 | return fmt.Sprintf("SVType(%d)", i) 14 | } 15 | return _SVType_name[_SVType_index[i]:_SVType_index[i+1]] 16 | } 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Mendelics Análise Genômica S.A. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | package vcf 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | ) 8 | 9 | // Channels should be initialized and passed to the ToChannel function. The client should not close the channels 10 | // This will happen inside ToChannel, when the input is exhausted. 
11 | func Example() { 12 | validVariants := make(chan *Variant, 100) // buffered channel for correctly parsed variants 13 | invalidVariants := make(chan InvalidLine, 100) // buffered channel for variants that fail to parse 14 | 15 | filename := "example_vcfs/test.vcf" 16 | 17 | vcfFile, err := os.Open(filename) 18 | if err != nil { 19 | log.Fatalln("can't open file", filename) 20 | } 21 | defer vcfFile.Close() 22 | 23 | go func() { 24 | err := ToChannel(vcfFile, validVariants, invalidVariants) 25 | if err != nil { 26 | log.Fatalln(err) 27 | } 28 | }() 29 | 30 | go func() { 31 | // consume invalid variants channel asynchronously 32 | for invalid := range invalidVariants { 33 | fmt.Println("failed to parse line", invalid.Line, "with error", invalid.Err) 34 | } 35 | }() 36 | 37 | for variant := range validVariants { 38 | fmt.Println(variant) 39 | if variant.Qual != nil { 40 | fmt.Println("Quality:", *variant.Qual) 41 | } 42 | fmt.Println("Filter:", variant.Filter) 43 | fmt.Println("Allele Count:", *variant.AlleleCount) 44 | fmt.Println("Allele Frequency:", *variant.AlleleFrequency) 45 | fmt.Println("Total Alleles:", *variant.TotalAlleles) 46 | fmt.Println("Depth:", *variant.Depth) 47 | fmt.Println("Mapping Quality:", *variant.MappingQuality) 48 | fmt.Println("MAPQ0 Reads:", *variant.MAPQ0Reads) 49 | 50 | rawInfo := variant.Info 51 | vqslod := rawInfo["VQSLOD"] 52 | fmt.Println("VQSLOD:", vqslod) 53 | } 54 | 55 | // output: 56 | // Chromosome: 1 Position: 762588 Reference: G Alternative: C 57 | // Quality: 40 58 | // Filter: PASS 59 | // Allele Count: 2 60 | // Allele Frequency: 1 61 | // Total Alleles: 2 62 | // Depth: 5 63 | // Mapping Quality: 43.32 64 | // MAPQ0 Reads: 0 65 | // VQSLOD: 1.18 66 | } 67 | 68 | func ExampleSampleIDs() { 69 | filename := "example_vcfs/testsamples.vcf" 70 | vcfFile, err := os.Open(filename) 71 | if err != nil { 72 | log.Fatalln("can't open file", filename) 73 | } 74 | defer vcfFile.Close() 75 | 76 | sampleIDs, err := SampleIDs(vcfFile) 
77 | if err == nil && sampleIDs != nil { 78 | for i, sample := range sampleIDs { 79 | fmt.Printf("sample %d: %s\n", i, sample) 80 | } 81 | } 82 | // output: 83 | // sample 0: 111222 84 | } 85 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | vcf 2 | === 3 | 4 | `vcf` is a `golang` package that parses data from an `io.Reader` adhering to the [Variant Call Format v4.2 Specification](https://samtools.github.io/hts-specs/VCFv4.2.pdf). 5 | 6 | Data is read asynchronously and returned through two channels, one with correctly parsed variants and one with unknown variants whose parsing failed. Proper initialization and buffering of these channels is a responsibility of the client. 7 | 8 | This package is still work in progress, subject to change at any time without notice. Releases will follow [Semantic Versioning 2.0.0](http://semver.org/spec/v2.0.0.html). Major is still in `v0` to reflect the early stage development this package is in. 9 | 10 | ### INFO 11 | 12 | Currently, parsing can handle Samples, optional fields such as ID, Quality and Filter, as well as the INFO field. INFO is exposed in two ways: 13 | 14 | * As a `map[string]interface{}` exposing all fields found on the INFO for each variant, without any treatment. Key-value pairs are added to this map. In the case of keys such as `DB` which don't have a value, the value used is a `true` boolean. 15 | * As a series of sub-fields listed on section `1.4.1-8` of the [VCF 4.2 spec](https://samtools.github.io/hts-specs/VCFv4.2.pdf). These sub-fields are provided in a best effort manner. Failure to parse one of these sub-fields will only cause its corresponding pointer to be `nil`, not generating an error. The raw data can always be found on the map. 
16 | 17 | ### Genotype fields 18 | 19 | Genotype fields (section `1.4.2` on the [spec](https://samtools.github.io/hts-specs/VCFv4.2.pdf)) do not have the same kind of treatment yet. They are separated by sample, but the only form represented is a raw map. Easy access to sub-fields is intended in the future. 20 | 21 | ### Structural variants 22 | 23 | Structural variants have not been addressed as of version [`0.1.0`](https://github.com/mendelics/vcf/releases/tag/0.1.0). 24 | 25 | ### License 26 | 27 | This software uses the [BSD 3-Clause License](http://opensource.org/licenses/BSD-3-Clause). 28 | 29 | --- 30 | 31 | Copyright (c) 2015, Mendelics Análise Genômica S.A. 32 | All rights reserved. 33 | 34 | Redistribution and use in source and binary forms, with or without 35 | modification, are permitted provided that the following conditions are met: 36 | 37 | * Redistributions of source code must retain the above copyright notice, this 38 | list of conditions and the following disclaimer. 39 | 40 | * Redistributions in binary form must reproduce the above copyright notice, 41 | this list of conditions and the following disclaimer in the documentation 42 | and/or other materials provided with the distribution. 43 | 44 | * Neither the name of the copyright holder nor the names of its contributors 45 | may be used to endorse or promote products derived from this software 46 | without specific prior written permission. 47 | 48 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 49 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 51 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 52 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 54 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 55 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 56 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 57 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 58 | -------------------------------------------------------------------------------- /info.go: -------------------------------------------------------------------------------- 1 | package vcf 2 | 3 | import ( 4 | "strconv" 5 | "strings" 6 | ) 7 | 8 | func infoToMap(info string) map[string]interface{} { 9 | infoMap := make(map[string]interface{}) 10 | fields := strings.Split(info, ";") 11 | for _, field := range fields { 12 | if strings.Contains(field, "=") { 13 | split := strings.Split(field, "=") 14 | fieldName, fieldValue := split[0], split[1] 15 | infoMap[fieldName] = fieldValue 16 | } else { 17 | infoMap[field] = true 18 | } 19 | } 20 | return infoMap 21 | } 22 | 23 | func buildInfoSubFields(variant *Variant) { 24 | info := variant.Info 25 | variant.Depth = parseIntFromInfoMap("DP", info) 26 | variant.AlleleFrequency = parseFloatFromInfoMap("AF", info) 27 | variant.AncestralAllele = parseStringFromInfoMap("AA", info) 28 | variant.AlleleCount = parseIntFromInfoMap("AC", info) 29 | variant.TotalAlleles = parseIntFromInfoMap("AN", info) 30 | variant.End = parseIntFromInfoMap("END", info) 31 | variant.MAPQ0Reads = parseIntFromInfoMap("MQ0", info) 32 | variant.NumberOfSamples = parseIntFromInfoMap("NS", info) 33 | variant.MappingQuality = parseFloatFromInfoMap("MQ", info) 34 | variant.Cigar = parseStringFromInfoMap("CIGAR", info) 35 | variant.InDBSNP = parseBoolFromInfoMap("DB", info) 36 | variant.InHapmap2 = 
parseBoolFromInfoMap("H2", info) 37 | variant.InHapmap3 = parseBoolFromInfoMap("H3", info) 38 | variant.IsSomatic = parseBoolFromInfoMap("SOMATIC", info) 39 | variant.IsValidated = parseBoolFromInfoMap("VALIDATED", info) 40 | variant.In1000G = parseBoolFromInfoMap("1000G", info) 41 | variant.BaseQuality = parseFloatFromInfoMap("BQ", info) 42 | variant.StrandBias = parseFloatFromInfoMap("SB", info) 43 | variant.Imprecise = parseBoolFromInfoMap("IMPRECISE", info) 44 | variant.Novel = parseBoolFromInfoMap("NOVEL", info) 45 | 46 | if rawSVType := parseStringFromInfoMap("SVTYPE", info); rawSVType != nil { 47 | variant.StructuralVariantType = svTypeFromString(rawSVType) 48 | } 49 | 50 | variant.StructuralVariantLength = parseIntFromInfoMap("SVLEN", info) 51 | variant.ConfidenceIntervalAroundPosition = parseIntFromInfoMap("CIPOS", info) 52 | variant.ConfidenceIntervalAroundEnd = parseIntFromInfoMap("CIEND", info) 53 | } 54 | 55 | func parseIntFromInfoMap(key string, info map[string]interface{}) *int { 56 | if value, found := info[key]; found { 57 | if str, ok := value.(string); ok { 58 | intvalue, err := strconv.Atoi(str) 59 | if err == nil { 60 | return &intvalue 61 | } 62 | } 63 | } 64 | return nil 65 | } 66 | 67 | func parseStringFromInfoMap(key string, info map[string]interface{}) *string { 68 | if value, found := info[key]; found { 69 | if str, ok := value.(string); ok { 70 | return &str 71 | } 72 | } 73 | return nil 74 | } 75 | 76 | func parseFloatFromInfoMap(key string, info map[string]interface{}) *float64 { 77 | if value, found := info[key]; found { 78 | if str, ok := value.(string); ok { 79 | floatvalue, err := strconv.ParseFloat(str, 64) 80 | if err == nil { 81 | return &floatvalue 82 | } 83 | } 84 | } 85 | return nil 86 | } 87 | 88 | func parseBoolFromInfoMap(key string, info map[string]interface{}) *bool { 89 | if value, found := info[key]; found { 90 | if b, ok := value.(bool); ok { 91 | return &b 92 | } 93 | } 94 | return nil 95 | } 96 | 97 | var svTypeMap 
= map[string]SVType{ 98 | "DEL": Deletion, 99 | "DUP": Duplication, 100 | "INS": Insertion, 101 | "INV": Inversion, 102 | "CNV": CopyNumberVariation, 103 | "DUP:TANDEM": TandemDuplication, 104 | "DEL:ME": DeletionMobileElement, 105 | "INS:ME": InsertionMobileElement, 106 | "BND": Breakend, 107 | } 108 | 109 | func svTypeFromString(s *string) *SVType { 110 | if k, exists := svTypeMap[*s]; exists { 111 | return &k 112 | } 113 | return nil 114 | } 115 | 116 | func splitMultipleAltInfos(info map[string]interface{}, numberOfAlternatives int) []map[string]interface{} { 117 | maps := make([]map[string]interface{}, 0, 2) 118 | separator := "," 119 | 120 | for key, v := range info { 121 | if value, ok := v.(string); ok { 122 | if strings.Contains(value, separator) { 123 | alternatives := strings.Split(value, separator) 124 | for position, alt := range alternatives { 125 | maps = insertMapSlice(maps, position, key, alt) 126 | } 127 | } else { 128 | for i := 0; i < numberOfAlternatives; i++ { 129 | maps = insertMapSlice(maps, i, key, value) 130 | } 131 | } 132 | } else { 133 | maps = insertMapSlice(maps, 0, key, v) 134 | } 135 | } 136 | 137 | return maps 138 | } 139 | 140 | func insertMapSlice(maps []map[string]interface{}, position int, key string, alt interface{}) []map[string]interface{} { 141 | if len(maps) <= position { 142 | for i := len(maps); i <= position; i++ { 143 | maps = append(maps, make(map[string]interface{})) 144 | } 145 | } 146 | maps[position][key] = alt 147 | return maps 148 | } 149 | -------------------------------------------------------------------------------- /vcf.go: -------------------------------------------------------------------------------- 1 | package vcf 2 | 3 | import ( 4 | "bufio" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "log" 9 | "strconv" 10 | "strings" 11 | ) 12 | 13 | //go:generate stringer -type=SVType 14 | type SVType int 15 | 16 | const ( 17 | Deletion SVType = iota 18 | Duplication 19 | Insertion 20 | Inversion 21 | CopyNumberVariation 
22 | TandemDuplication 23 | DeletionMobileElement 24 | InsertionMobileElement 25 | Breakend 26 | ) 27 | 28 | // Variant is a struct representing the fields specified in the VCF 4.2 spec. 29 | // 30 | // When the variant is generated through the API of the vcf package, the required fields are guaranteed to be valid, 31 | // otherwise the parsing for the variant fails and is reported. 32 | // 33 | // Multiple alternatives are parsed as separated instances of the type Variant. All other fields are optional and will 34 | // not cause parsing fails if missing or non-conformant. 35 | type Variant struct { 36 | // Required fields 37 | Chrom string 38 | Pos int 39 | Ref string 40 | Alt string 41 | 42 | ID string 43 | 44 | // Qual is a pointer so that it can be set to nil when it is a dot '.' 45 | Qual *float64 46 | 47 | Filter string 48 | 49 | // Info is a map containing all the keys present in the INFO field, with their corresponding value. 50 | // For keys without corresponding values, the value is a `true` bool. 51 | // No attempt at parsing is made on this field, data is raw. 52 | // The only exception is for multiple alternatives data. These are reported separately for each variant. 53 | Info map[string]interface{} 54 | 55 | // Genotype fields for each sample 56 | Samples []map[string]string 57 | 58 | // Optional info fields. These are the reserved fields listed on the VCF 4.2 spec, session 1.4.1, number 8. 59 | // The parsing is lenient, if the fields do not conform to the expected type listed here, they will be set to nil. 60 | // The fields are meant as helpers for common scenarios, since the generic usage is covered by the Info map. 61 | // Definitions used in the metadata section of the header are not used. 
62 | AncestralAllele *string 63 | Depth *int 64 | AlleleFrequency *float64 65 | AlleleCount *int 66 | TotalAlleles *int 67 | End *int 68 | MAPQ0Reads *int 69 | NumberOfSamples *int 70 | MappingQuality *float64 71 | Cigar *string 72 | InDBSNP *bool 73 | InHapmap2 *bool 74 | InHapmap3 *bool 75 | IsSomatic *bool 76 | IsValidated *bool 77 | In1000G *bool 78 | BaseQuality *float64 79 | StrandBias *float64 80 | 81 | // Structural variants 82 | Imprecise *bool 83 | Novel *bool 84 | StructuralVariantType *SVType 85 | StructuralVariantLength *int 86 | ConfidenceIntervalAroundPosition *int 87 | ConfidenceIntervalAroundEnd *int 88 | } 89 | 90 | // String provides a representation of the variant key: the fields Chrom, Pos, Ref and Alt 91 | // compatible with fmt.Stringer 92 | func (v *Variant) String() string { 93 | return fmt.Sprintf("Chromosome: %s Position: %d Reference: %s Alternative: %s", v.Chrom, v.Pos, v.Ref, v.Alt) 94 | } 95 | 96 | // InvalidLine represents a VCF line that could not be parsed. 97 | // It encapsulates the problematic line with its corresponding error. 98 | type InvalidLine struct { 99 | Line string 100 | Err error 101 | } 102 | 103 | // ToChannel reads from an io.Reader and puts all variants into an already initialized channel. 104 | // Variants whose parsing fails go into a specific channel for failing variants. 105 | // If any of the two channels are full, ToChannel will block. 106 | // The consumer must guarantee there is enough buffer space on the channels. 107 | // Both channels are closed when the reader is fully scanned. 
108 | func ToChannel(reader io.Reader, output chan<- *Variant, invalids chan<- InvalidLine) error { 109 | bufferedReader := bufio.NewReaderSize(reader, 100*1024) 110 | header, err := vcfHeader(bufferedReader) 111 | if err != nil { 112 | return err 113 | } 114 | 115 | for { 116 | line, readError := bufferedReader.ReadString('\n') 117 | if readError != nil && readError != io.EOF { 118 | // If an error that is not an EOF happens break immediately 119 | err = readError 120 | break 121 | } 122 | if line == "" && readError == io.EOF { 123 | // If there is an empty line at EOF, end the loop without propagating the error 124 | break 125 | } 126 | if isHeaderLine(line) { 127 | continue 128 | } 129 | variants, err := parseVcfLine(line, header) 130 | if variants != nil && err == nil { 131 | for _, variant := range variants { 132 | fixedVariant := fixRefAltSuffix(variant) 133 | output <- fixedVariant 134 | } 135 | } else if err != nil { 136 | invalids <- InvalidLine{line, err} 137 | } 138 | // Check again for a read error. This is only possible on EOF 139 | if readError != nil { 140 | break 141 | } 142 | } 143 | 144 | close(output) 145 | close(invalids) 146 | 147 | return err 148 | } 149 | 150 | // SampleIDs reads a vcf header from an io.Reader and returns a slice with all the sample IDs contained in that header. 
151 | // If there are no samples on the header, a nil slice is returned 152 | func SampleIDs(reader io.Reader) ([]string, error) { 153 | bufferedReader := bufio.NewReaderSize(reader, 100*1024) 154 | header, err := vcfHeader(bufferedReader) 155 | if err != nil { 156 | return nil, err 157 | } 158 | if len(header) > 9 { 159 | return header[9:], nil 160 | } 161 | return nil, nil 162 | } 163 | 164 | func vcfHeader(bufferedReader *bufio.Reader) ([]string, error) { 165 | for { 166 | line, err := bufferedReader.ReadString('\n') 167 | if err != nil { 168 | return nil, err 169 | } 170 | if strings.HasPrefix(line, "#") && !strings.HasPrefix(line, "##") { 171 | line = strings.TrimSpace(line) 172 | return strings.Split(line[1:], "\t"), nil 173 | } 174 | } 175 | return nil, errors.New("vcf header not found on file") 176 | } 177 | 178 | func isHeaderLine(line string) bool { 179 | return strings.HasPrefix(line, "#") 180 | } 181 | 182 | type vcfLine struct { 183 | Chr, Pos, ID, Ref, Alt, Qual, Filter, Info string 184 | Format []string 185 | Samples []map[string]string 186 | } 187 | 188 | func parseVcfLine(line string, header []string) ([]*Variant, error) { 189 | line = strings.TrimSpace(line) 190 | vcfLine, err := splitVcfFields(line) 191 | if err != nil { 192 | return nil, errors.New("unable to parse apparently misformatted VCF line: " + line) 193 | } 194 | 195 | baseVariant := Variant{} 196 | 197 | if strings.Contains(vcfLine.Chr, "chr") { 198 | baseVariant.Chrom = strings.Replace(vcfLine.Chr, "chr", "", -1) 199 | } else { 200 | baseVariant.Chrom = vcfLine.Chr 201 | } 202 | pos, _ := strconv.Atoi(vcfLine.Pos) 203 | baseVariant.Pos = pos - 1 // converts variant to 0-based 204 | baseVariant.Ref = strings.ToUpper(vcfLine.Ref) 205 | baseVariant.Alt = strings.ToUpper(strings.Replace(vcfLine.Alt, ".", "", -1)) 206 | 207 | baseVariant.ID = vcfLine.ID 208 | floatQuality, err := strconv.ParseFloat(vcfLine.Qual, 64) 209 | if err == nil { 210 | baseVariant.Qual = &floatQuality 211 | } else 
if vcfLine.Qual == "." { 212 | baseVariant.Qual = nil 213 | } else { 214 | baseVariant.Qual = nil 215 | log.Println("unable to parse quality as float, setting as nil") 216 | } 217 | baseVariant.Filter = vcfLine.Filter 218 | baseVariant.Samples = vcfLine.Samples 219 | baseVariant.Info = infoToMap(vcfLine.Info) 220 | 221 | alternatives := strings.Split(baseVariant.Alt, ",") 222 | 223 | info := splitMultipleAltInfos(baseVariant.Info, len(alternatives)) 224 | 225 | result := make([]*Variant, 0, 64) 226 | for i, alternative := range alternatives { 227 | var altinfo map[string]interface{} 228 | if i >= len(info) { 229 | altinfo = info[0] 230 | } else { 231 | altinfo = info[i] 232 | } 233 | 234 | variant := &Variant{ 235 | Chrom: baseVariant.Chrom, 236 | Pos: baseVariant.Pos, 237 | Ref: baseVariant.Ref, 238 | Alt: alternative, 239 | ID: baseVariant.ID, 240 | Samples: baseVariant.Samples, 241 | Info: altinfo, 242 | Qual: baseVariant.Qual, 243 | Filter: baseVariant.Filter, 244 | } 245 | buildInfoSubFields(variant) 246 | 247 | result = append(result, variant) 248 | } 249 | return result, nil 250 | } 251 | 252 | func splitVcfFields(line string) (ret *vcfLine, err error) { 253 | line = strings.TrimSpace(line) 254 | 255 | fields := strings.Split(line, "\t") 256 | 257 | if len(fields) < 8 { 258 | return nil, errors.New("wrong amount of columns: " + string(len(fields))) 259 | } 260 | ret = &vcfLine{} 261 | 262 | ret.Chr = fields[0] 263 | ret.Pos = fields[1] 264 | ret.ID = fields[2] 265 | ret.Ref = fields[3] 266 | ret.Alt = fields[4] 267 | ret.Qual = fields[5] 268 | ret.Filter = fields[6] 269 | ret.Info = fields[7] 270 | 271 | if len(fields) > 8 { 272 | samples := fields[9:len(fields)] 273 | ret.Samples = make([]map[string]string, len(fields)-9) 274 | ret.Format = strings.Split(fields[8], ":") 275 | for i, sample := range samples { 276 | ret.Samples[i] = parseSample(ret.Format, sample) 277 | } 278 | } 279 | 280 | return 281 | } 282 | 283 | func parseSample(format []string, 
unparsedSample string) map[string]string { 284 | sampleMapping := make(map[string]string) 285 | sampleFields := strings.Split(unparsedSample, ":") 286 | for i, field := range sampleFields { 287 | sampleMapping[format[i]] = field 288 | } 289 | return sampleMapping 290 | } 291 | 292 | func fixRefAltSuffix(variant *Variant) *Variant { 293 | ref := variant.Ref 294 | alt := variant.Alt 295 | i := len(ref) - 1 296 | j := len(alt) - 1 297 | for i > 0 && j > 0 && ref[i] == alt[j] { 298 | i-- 299 | j-- 300 | } 301 | newRef := ref[:i+1] 302 | newAlt := alt[:j+1] 303 | variant.Ref = newRef 304 | variant.Alt = newAlt 305 | return variant 306 | } 307 | -------------------------------------------------------------------------------- /vcf_whitebox_test.go: -------------------------------------------------------------------------------- 1 | package vcf 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | "github.com/stretchr/testify/suite" 8 | ) 9 | 10 | type ParseVcfLineSuite struct { 11 | suite.Suite 12 | } 13 | 14 | var defaultHeader = []string{"CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"} 15 | 16 | func (s *ParseVcfLineSuite) TestBlankLineShouldReturnError() { 17 | result, err := parseVcfLine("\t ", defaultHeader) 18 | assert.Error(s.T(), err, "Line with only blanks should return empty and an error") 19 | assert.Empty(s.T(), result, "Line with only blanks should return emptyand an error") 20 | } 21 | 22 | func (s *ParseVcfLineSuite) TestContinuousLineShouldReturnError() { 23 | result, err := parseVcfLine("Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Nunc tellus ligula, faucibus sed nibh sed, fringilla viverra enim.", defaultHeader) 24 | assert.Error(s.T(), err, "Line with continuous text should return empty and an error") 25 | assert.Empty(s.T(), result, "Line with continuous text should return empty and an error") 26 | } 27 | 28 | func (s *ParseVcfLineSuite) TestEmptyFieldedLineShouldReturnError() { 29 | result, err := parseVcfLine("\t\t\t\t\t", defaultHeader) 30 | assert.Error(s.T(), err, "Line with empty fields should return empty and an error") 31 | assert.Empty(s.T(), result, "Line with empty fields should return empty and an error") 32 | } 33 | 34 | func (s *ParseVcfLineSuite) TestWrongFormattedFieldedLineShouldReturnError() { 35 | result, err := parseVcfLine("A\tB\tC\tD\tE\tF", defaultHeader) 36 | assert.Error(s.T(), err, "Line with wrong formatted fields should return empty and an error") 37 | assert.Empty(s.T(), result, "Line with wrong formatted fields should return empty and an error") 38 | } 39 | 40 | func (s *ParseVcfLineSuite) TestValidLineShouldReturnOneElementAndNoErrors() { 41 | result, err := parseVcfLine("1\t847491\trs28407778\tGT\tA\t745.77\tPASS\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant\tGT:AD:DP:GQ:PL\t0/1:16,25:41:99:774,0,434", defaultHeader) 42 | 43 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 44 | assert.NotNil(s.T(), result, "Valid VCF line should not return nil") 45 | assert.Exactly(s.T(), len(result), 1, "Valid VCF should return a list with one element") 46 | assert.Equal(s.T(), result[0].Chrom, "1", "result.Chrom should be 1") 47 | assert.Equal(s.T(), result[0].Pos, 847490, "result.Pos should be 0-based to 847490") 48 | assert.Equal(s.T(), result[0].Ref, "GT", "result.Ref should be GT") 49 | assert.Equal(s.T(), result[0].Alt, "A", "result.Alt should be A") 50 | } 51 | 52 | func (s *ParseVcfLineSuite) 
TestValidLineWithChrShouldStripIt() { 53 | result, err := parseVcfLine("chr1\t847491\trs28407778\tGT\tA\t745.77\tPASS\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant\tGT:AD:DP:GQ:PL\t0/1:16,25:41:99:774,0,434", defaultHeader) 54 | 55 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 56 | assert.NotNil(s.T(), result, "Valid VCF line should not return nil") 57 | assert.Exactly(s.T(), len(result), 1, "Valid VCF should return a list with one element") 58 | assert.Equal(s.T(), result[0].Chrom, "1", "result.Chrom should be 1") 59 | assert.Equal(s.T(), result[0].Pos, 847490, "result.Pos should be 0-based to 847490") 60 | assert.Equal(s.T(), result[0].Ref, "GT", "result.Ref should be GT") 61 | assert.Equal(s.T(), result[0].Alt, "A", "result.Alt should be A") 62 | } 63 | 64 | func (s *ParseVcfLineSuite) TestValidLineWithLowercaseRefAndAltShouldReturnOneElementAndNoErrors() { 65 | result, err := parseVcfLine("1\t847491\trs28407778\tgt\ta\t745.77\tPASS\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant\tGT:AD:DP:GQ:PL\t0/1:16,25:41:99:774,0,434", defaultHeader) 66 | 67 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 68 | assert.NotNil(s.T(), result, "Valid VCF line should not return nil") 69 | assert.Exactly(s.T(), len(result), 1, "Valid VCF should return a list with one element") 70 | assert.Equal(s.T(), result[0].Chrom, "1", "result.Chrom should be 1") 71 | assert.Equal(s.T(), result[0].Pos, 847490, "result.Pos should be 0-based to 847490") 72 | assert.Equal(s.T(), result[0].Ref, "GT", "result.Ref should be GT") 73 | assert.Equal(s.T(), result[0].Alt, "A", "result.Alt should be A") 74 | } 75 | 76 | func (s *ParseVcfLineSuite) 
TestDotsShouldBeRemovedFromValidLineAlternative() { 77 | result, err := parseVcfLine("1\t847491\trs28407778\tGTTTA\tG....\t745.77\tPASS\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant\tGT:AD:DP:GQ:PL\t0/1:16,25:41:99:774,0,434", defaultHeader) 78 | 79 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 80 | assert.NotNil(s.T(), result, "Valid VCF line should not return nil") 81 | assert.Exactly(s.T(), len(result), 1, "Valid VCF should return a list with one element") 82 | assert.Equal(s.T(), result[0].Chrom, "1", "result.Chrom should be 1") 83 | assert.Equal(s.T(), result[0].Pos, 847490, "result.Pos should be 0-based to 847490") 84 | assert.Equal(s.T(), result[0].Ref, "GTTTA", "result.Ref should be GTTTA") 85 | assert.Equal(s.T(), result[0].Alt, "G", "result.Alt should be G") 86 | } 87 | 88 | func (s *ParseVcfLineSuite) TestValidLineWithMultipleAlternativesShouldReturnThreeElementsAndNoErrors() { 89 | result, err := parseVcfLine("1\t847491\trs28407778\tGT\tA,C,G\t745.77\tPASS\tAC=1;AF=0.300,0.300,0.400;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant\tGT:AD:DP:GQ:PL\t0/1:16,25:41:99:774,0,434", defaultHeader) 90 | 91 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 92 | assert.NotNil(s.T(), result, "Valid VCF line should not return nil") 93 | assert.Exactly(s.T(), len(result), 3, "Valid VCF should return a list with one element") 94 | 95 | assert.Equal(s.T(), result[0].Chrom, "1", "result[0].Chrom should be 1") 96 | assert.Equal(s.T(), result[0].Pos, 847490, "result[0].Pos should be 0-based to 847490") 97 | assert.Equal(s.T(), result[0].Ref, "GT", "result[0].Ref should be GT") 98 | assert.Equal(s.T(), result[0].Alt, "A", "result[0].Alt should be A") 
99 | 100 | assert.Equal(s.T(), result[1].Chrom, "1", "result[1].Chrom should be 1") 101 | assert.Equal(s.T(), result[1].Pos, 847490, "result[1].Pos should be 0-based to 847490") 102 | assert.Equal(s.T(), result[1].Ref, "GT", "result[1].Ref should be GT") 103 | assert.Equal(s.T(), result[1].Alt, "C", "result[1].Alt should be A") 104 | 105 | assert.Equal(s.T(), result[2].Chrom, "1", "result[2].Chrom should be 1") 106 | assert.Equal(s.T(), result[2].Pos, 847490, "result[2].Pos should be 0-based to 847490") 107 | assert.Equal(s.T(), result[2].Ref, "GT", "result[2].Ref should be GT") 108 | assert.Equal(s.T(), result[2].Alt, "G", "result[2].Alt should be A") 109 | } 110 | 111 | func (s *ParseVcfLineSuite) TestValidLineWithSampleGenotypeFields() { 112 | result, err := parseVcfLine("1\t847491\trs28407778\tGTTTA\tG....\t745.77\tPASS\tAC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant\tGT:AD:DP:GQ:PL\t0/1:16,25:41:99:774,0,434", defaultHeader) 113 | 114 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 115 | assert.NotNil(s.T(), result, "Valid VCF line should not return nil") 116 | assert.Exactly(s.T(), len(result), 1, "Valid VCF should return a list with one element") 117 | 118 | samples := result[0].Samples 119 | assert.NotNil(s.T(), samples, "Valid VCF should contain slice of sample maps") 120 | assert.Exactly(s.T(), len(samples), 1, "Valid VCF should contain one sample") 121 | sampleMap := samples[0] 122 | assert.NotNil(s.T(), sampleMap, "Genotype field mapping should not return nil") 123 | assert.Exactly(s.T(), len(sampleMap), 5, "Sample map should have as many keys as there are formats") 124 | 125 | gt, ok := sampleMap["GT"] 126 | assert.True(s.T(), ok, "GT key must be found") 127 | assert.Equal(s.T(), gt, "0/1", "gt") 128 | 129 | ad, ok := sampleMap["AD"] 130 | assert.True(s.T(), ok, "AD key must be 
found") 131 | assert.Equal(s.T(), ad, "16,25", "ad") 132 | 133 | dp, ok := sampleMap["DP"] 134 | assert.True(s.T(), ok, "AD key must be found") 135 | assert.Equal(s.T(), dp, "41", "dp") 136 | 137 | gq, ok := sampleMap["GQ"] 138 | assert.True(s.T(), ok, "GQ key must be found") 139 | assert.Equal(s.T(), gq, "99", "gq") 140 | 141 | pl, ok := sampleMap["PL"] 142 | assert.True(s.T(), ok, "PL key must be found") 143 | assert.Equal(s.T(), pl, "774,0,434", "pl") 144 | } 145 | 146 | func (s *ParseVcfLineSuite) TestInfoFields() { 147 | result, err := parseVcfLine("1\t847491\trs28407778\tG\tA,C\t745.77\tPASS\tAC=1,2;AF=0.500,0.335;AN=2;BQ=30.00;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;NS=27;H2;H3;SOMATIC;VALIDATED;1000G;MLEAC=1;MLEAF=0.500;END=847492;MQ=60.00;MQ0=0;SB=0.127;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;CIGAR=a;culprit=FS;set=variant\tGT:AD:DP:GQ:PL\t0/1:16,25:41:99:774,0,434", defaultHeader) 148 | 149 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 150 | assert.NotNil(s.T(), result, "Valid VCF line should not return nil") 151 | assert.Exactly(s.T(), len(result), 2, "Valid VCF should return a list with two elements") 152 | 153 | info := result[0].Info 154 | assert.NotNil(s.T(), info, "Valid VCF should contain info map") 155 | assert.Exactly(s.T(), len(info), 28, "Info should contain 20 keys") 156 | 157 | ac, ok := info["AC"] 158 | assert.True(s.T(), ok, "AC key must be found") 159 | assert.Equal(s.T(), ac, "1", "ac") 160 | 161 | af, ok := info["AF"] 162 | assert.True(s.T(), ok, "AF key must be found") 163 | assert.Equal(s.T(), af, "0.500", "af") 164 | 165 | af, ok = result[1].Info["AF"] 166 | assert.True(s.T(), ok, "AF key must be found") 167 | assert.Equal(s.T(), af, "0.335", "af") 168 | 169 | db, ok := info["DB"] 170 | assert.True(s.T(), ok, "DB key must be found") 171 | booldb, isbool := db.(bool) 172 | assert.True(s.T(), isbool, "DB value must be a boolean") 173 | assert.True(s.T(), booldb) 174 | 175 | 
_, ok = info["AA"] 176 | assert.False(s.T(), ok, "AA key must not be found") 177 | 178 | aa := result[0].AncestralAllele 179 | assert.Nil(s.T(), aa, "No AA field") 180 | 181 | dp := result[0].Depth 182 | assert.NotNil(s.T(), dp, "Depth field of first element must be found") 183 | assert.Equal(s.T(), *dp, 41) 184 | 185 | dp = result[1].Depth 186 | assert.NotNil(s.T(), dp, "Depth field of second element must be found") 187 | assert.Equal(s.T(), *dp, 41) 188 | 189 | freq := result[0].AlleleFrequency 190 | assert.NotNil(s.T(), freq, "AlleleFrequency field must be found") 191 | assert.Equal(s.T(), *freq, 0.500) 192 | freq = result[1].AlleleFrequency 193 | assert.NotNil(s.T(), freq, "AlleleFrequency field must be found") 194 | assert.Equal(s.T(), *freq, 0.335) 195 | 196 | count := result[0].AlleleCount 197 | assert.NotNil(s.T(), count, "AlleleCount field must be found") 198 | assert.Equal(s.T(), *count, 1) 199 | count = result[1].AlleleCount 200 | assert.NotNil(s.T(), count, "AlleleCount field must be found") 201 | assert.Equal(s.T(), *count, 2) 202 | 203 | total := result[0].TotalAlleles 204 | assert.NotNil(s.T(), total, "TotalAlleles field must be found") 205 | assert.Equal(s.T(), *total, 2) 206 | 207 | end := result[0].End 208 | assert.NotNil(s.T(), end, "End field must be found") 209 | assert.Equal(s.T(), *end, 847492) 210 | 211 | mapq0reads := result[0].MAPQ0Reads 212 | assert.NotNil(s.T(), mapq0reads, "MAPQ0Reads field must be found") 213 | assert.Equal(s.T(), *mapq0reads, 0) 214 | 215 | numSamples := result[0].NumberOfSamples 216 | assert.NotNil(s.T(), numSamples, "NumberOfSamples field must be found") 217 | assert.Equal(s.T(), *numSamples, 27) 218 | 219 | mq := result[0].MappingQuality 220 | assert.NotNil(s.T(), mq, "MappingQuality field must be found") 221 | assert.Equal(s.T(), *mq, 60.0) 222 | 223 | cigar := result[0].Cigar 224 | assert.NotNil(s.T(), cigar, "Cigar field must be found") 225 | assert.Equal(s.T(), *cigar, "a") 226 | 227 | dbsnp := 
result[0].InDBSNP 228 | assert.NotNil(s.T(), dbsnp, "InDBSNP field must be found") 229 | assert.True(s.T(), *dbsnp) 230 | 231 | h2 := result[0].InHapmap2 232 | assert.NotNil(s.T(), h2, "InHapmap2 field must be found") 233 | assert.True(s.T(), *h2) 234 | 235 | h3 := result[0].InHapmap3 236 | assert.NotNil(s.T(), h3, "InHapmap3 field must be found") 237 | assert.True(s.T(), *h3) 238 | 239 | somatic := result[0].IsSomatic 240 | assert.NotNil(s.T(), somatic, "IsSomatic field must be found") 241 | assert.True(s.T(), *somatic) 242 | 243 | validated := result[0].IsValidated 244 | assert.NotNil(s.T(), validated, "IsValidated field must be found") 245 | assert.True(s.T(), *validated) 246 | 247 | thousand := result[0].In1000G 248 | assert.NotNil(s.T(), thousand, "In1000G field must be found") 249 | assert.True(s.T(), *thousand) 250 | 251 | bq := result[0].BaseQuality 252 | assert.NotNil(s.T(), bq, "BaseQuality field must be found") 253 | assert.Equal(s.T(), *bq, 30.0) 254 | 255 | strandBias := result[0].StrandBias 256 | assert.NotNil(s.T(), strandBias, "StrandBias field must be found") 257 | assert.Equal(s.T(), *strandBias, 0.127) 258 | } 259 | 260 | func (s *ParseVcfLineSuite) TestInfoWithoutFormat() { 261 | result, err := parseVcfLine("1\t847491\trs28407778\tG\tA,C\t745.77\tPASS\tAC=1,2;AF=0.500,0.335;AN=2;BQ=30.00;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;NS=27;H2;H3;SOMATIC;VALIDATED;1000G;MLEAC=1;MLEAF=0.500;END=847492;MQ=60.00;MQ0=0;SB=0.127;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;CIGAR=a;culprit=FS;toxic\n", defaultHeader) 262 | 263 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 264 | assert.NotNil(s.T(), result, "Valid VCF line should not return nil") 265 | assert.Exactly(s.T(), len(result), 2, "Valid VCF should return a list with two elements") 266 | 267 | info := result[0].Info 268 | assert.NotNil(s.T(), info, "Valid VCF should contain info map") 269 | assert.Exactly(s.T(), len(info), 28, "Info should 
contain 28 keys") 270 | 271 | ac, ok := info["AC"] 272 | assert.True(s.T(), ok, "AC key must be found") 273 | assert.Equal(s.T(), ac, "1", "ac") 274 | 275 | toxic, ok := info["toxic"] 276 | assert.True(s.T(), ok, "toxic key must be found") 277 | booltoxic, isbool := toxic.(bool) 278 | assert.True(s.T(), isbool, "toxic value must be a boolean") 279 | assert.True(s.T(), booltoxic) 280 | } 281 | 282 | func (s *ParseVcfLineSuite) TestAncestralAllele() { 283 | result, _ := parseVcfLine("1\t847491\trs28407778\tG\tA,C\t745.77\tPASS\tAC=1;AF=0.500,0.335;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;AA=T;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant\tGT:AD:DP:GQ:PL\t0/1:16,25:41:99:774,0,434", defaultHeader) 284 | 285 | aa := result[0].AncestralAllele 286 | assert.NotNil(s.T(), aa, "AncestralAllele field must be found") 287 | assert.Equal(s.T(), *aa, "T") 288 | } 289 | 290 | func (s *ParseVcfLineSuite) TestAlternateFormatOptionalField() { 291 | var result []*Variant 292 | var err error 293 | 294 | assert.NotPanics(s.T(), func() { 295 | result, err = parseVcfLine("1\t847491\trs28407778\tG\tA\t745.77\tPASS\tSB=strong;AA\tGT:AD:DP:GQ:PL\t0/1:16,25:41:99:774,0,434", defaultHeader) 296 | }) 297 | 298 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 299 | assert.NotNil(s.T(), result, "Valid VCF line should not return nil") 300 | 301 | info := result[0].Info 302 | assert.NotNil(s.T(), info, "Valid VCF should contain info map") 303 | assert.Exactly(s.T(), len(info), 2, "Info should contain 2 keys") 304 | 305 | sb, ok := info["SB"] 306 | assert.True(s.T(), ok, "SB key must be found") 307 | assert.Equal(s.T(), sb, "strong") 308 | 309 | aa, ok := info["AA"] 310 | assert.True(s.T(), ok, "AA key must be found") 311 | boolaa, isbool := aa.(bool) 312 | assert.True(s.T(), isbool, "AA value must be a boolean") 313 | assert.True(s.T(), boolaa) 314 | } 315 | 316 | func (s 
*ParseVcfLineSuite) TestGenotype() { 317 | result, _ := parseVcfLine("1\t847491\trs28407778\tG\tA,C\t745.77\tPASS\tAC=1;AF=0.500,0.335;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;AA=T;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant\tGT:AD:DP:GQ:PL\t0/1:16,25:41:99:774,0,434", defaultHeader) 318 | 319 | samples := result[0].Samples 320 | assert.NotNil(s.T(), samples, "Samples must be found") 321 | assert.Len(s.T(), samples, 1, "There must be one sample") 322 | 323 | sample := samples[0] 324 | gt, found := sample["GT"] 325 | assert.True(s.T(), found, "GT key must be found") 326 | assert.Equal(s.T(), gt, "0/1") 327 | } 328 | 329 | func TestParseVcfLineSuite(t *testing.T) { 330 | suite.Run(t, new(ParseVcfLineSuite)) 331 | } 332 | 333 | func (s *ParseVcfLineSuite) TestValidCNVShouldReturnOneElementAndNoErrors() { 334 | result, err := parseVcfLine("22\t16533236\tSI_BD_17525\tC\t\t100\tPASS\tAC=125;AF=0.0249601;AN=5008;CIEND=-50,141;CIPOS=-141,50;CS=DEL_union;END=16536204;NS=2504;SVLEN=-2968;SVTYPE=DEL;DP=14570;EAS_AF=0;AMR_AF=0.0086;AFR_AF=0.09;EUR_AF=0;SAS_AF=0\tGT", defaultHeader) 335 | 336 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 337 | assert.NotNil(s.T(), result, "Valid VCF line should not return nil") 338 | assert.Exactly(s.T(), len(result), 1, "Valid VCF should return a list with one element") 339 | assert.Equal(s.T(), result[0].Chrom, "22", "result.Chrom should be 2") 340 | assert.Equal(s.T(), result[0].Pos, 16533235, "result.Pos should be 0-based to 16533235") 341 | assert.Equal(s.T(), result[0].Ref, "C", "result.Ref should be C") 342 | assert.Equal(s.T(), result[0].Alt, "", "result.Alt should be A") 343 | } 344 | 345 | type FixSuffixSuite struct { 346 | suite.Suite 347 | } 348 | 349 | func (s *FixSuffixSuite) TestNoSuffix() { 350 | variant := Variant{ 351 | Ref: "T", 352 | Alt: "C", 353 | } 354 | result := fixRefAltSuffix(&variant) 355 | 
assert.Equal(s.T(), variant.Ref, result.Ref, "no suffix in common should return the same ref") 356 | assert.Equal(s.T(), variant.Alt, result.Alt, "no suffix in common should return the same alt") 357 | } 358 | 359 | func (s *FixSuffixSuite) TestSmallSuffix() { 360 | variant := Variant{ 361 | Ref: "GC", 362 | Alt: "TC", 363 | } 364 | result := fixRefAltSuffix(&variant) 365 | assert.Equal(s.T(), "G", result.Ref, "GC -> TC should become ref G") 366 | assert.Equal(s.T(), "T", result.Alt, "GC -> TC should become alt T") 367 | } 368 | 369 | func (s *FixSuffixSuite) TestBigSuffix() { 370 | variant := Variant{ 371 | Ref: "CGGCCACGTCCCCCTATGGAGGG", 372 | Alt: "TGGCCACGTCCCCCTATGGAGGG", 373 | } 374 | result := fixRefAltSuffix(&variant) 375 | assert.Equal(s.T(), "C", result.Ref, "CGGCCACGTCCCCCTATGGAGGG -> TGGCCACGTCCCCCTATGGAGGG should become ref C") 376 | assert.Equal(s.T(), "T", result.Alt, "CGGCCACGTCCCCCTATGGAGGG -> TGGCCACGTCCCCCTATGGAGGG should become alt T") 377 | } 378 | 379 | func (s *FixSuffixSuite) TestBigSuffixWithBigResult() { 380 | variant := Variant{ 381 | Ref: "CGGCCACGTCCCCCTATGGAGGG", 382 | Alt: "CGGCCACGTCCCCCTATGGAGGGGGCCACGTCCCCCTATGGAGGG", 383 | } 384 | result := fixRefAltSuffix(&variant) 385 | assert.Equal(s.T(), "C", result.Ref, "CGGCCACGTCCCCCTATGGAGGG -> CGGCCACGTCCCCCTATGGAGGGGGCCACGTCCCCCTATGGAGGG should become ref C") 386 | assert.Equal(s.T(), "CGGCCACGTCCCCCTATGGAGGG", result.Alt, "CGGCCACGTCCCCCTATGGAGGG -> CGGCCACGTCCCCCTATGGAGGGGGCCACGTCCCCCTATGGAGGG should become alt T") 387 | } 388 | 389 | func TestFixSuffixSuite(t *testing.T) { 390 | suite.Run(t, new(FixSuffixSuite)) 391 | } 392 | 393 | type SplitVcfFieldsSuite struct { 394 | suite.Suite 395 | } 396 | 397 | func (s *SplitVcfFieldsSuite) TestNewlineChomp() { 398 | line := "X\t32632420\t.\tN\t\t87.2\tPASS\tSVTYPE=DEL;END=32717410;EXPECTED=1071;OBSERVED=17;RATIO=0.0159;BF=87.2\tGT\t1/1\n" 399 | vcfLine, err := splitVcfFields(line) 400 | 401 | assert.NoError(s.T(), err, "split should not 
fail") 402 | assert.NotNil(s.T(), vcfLine, "vcf line can't be nil") 403 | assert.NotEmpty(s.T(), vcfLine.Samples, "samples can't be empty") 404 | gt := vcfLine.Samples[0]["GT"] 405 | assert.Equal(s.T(), "1/1", gt) 406 | } 407 | 408 | func TestSplitVcfFieldsSuite(t *testing.T) { 409 | suite.Run(t, new(SplitVcfFieldsSuite)) 410 | } 411 | -------------------------------------------------------------------------------- /vcf_blackbox_test.go: -------------------------------------------------------------------------------- 1 | package vcf_test 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/mendelics/vcf" 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/suite" 11 | ) 12 | 13 | type ChannelSuite struct { 14 | suite.Suite 15 | 16 | outChannel chan *vcf.Variant 17 | invalidChannel chan vcf.InvalidLine 18 | } 19 | 20 | func (suite *ChannelSuite) SetupTest() { 21 | suite.outChannel = make(chan *vcf.Variant, 10) 22 | suite.invalidChannel = make(chan vcf.InvalidLine, 10) 23 | } 24 | 25 | func (s *ChannelSuite) TestNoHeader() { 26 | vcfLine := `1 847491 rs28407778 GTTTA G.... 745.77 PASS AC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant GT:AD:DP:GQ:PL 0/1:16,25:41:99:774,0,434` 27 | ioreader := strings.NewReader(vcfLine) 28 | err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel) 29 | 30 | assert.Error(s.T(), err, "VCF line without header should return error") 31 | } 32 | 33 | func (s *ChannelSuite) TestInvalidLinesShouldReturnNothing() { 34 | vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423 35 | 36 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc tellus ligula, faucibus sed nibh sed, fringilla viverra enim. 
37 | 38 | A B C D E F` 39 | 40 | ioreader := strings.NewReader(vcfLine) 41 | err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel) 42 | assert.NoError(s.T(), err, "VCF with valid header should not return an error") 43 | 44 | _, hasMore := <-s.outChannel 45 | assert.False(s.T(), hasMore, "No variant should come out of the channel, it should be closed") 46 | 47 | totalLines := 4 48 | for i := 0; i < totalLines; i++ { 49 | invalid := <-s.invalidChannel 50 | assert.NotNil(s.T(), invalid) 51 | assert.Error(s.T(), invalid.Err) 52 | } 53 | 54 | _, hasMore = <-s.invalidChannel 55 | assert.False(s.T(), hasMore, fmt.Sprintf("More than %d variants came out of the invalid channel, it should be closed", totalLines)) 56 | } 57 | 58 | func (s *ChannelSuite) TestToChannel() { 59 | vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423 60 | 1 847491 rs28407778 GTTTA G.... 745.77 PASS AC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant GT:AD:DP:GQ:PL 0/1:16,25:41:99:774,0,434` 61 | ioreader := strings.NewReader(vcfLine) 62 | 63 | err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel) 64 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 65 | 66 | variant := <-s.outChannel 67 | assert.NotNil(s.T(), variant, "One variant should come out of channel") 68 | 69 | assert.Equal(s.T(), variant.Chrom, "1") 70 | assert.Equal(s.T(), variant.Pos, 847490) 71 | assert.Equal(s.T(), variant.Ref, "GTTTA") 72 | assert.Equal(s.T(), variant.Alt, "G") 73 | assert.Equal(s.T(), variant.ID, "rs28407778") 74 | assert.Equal(s.T(), *variant.Qual, 745.77) 75 | assert.Equal(s.T(), variant.Filter, "PASS") 76 | 77 | assert.NotNil(s.T(), variant.Info) 78 | assert.Exactly(s.T(), len(variant.Info), 18) 79 | ac, ok := variant.Info["AC"] 80 | assert.True(s.T(), ok, "AC key must be found") 81 | assert.Equal(s.T(), ac, "1", "ac") 82 | 
af, ok := variant.Info["AF"] 83 | assert.True(s.T(), ok, "AF key must be found") 84 | assert.Equal(s.T(), af, "0.500", "af") 85 | db, ok := variant.Info["DB"] 86 | assert.True(s.T(), ok, "DB key must be found") 87 | booldb, isbool := db.(bool) 88 | assert.True(s.T(), isbool, "DB value must be a boolean") 89 | assert.True(s.T(), booldb) 90 | 91 | assert.NotNil(s.T(), variant.Samples) 92 | assert.Exactly(s.T(), len(variant.Samples), 1, "Valid VCF should contain one sample") 93 | sampleMap := variant.Samples[0] 94 | assert.NotNil(s.T(), sampleMap, "Genotype field mapping should not return nil") 95 | assert.Exactly(s.T(), len(sampleMap), 5, "Sample map should have as many keys as there are formats") 96 | 97 | gt, ok := sampleMap["GT"] 98 | assert.True(s.T(), ok, "GT key must be found") 99 | assert.Equal(s.T(), gt, "0/1", "gt") 100 | 101 | ad, ok := sampleMap["AD"] 102 | assert.True(s.T(), ok, "AD key must be found") 103 | assert.Equal(s.T(), ad, "16,25", "ad") 104 | 105 | dp, ok := sampleMap["DP"] 106 | assert.True(s.T(), ok, "AD key must be found") 107 | assert.Equal(s.T(), dp, "41", "dp") 108 | 109 | gq, ok := sampleMap["GQ"] 110 | assert.True(s.T(), ok, "GQ key must be found") 111 | assert.Equal(s.T(), gq, "99", "gq") 112 | 113 | pl, ok := sampleMap["PL"] 114 | assert.True(s.T(), ok, "PL key must be found") 115 | assert.Equal(s.T(), pl, "774,0,434", "pl") 116 | 117 | _, hasMore := <-s.outChannel 118 | assert.False(s.T(), hasMore, "No second variant should come out of the channel, it should be closed") 119 | _, hasMore = <-s.invalidChannel 120 | assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed") 121 | } 122 | 123 | func (s *ChannelSuite) TestChrParsedProperly() { 124 | vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423 125 | chr1 847491 rs28407778 GTTTA G.... 
745.77 PASS AC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant GT:AD:DP:GQ:PL 0/1:16,25:41:99:774,0,434` 126 | ioreader := strings.NewReader(vcfLine) 127 | 128 | err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel) 129 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 130 | 131 | variant := <-s.outChannel 132 | assert.NotNil(s.T(), variant, "One variant should come out of channel") 133 | 134 | assert.Equal(s.T(), variant.Chrom, "1") 135 | 136 | _, hasMore := <-s.outChannel 137 | assert.False(s.T(), hasMore, "No second variant should come out of the channel, it should be closed") 138 | _, hasMore = <-s.invalidChannel 139 | assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed") 140 | } 141 | 142 | func (s *ChannelSuite) TestLowercaseRefAlt() { 143 | vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423 144 | 1 847491 rs28407778 gt t 745.77 PASS AC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant GT:AD:DP:GQ:PL 0/1:16,25:41:99:774,0,434` 145 | ioreader := strings.NewReader(vcfLine) 146 | 147 | err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel) 148 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 149 | 150 | variant := <-s.outChannel 151 | assert.NotNil(s.T(), variant, "One variant should come out of channel") 152 | assert.Equal(s.T(), variant.Ref, "GT") 153 | assert.Equal(s.T(), variant.Alt, "T") 154 | 155 | _, hasMore := <-s.outChannel 156 | assert.False(s.T(), hasMore, "No second variant should come out of the channel, it should be closed") 157 | _, hasMore = <-s.invalidChannel 158 | assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it 
should be closed") 159 | } 160 | 161 | func (s *ChannelSuite) TestMultipleAlternatives() { 162 | vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423 163 | 1 847491 rs28407778 G A,C 745.77 PASS AC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant GT:AD:DP:GQ:PL 0/1:16,25:41:99:774,0,434` 164 | ioreader := strings.NewReader(vcfLine) 165 | 166 | err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel) 167 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 168 | 169 | variant := <-s.outChannel 170 | assert.NotNil(s.T(), variant, "One variant should come out of channel") 171 | assert.Equal(s.T(), variant.Alt, "A") 172 | variant = <-s.outChannel 173 | assert.NotNil(s.T(), variant, "Second variant should come out of channel") 174 | assert.Equal(s.T(), variant.Alt, "C") 175 | 176 | _, hasMore := <-s.outChannel 177 | assert.False(s.T(), hasMore, "No third variant should come out of the channel, it should be closed") 178 | _, hasMore = <-s.invalidChannel 179 | assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed") 180 | } 181 | 182 | func TestChannelSuite(t *testing.T) { 183 | suite.Run(t, new(ChannelSuite)) 184 | } 185 | 186 | type SampleSuite struct { 187 | suite.Suite 188 | } 189 | 190 | func (s *SampleSuite) TestNoHeader() { 191 | vcfLine := `1 847491 rs28407778 GTTTA G.... 
745.77 PASS AC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant GT:AD:DP:GQ:PL 0/1:16,25:41:99:774,0,434` 192 | ioreader := strings.NewReader(vcfLine) 193 | sampleIDs, err := vcf.SampleIDs(ioreader) 194 | 195 | assert.Error(s.T(), err, "VCF without header should return error") 196 | assert.Nil(s.T(), sampleIDs, "No slice of ids is expected on a vcf without header") 197 | } 198 | 199 | func (s *SampleSuite) TestValidHeaderNoSample() { 200 | vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 201 | 1 847491 rs28407778 GTTTA G.... 745.77 PASS AC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant GT:AD:DP:GQ:PL 0/1:16,25:41:99:774,0,434` 202 | ioreader := strings.NewReader(vcfLine) 203 | sampleIDs, err := vcf.SampleIDs(ioreader) 204 | 205 | assert.NoError(s.T(), err, "VCF with valid header should not return error") 206 | assert.Nil(s.T(), sampleIDs, "No slice of ids should be returned on a vcf with a valid header that doesn't contain any sample") 207 | } 208 | 209 | func (s *SampleSuite) TestOneSample() { 210 | vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423 211 | 1 847491 rs28407778 GTTTA G.... 
745.77 PASS AC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant GT:AD:DP:GQ:PL 0/1:16,25:41:99:774,0,434` 212 | ioreader := strings.NewReader(vcfLine) 213 | sampleIDs, err := vcf.SampleIDs(ioreader) 214 | 215 | assert.NoError(s.T(), err, "VCF with valid header should not return error") 216 | assert.NotNil(s.T(), sampleIDs, "A slice of ids should be returned on a vcf with a valid header") 217 | assert.Exactly(s.T(), len(sampleIDs), 1, "Slice of ids should have only one element") 218 | assert.Equal(s.T(), sampleIDs[0], "185423") 219 | } 220 | 221 | func (s *SampleSuite) TestThreeSamples() { 222 | vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423 776182 091635 223 | 1 847491 rs28407778 GTTTA G.... 745.77 PASS AC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant GT:AD:DP:GQ:PL 0/1:16,25:41:99:774,0,434` 224 | ioreader := strings.NewReader(vcfLine) 225 | sampleIDs, err := vcf.SampleIDs(ioreader) 226 | 227 | assert.NoError(s.T(), err, "VCF with valid header should not return error") 228 | assert.NotNil(s.T(), sampleIDs, "A slice of ids should be returned on a vcf with a valid header") 229 | assert.Exactly(s.T(), len(sampleIDs), 3, "Slice of ids should have three elements") 230 | assert.Equal(s.T(), sampleIDs[0], "185423") 231 | assert.Equal(s.T(), sampleIDs[1], "776182") 232 | assert.Equal(s.T(), sampleIDs[2], "091635") 233 | } 234 | 235 | func TestSampleSuite(t *testing.T) { 236 | suite.Run(t, new(SampleSuite)) 237 | } 238 | 239 | type InfoSuite struct { 240 | suite.Suite 241 | 242 | outChannel chan *vcf.Variant 243 | invalidChannel chan vcf.InvalidLine 244 | } 245 | 246 | func (suite *InfoSuite) SetupTest() { 247 | suite.outChannel = make(chan *vcf.Variant, 10) 248 | 
suite.invalidChannel = make(chan vcf.InvalidLine, 10) 249 | } 250 | 251 | func (s *InfoSuite) TestInfo() { 252 | vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423 253 | 1 847491 rs28407778 GTTTA G.... 745.77 PASS AC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant GT:AD:DP:GQ:PL 0/1:16,25:41:99:774,0,434` 254 | ioreader := strings.NewReader(vcfLine) 255 | 256 | err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel) 257 | assert.NoError(s.T(), err, "Valid VCF line should not return error") 258 | 259 | variant := <-s.outChannel 260 | assert.NotNil(s.T(), variant, "One variant should come out of channel") 261 | 262 | assert.NotNil(s.T(), variant.AlleleCount) 263 | assert.Equal(s.T(), *variant.AlleleCount, 1) 264 | assert.NotNil(s.T(), variant.AlleleFrequency) 265 | assert.Equal(s.T(), *variant.AlleleFrequency, 0.5) 266 | assert.NotNil(s.T(), variant.TotalAlleles) 267 | assert.Equal(s.T(), *variant.TotalAlleles, 2) 268 | assert.NotNil(s.T(), variant.InDBSNP) 269 | assert.True(s.T(), *variant.InDBSNP) 270 | assert.NotNil(s.T(), variant.Depth) 271 | assert.Equal(s.T(), *variant.Depth, 41) 272 | assert.NotNil(s.T(), variant.MappingQuality) 273 | assert.Equal(s.T(), *variant.MappingQuality, 60.0) 274 | assert.NotNil(s.T(), variant.MAPQ0Reads) 275 | assert.Equal(s.T(), *variant.MAPQ0Reads, 0) 276 | 277 | assert.Nil(s.T(), variant.AncestralAllele) 278 | assert.Nil(s.T(), variant.BaseQuality) 279 | assert.Nil(s.T(), variant.Cigar) 280 | assert.Nil(s.T(), variant.End) 281 | assert.Nil(s.T(), variant.InHapmap2) 282 | assert.Nil(s.T(), variant.InHapmap3) 283 | assert.Nil(s.T(), variant.NumberOfSamples) 284 | assert.Nil(s.T(), variant.StrandBias) 285 | assert.Nil(s.T(), variant.IsSomatic) 286 | assert.Nil(s.T(), variant.IsValidated) 287 | assert.Nil(s.T(), variant.In1000G) 288 | 289 | _, hasMore := <-s.outChannel 290 
| assert.False(s.T(), hasMore, "No second variant should come out of the channel, it should be closed") 291 | _, hasMore = <-s.invalidChannel 292 | assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed") 293 | }

// TestMultiple parses two records that each list two ALT alleles and expects
// four variants in order. Comma-separated AC/AF/AN values are expected to be
// handed out one per allele (AC 1 then 2, AF 0.5 then 0.6, AN 3 then 4), while
// a single AN value (AN=2) is expected on both variants of the first record.
func (s *InfoSuite) TestMultiple() {
	vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423
5 159478089 rs80263784 GTT G,GT 198.19 . AC=1,2;AF=0.500,0.600;AN=2;BaseQRankSum=1.827;ClippingRankSum=1.323;DB;DP=20;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=60.00;MQ0=0;MQRankSum=0.441;QD=5.74;ReadPosRankSum=0.063;set=variant5 GT:AD:DP:GQ:PL 1/2:2,9,9:20:99:425,145,183,175,0,166
5 159478089 rs80263784 GTT G,GT 198.19 . AC=1,2;AF=0.500,0.600;AN=3,4;BaseQRankSum=1.827;ClippingRankSum=1.323;DB;DP=20;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=60.00;MQ0=0;MQRankSum=0.441;QD=5.74;ReadPosRankSum=0.063;set=variant5 GT:AD:DP:GQ:PL 1/2:2,9,9:20:99:425,145,183,175,0,166`
	ioreader := strings.NewReader(vcfLine)
	// Fatal assertions: every pointer is checked with req before it is
	// dereferenced, so a missing value fails the test instead of panicking.
	req := s.Require()

	err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel)
	req.NoError(err, "Valid VCF line should not return error")

	// first variant
	variant := <-s.outChannel
	req.NotNil(variant, "One variant should come out of channel")
	req.NotNil(variant.AlleleCount)
	assert.Equal(s.T(), 1, *variant.AlleleCount)
	req.NotNil(variant.AlleleFrequency)
	assert.Equal(s.T(), 0.5, *variant.AlleleFrequency)
	req.NotNil(variant.TotalAlleles)
	assert.Equal(s.T(), 2, *variant.TotalAlleles)

	// second variant
	variant, hasMore := <-s.outChannel
	req.True(hasMore, "Second variant should be in the channel")
	req.NotNil(variant, "Second variant should come out of channel")
	req.NotNil(variant.AlleleCount)
	assert.Equal(s.T(), 2, *variant.AlleleCount)
	req.NotNil(variant.AlleleFrequency)
	assert.Equal(s.T(), 0.6, *variant.AlleleFrequency)
	req.NotNil(variant.TotalAlleles)
	assert.Equal(s.T(), 2, *variant.TotalAlleles)

	// third variant
	variant, hasMore = <-s.outChannel
	req.True(hasMore, "Third variant should be in the channel")
	req.NotNil(variant, "Third variant should come out of channel")
	req.NotNil(variant.TotalAlleles)
	assert.Equal(s.T(), 3, *variant.TotalAlleles)

	// fourth variant
	variant, hasMore = <-s.outChannel
	req.True(hasMore, "Fourth variant should be in the channel")
	req.NotNil(variant, "Fourth variant should come out of channel")
	req.NotNil(variant.TotalAlleles)
	assert.Equal(s.T(), 4, *variant.TotalAlleles)

	_, hasMore = <-s.outChannel
	assert.False(s.T(), hasMore, "No fifth variant should come out of the channel, it should be closed")
	_, hasMore = <-s.invalidChannel
	assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed")
}

// TestInfoSuite runs every test method of InfoSuite under go test.
func TestInfoSuite(t *testing.T) {
	suite.Run(t, new(InfoSuite))
}

// FixSuffixSuite groups the tests that check how REF/ALT pairs sharing
// trailing bases are reported.
type FixSuffixSuite struct {
	suite.Suite

	// outChannel receives the successfully parsed variants.
	outChannel chan *vcf.Variant
	// invalidChannel receives the lines that could not be parsed.
	invalidChannel chan vcf.InvalidLine
}

// SetupTest gives each test fresh channels, buffered to 10 entries each.
func (s *FixSuffixSuite) SetupTest() {
	s.outChannel = make(chan *vcf.Variant, 10)
	s.invalidChannel = make(chan vcf.InvalidLine, 10)
}

// TestSimpleVariant parses REF "GC" with ALT "TC,G". The first variant is
// expected as G -> T (the shared trailing "C" removed) and the second as
// GC -> G, unchanged.
func (s *FixSuffixSuite) TestSimpleVariant() {
	vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423
1 138829 . GC TC,G 198.19 . AC=1,2;AF=0.500,0.600;AN=2;BaseQRankSum=1.827;ClippingRankSum=1.323;DB;DP=20;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=60.00;MQ0=0;MQRankSum=0.441;QD=5.74;ReadPosRankSum=0.063;set=variant5 GT:AD:DP:GQ:PL 1/2:2,9,9:20:99:425,145,183,175,0,166`
	ioreader := strings.NewReader(vcfLine)
	req := s.Require()

	err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel)
	req.NoError(err, "Valid VCF line should not return error")

	// first variant
	variant := <-s.outChannel
	req.NotNil(variant, "One variant should come out of channel")
	assert.Equal(s.T(), "G", variant.Ref)
	assert.Equal(s.T(), "T", variant.Alt)

	// second variant
	variant, hasMore := <-s.outChannel
	req.True(hasMore, "Second variant should be in the channel")
	req.NotNil(variant, "Second variant should come out of channel")
	assert.Equal(s.T(), "GC", variant.Ref)
	assert.Equal(s.T(), "G", variant.Alt)

	_, hasMore = <-s.outChannel
	assert.False(s.T(), hasMore, "No third variant should come out of the channel, it should be closed")
	_, hasMore = <-s.invalidChannel
	assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed")
}

// TestBigSuffix parses a 23-base REF with three ALT alleles. The deletion to
// "C" is expected unchanged, while the other two alleles are expected with
// their long shared suffix removed (C -> T and C -> CGGCCACGTCCCCCTATGGAGGG).
func (s *FixSuffixSuite) TestBigSuffix() {
	vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423
1 879415 . CGGCCACGTCCCCCTATGGAGGG C,TGGCCACGTCCCCCTATGGAGGG,CGGCCACGTCCCCCTATGGAGGGGGCCACGTCCCCCTATGGAGGG 198.19 . AC=1,2;AF=0.500,0.600;AN=2;BaseQRankSum=1.827;ClippingRankSum=1.323;DB;DP=20;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=60.00;MQ0=0;MQRankSum=0.441;QD=5.74;ReadPosRankSum=0.063;set=variant5 GT:AD:DP:GQ:PL 1/2:2,9,9:20:99:425,145,183,175,0,166`
	ioreader := strings.NewReader(vcfLine)
	req := s.Require()

	err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel)
	req.NoError(err, "Valid VCF line should not return error")

	// first variant
	variant := <-s.outChannel
	req.NotNil(variant, "One variant should come out of channel")
	assert.Equal(s.T(), "CGGCCACGTCCCCCTATGGAGGG", variant.Ref)
	assert.Equal(s.T(), "C", variant.Alt)

	// second variant
	variant, hasMore := <-s.outChannel
	req.True(hasMore, "Second variant should be in the channel")
	req.NotNil(variant, "Second variant should come out of channel")
	assert.Equal(s.T(), "C", variant.Ref)
	assert.Equal(s.T(), "T", variant.Alt)

	// third variant
	variant, hasMore = <-s.outChannel
	req.True(hasMore, "Third variant should be in the channel")
	req.NotNil(variant, "Third variant should come out of channel")
	assert.Equal(s.T(), "C", variant.Ref)
	assert.Equal(s.T(), "CGGCCACGTCCCCCTATGGAGGG", variant.Alt)

	_, hasMore = <-s.outChannel
	assert.False(s.T(), hasMore, "No fourth variant should come out of the channel, it should be closed")
	_, hasMore = <-s.invalidChannel
	assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed")
}

// TestFixSuffixSuite runs every test method of FixSuffixSuite under go test.
func TestFixSuffixSuite(t *testing.T) {
	suite.Run(t, new(FixSuffixSuite))
}

// StructuralSuite groups the tests for the structural-variant INFO keys
// (IMPRECISE, NOVEL, END, SVTYPE, SVLEN, CIPOS, CIEND).
type StructuralSuite struct {
	suite.Suite

	// outChannel receives the successfully parsed variants.
	outChannel chan *vcf.Variant
	// invalidChannel receives the lines that could not be parsed.
	invalidChannel chan vcf.InvalidLine
}

// SetupTest gives each test fresh channels, buffered to 10 entries each.
func (s *StructuralSuite) SetupTest() {
	s.outChannel = make(chan *vcf.Variant, 10)
	s.invalidChannel = make(chan vcf.InvalidLine, 10)
}

// TestNoSpecificStructuralVariantFieldsSet parses a record whose INFO column
// holds only AC=1 and expects every structural-variant field to stay nil,
// with AC still available through the Info map.
func (s *StructuralSuite) TestNoSpecificStructuralVariantFieldsSet() {
	vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423
1 847491 CNVR8241.1 G A 745.77 PASS AC=1 GT 0/1`
	ioreader := strings.NewReader(vcfLine)
	req := s.Require()

	err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel)
	req.NoError(err, "Valid VCF line should not return error")

	variant := <-s.outChannel
	req.NotNil(variant, "One variant should come out of channel")

	assert.Equal(s.T(), "1", variant.Chrom)
	assert.Equal(s.T(), "G", variant.Ref)
	assert.Equal(s.T(), "A", variant.Alt)
	req.NotNil(variant.Qual)
	assert.Equal(s.T(), 745.77, *variant.Qual)
	assert.Equal(s.T(), "PASS", variant.Filter)

	assert.NotNil(s.T(), variant.Info)
	assert.Len(s.T(), variant.Info, 1)
	ac, ok := variant.Info["AC"]
	assert.True(s.T(), ok, "AC key must be found")
	assert.Equal(s.T(), "1", ac, "ac")

	assert.Nil(s.T(), variant.Imprecise)
	assert.Nil(s.T(), variant.Novel)
	assert.Nil(s.T(), variant.End)
	assert.Nil(s.T(), variant.StructuralVariantType)
	assert.Nil(s.T(), variant.StructuralVariantLength)
	assert.Nil(s.T(), variant.ConfidenceIntervalAroundPosition)
	assert.Nil(s.T(), variant.ConfidenceIntervalAroundEnd)

	_, hasMore := <-s.outChannel
	assert.False(s.T(), hasMore, "No second variant should come out of the channel, it should be closed")
	_, hasMore = <-s.invalidChannel
	assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed")
}

// TestImpreciseNovel parses a record carrying the IMPRECISE and NOVEL flags
// and expects both corresponding fields to be set and true.
func (s *StructuralSuite) TestImpreciseNovel() {
	vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423
1 847491 CNVR8241.1 G A 745.77 PASS IMPRECISE;NOVEL GT 0/1`
	ioreader := strings.NewReader(vcfLine)
	req := s.Require()

	err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel)
	req.NoError(err, "Valid VCF line should not return error")

	variant := <-s.outChannel
	req.NotNil(variant, "One variant should come out of channel")

	assert.Equal(s.T(), "1", variant.Chrom)
	assert.Equal(s.T(), "G", variant.Ref)
	assert.Equal(s.T(), "A", variant.Alt)
	req.NotNil(variant.Qual)
	assert.Equal(s.T(), 745.77, *variant.Qual)
	assert.Equal(s.T(), "PASS", variant.Filter)

	req.NotNil(variant.Imprecise)
	assert.True(s.T(), *variant.Imprecise)
	req.NotNil(variant.Novel)
	assert.True(s.T(), *variant.Novel)

	_, hasMore := <-s.outChannel
	assert.False(s.T(), hasMore, "No second variant should come out of the channel, it should be closed")
	_, hasMore = <-s.invalidChannel
	assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed")
}

// TestInfoEnd parses one record with END=1752234 and one without an END key.
// End is expected to be set on the first variant and nil on the second.
func (s *StructuralSuite) TestInfoEnd() {
	vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423
1 847491 CNVR8241.1 G A 755.77 PASS END=1752234 GT 0/1
1 847491 rs28407778 GTTTA G.... 745.77 PASS AC=1;AF=0.500;AN=2;BaseQRankSum=0.842;ClippingRankSum=0.147;DB;DP=41;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQ0=0;MQRankSum=-1.109;QD=18.19;ReadPosRankSum=0.334;VQSLOD=2.70;culprit=FS;set=variant GT:AD:DP:GQ:PL 0/1:16,25:41:99:774,0,434`
	ioreader := strings.NewReader(vcfLine)
	req := s.Require()

	err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel)
	req.NoError(err, "Valid VCF line should not return error")

	variant := <-s.outChannel
	req.NotNil(variant, "One variant should come out of channel")

	assert.Equal(s.T(), "1", variant.Chrom)
	assert.Equal(s.T(), "G", variant.Ref)
	assert.Equal(s.T(), "A", variant.Alt)
	req.NotNil(variant.Qual)
	assert.Equal(s.T(), 755.77, *variant.Qual)
	assert.Equal(s.T(), "PASS", variant.Filter)

	req.NotNil(variant.End)
	assert.Equal(s.T(), 1752234, *variant.End)

	variant = <-s.outChannel
	req.NotNil(variant, "Second variant should come out of channel")

	assert.Equal(s.T(), "1", variant.Chrom)
	assert.Equal(s.T(), "GTTTA", variant.Ref)
	assert.Equal(s.T(), "G", variant.Alt)
	req.NotNil(variant.Qual)
	assert.Equal(s.T(), 745.77, *variant.Qual)
	assert.Equal(s.T(), "PASS", variant.Filter)

	assert.Nil(s.T(), variant.End)

	_, hasMore := <-s.outChannel
	assert.False(s.T(), hasMore, "No third variant should come out of the channel, it should be closed")
	_, hasMore = <-s.invalidChannel
	assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed")
}

// TestSVType parses one record per recognised SVTYPE value and expects the
// matching vcf.SVType on each variant. A final record with SVTYPE=INVALID is
// expected to leave StructuralVariantType nil and keep the raw value in Info.
func (s *StructuralSuite) TestSVType() {
	vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423
1 847491 CNVR8241.1 G A 755.77 PASS SVTYPE=DEL GT 0/1
1 847491 CNVR8241.1 G A 755.77 PASS SVTYPE=DUP GT 0/1
1 847491 CNVR8241.1 G A 755.77 PASS SVTYPE=INS GT 0/1
1 847491 CNVR8241.1 G A 755.77 PASS SVTYPE=INV GT 0/1
1 847491 CNVR8241.1 G A 755.77 PASS SVTYPE=CNV GT 0/1
1 847491 CNVR8241.1 G A 755.77 PASS SVTYPE=DUP:TANDEM GT 0/1
1 847491 CNVR8241.1 G A 755.77 PASS SVTYPE=DEL:ME GT 0/1
1 847491 CNVR8241.1 G A 755.77 PASS SVTYPE=INS:ME GT 0/1
1 847491 CNVR8241.1 G A 755.77 PASS SVTYPE=BND GT 0/1
1 847491 CNVR8241.1 G A 755.77 PASS SVTYPE=INVALID GT 0/1`
	ioreader := strings.NewReader(vcfLine)
	req := s.Require()

	err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel)
	req.NoError(err, "Valid VCF line should not return error")

	// The first record (SVTYPE=DEL) also gets its basic columns checked.
	variant := <-s.outChannel
	req.NotNil(variant, "One variant should come out of channel")

	assert.Equal(s.T(), "1", variant.Chrom)
	assert.Equal(s.T(), "G", variant.Ref)
	assert.Equal(s.T(), "A", variant.Alt)
	req.NotNil(variant.Qual)
	assert.Equal(s.T(), 755.77, *variant.Qual)
	assert.Equal(s.T(), "PASS", variant.Filter)

	req.NotNil(variant.StructuralVariantType)
	assert.Equal(s.T(), vcf.Deletion, *variant.StructuralVariantType)

	// Expected types for the DUP .. BND records, in input order.
	remaining := []vcf.SVType{
		vcf.Duplication,
		vcf.Insertion,
		vcf.Inversion,
		vcf.CopyNumberVariation,
		vcf.TandemDuplication,
		vcf.DeletionMobileElement,
		vcf.InsertionMobileElement,
		vcf.Breakend,
	}
	for _, want := range remaining {
		variant = <-s.outChannel
		req.NotNil(variant)
		req.NotNil(variant.StructuralVariantType)
		assert.Equal(s.T(), want, *variant.StructuralVariantType)
	}

	// The unrecognised SVTYPE value must stay reachable through Info.
	variant = <-s.outChannel
	req.NotNil(variant)
	assert.Nil(s.T(), variant.StructuralVariantType)
	assert.NotNil(s.T(), variant.Info)
	assert.Len(s.T(), variant.Info, 1)
	svtype, ok := variant.Info["SVTYPE"]
	assert.True(s.T(), ok, "SVTYPE key must be found")
	assert.Equal(s.T(), "INVALID", svtype)

	_, hasMore := <-s.outChannel
	assert.False(s.T(), hasMore, "No more variants should come out of the channel, it should be closed")
	_, hasMore = <-s.invalidChannel
	assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed")
}

// TestStructuralVariantInts parses a record with SVLEN, CIPOS and CIEND and
// expects each to be exposed as an integer on the variant.
func (s *StructuralSuite) TestStructuralVariantInts() {
	vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423
1 847491 CNVR8241.1 G A 745.77 PASS SVTYPE=DUP;SVLEN=337;CIPOS=10;CIEND=7 GT 0/1`
	ioreader := strings.NewReader(vcfLine)
	req := s.Require()

	err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel)
	req.NoError(err, "Valid VCF line should not return error")

	variant := <-s.outChannel
	req.NotNil(variant, "One variant should come out of channel")

	assert.Equal(s.T(), "1", variant.Chrom)
	assert.Equal(s.T(), "G", variant.Ref)
	assert.Equal(s.T(), "A", variant.Alt)
	req.NotNil(variant.Qual)
	assert.Equal(s.T(), 745.77, *variant.Qual)
	assert.Equal(s.T(), "PASS", variant.Filter)

	req.NotNil(variant.StructuralVariantType)
	assert.Equal(s.T(), vcf.Duplication, *variant.StructuralVariantType)

	req.NotNil(variant.StructuralVariantLength)
	assert.Equal(s.T(), 337, *variant.StructuralVariantLength)
	req.NotNil(variant.ConfidenceIntervalAroundPosition)
	assert.Equal(s.T(), 10, *variant.ConfidenceIntervalAroundPosition)
	req.NotNil(variant.ConfidenceIntervalAroundEnd)
	assert.Equal(s.T(), 7, *variant.ConfidenceIntervalAroundEnd)

	_, hasMore := <-s.outChannel
	assert.False(s.T(), hasMore, "No second variant should come out of the channel, it should be closed")
	_, hasMore = <-s.invalidChannel
	assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed")
}

// TestNegativeSVLen parses a deletion with SVLEN=-52 and expects the negative
// length to be preserved.
func (s *StructuralSuite) TestNegativeSVLen() {
	vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 185423
1 847491 CNVR8241.1 G A 745.77 PASS SVTYPE=DEL;SVLEN=-52;CIPOS=10;CIEND=7 GT 0/1`
	ioreader := strings.NewReader(vcfLine)
	req := s.Require()

	err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel)
	req.NoError(err, "Valid VCF line should not return error")

	variant := <-s.outChannel
	req.NotNil(variant, "One variant should come out of channel")

	assert.Equal(s.T(), "1", variant.Chrom)
	assert.Equal(s.T(), "G", variant.Ref)
	assert.Equal(s.T(), "A", variant.Alt)
	req.NotNil(variant.Qual)
	assert.Equal(s.T(), 745.77, *variant.Qual)
	assert.Equal(s.T(), "PASS", variant.Filter)

	req.NotNil(variant.StructuralVariantType)
	assert.Equal(s.T(), vcf.Deletion, *variant.StructuralVariantType)

	req.NotNil(variant.StructuralVariantLength)
	assert.Equal(s.T(), -52, *variant.StructuralVariantLength)

	_, hasMore := <-s.outChannel
	assert.False(s.T(), hasMore, "No second variant should come out of the channel, it should be closed")
	_, hasMore = <-s.invalidChannel
	assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed")
}

// TestCompleteStructuralVariants parses three structural-variant records and
// checks, for each one, the basic columns, the typed SVTYPE and END fields,
// and that the remaining INFO keys are kept as strings in the Info map.
// The expected Pos is one less than the POS column of the input record.
func (s *StructuralSuite) TestCompleteStructuralVariants() {
	// NOTE(review): each record below shows one column fewer than the header
	// (a single token between ID and QUAL); confirm the REF/ALT columns
	// against the original fixture.
	vcfLine := `#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT N8N1T6
X 1734042 CNVR8241.1 REF 30.2 PASS SVTYPE=DUP;END=1752234;EXPECTED=3407;OBSERVED=4449;RATIO=1.31;BF=30.2 GT 0/1
X 6451689 . REF 35.2 PASS SVTYPE=DEL;END=6452594;EXPECTED=367;OBSERVED=111;RATIO=0.302;BF=35.2 GT 1/1
X 101576281 . REF 28.3 LOWBFSCORE SVTYPE=DEL;END=101581456;EXPECTED=134;OBSERVED=4;RATIO=0.0299;BF=28.3 GT 1/1`
	ioreader := strings.NewReader(vcfLine)
	req := s.Require()

	err := vcf.ToChannel(ioreader, s.outChannel, s.invalidChannel)
	req.NoError(err, "Valid VCF line should not return error")

	// INFO keys that must be present in the Info map of every variant.
	infoKeys := []string{"EXPECTED", "OBSERVED", "RATIO", "BF"}
	expectations := []struct {
		pos    int
		qual   float64
		filter string
		svType vcf.SVType
		end    int
		info   []string // expected values for infoKeys, in the same order
	}{
		{1734041, 30.2, "PASS", vcf.Duplication, 1752234, []string{"3407", "4449", "1.31", "30.2"}},
		{6451688, 35.2, "PASS", vcf.Deletion, 6452594, []string{"367", "111", "0.302", "35.2"}},
		{101576280, 28.3, "LOWBFSCORE", vcf.Deletion, 101581456, []string{"134", "4", "0.0299", "28.3"}},
	}

	for _, want := range expectations {
		variant := <-s.outChannel
		req.NotNil(variant)

		assert.Equal(s.T(), "X", variant.Chrom)
		assert.Equal(s.T(), want.pos, variant.Pos)
		req.NotNil(variant.Qual)
		assert.Equal(s.T(), want.qual, *variant.Qual)
		assert.Equal(s.T(), want.filter, variant.Filter)

		req.NotNil(variant.StructuralVariantType)
		assert.Equal(s.T(), want.svType, *variant.StructuralVariantType)
		req.NotNil(variant.End)
		assert.Equal(s.T(), want.end, *variant.End)

		assert.NotNil(s.T(), variant.Info)
		for i, key := range infoKeys {
			value, ok := variant.Info[key]
			assert.True(s.T(), ok, key+" key must be found")
			assert.Equal(s.T(), want.info[i], value)
		}
	}

	_, hasMore := <-s.outChannel
	assert.False(s.T(), hasMore, "No more variants should come out of the channel, it should be closed")
	_, hasMore = <-s.invalidChannel
	assert.False(s.T(), hasMore, "No variant should come out of invalid channel, it should be closed")
}

// TestStructuralSuite runs every test method of StructuralSuite under go test.
func TestStructuralSuite(t *testing.T) {
	suite.Run(t, new(StructuralSuite))
}

--------------------------------------------------------------------------------