├── tsconfig.lint.json ├── codecov.yml ├── src ├── index.ts ├── parseMetaString.ts ├── parseGenotypesOnly.ts ├── parseBreakend.ts ├── vcfReserved.ts └── parse.ts ├── .prettierrc.json ├── test ├── data │ ├── pedigree.vcf │ ├── vcf4.3_spec_bnd.vcf │ ├── breakends.vcf │ ├── weird_info_and_missing_format.vcf │ ├── sample2genotype.vcf │ ├── spec-example.vcf │ ├── multipleAltSVs.vcf │ ├── simple.vcf │ ├── vcf4.3_spec_snippet.vcf │ ├── sniffles.vcf │ ├── vcf44_spec.vcf │ ├── y-chrom-haploid.vcf │ ├── clinvar.header.vcf │ └── 1000genomes.vcf ├── __snapshots__ │ ├── parseGenotypesOnly.test.ts.snap │ └── parseMetaString.test.ts.snap ├── parseGenotypesOnly.test.ts ├── parseMetaString.test.ts ├── parseGenotypesOnly-ultrafast-edge.test.ts ├── index.test.ts ├── parse.test.ts └── parseGenotypesOnly-edge-cases.test.ts ├── .editorconfig ├── tsconfig.json ├── .github └── workflows │ └── push.yml ├── scripts └── build-both-branches.sh ├── LICENSE ├── .gitignore ├── benchmark └── master-vs-current.bench.ts ├── package.json ├── eslint.config.mjs ├── CHANGELOG.md └── README.md /tsconfig.lint.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig", 3 | "include": ["src", "test"] 4 | } 5 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | patch: false 4 | project: false 5 | github_checks: 6 | annotations: false 7 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export type { Variant } from './parse.ts' 2 | 3 | export { default } from './parse.ts' 4 | export * from './parseBreakend.ts' 5 | -------------------------------------------------------------------------------- /.prettierrc.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "semi": false, 3 | "trailingComma": "all", 4 | "singleQuote": true, 5 | "arrowParens": "avoid", 6 | "proseWrap": "always" 7 | } 8 | -------------------------------------------------------------------------------- /test/data/pedigree.vcf: -------------------------------------------------------------------------------- 1 | ##PEDIGREE= 2 | ##PEDIGREE= 3 | ##PEDIGREE= 4 | ##PEDIGREE= 5 | #CHROM POS ID REF ALT QUAL FILTER INFO 6 | -------------------------------------------------------------------------------- /test/data/vcf4.3_spec_bnd.vcf: -------------------------------------------------------------------------------- 1 | #CHROM POS ID REF ALT QUAL FILTER INFO 2 | 2 321681 bnd_W G G]17:198982] 6 PASS SVTYPE=BND 3 | 2 321682 bnd_V T ]13:123456]T 6 PASS SVTYPE=BND 4 | 13 123456 bnd_U C C[2:321682[ 6 PASS SVTYPE=BND 5 | 13 123457 bnd_X A [17:198983[A 6 PASS SVTYPE=BND 6 | 17 198982 bnd_Y A A]2:321681] 6 PASS SVTYPE=BND 7 | 17 198983 bnd_Z C [13:123457[C 6 PASS SVTYPE=BND 8 | -------------------------------------------------------------------------------- /test/__snapshots__/parseGenotypesOnly.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html 2 | 3 | exports[`parse genotypes 1`] = ` 4 | { 5 | "h1": "./.", 6 | "h2": "./.", 7 | } 8 | `; 9 | 10 | exports[`parse genotypes 2`] = ` 11 | { 12 | "h1": "./.", 13 | "h2": "./.", 14 | } 15 | `; 16 | 17 | exports[`parse genotypes 3`] = ` 18 | { 19 | "h1": "./.", 20 | "h2": "./.", 21 | } 22 | `; 23 | -------------------------------------------------------------------------------- /test/parseGenotypesOnly.test.ts: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | 3 | import { parseGenotypesOnly } from '../src/parseGenotypesOnly' 4 | 5 | test('parse genotypes', 
() => { 6 | expect(parseGenotypesOnly('GT', './.\t./.', ['h1', 'h2'])).toMatchSnapshot() 7 | expect( 8 | parseGenotypesOnly('GT:RT', './.:1\t./.', ['h1', 'h2']), 9 | ).toMatchSnapshot() 10 | expect( 11 | parseGenotypesOnly('RT:GT', '1:./.\t2:./.', ['h1', 'h2']), 12 | ).toMatchSnapshot() 13 | }) 14 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig helps developers define and maintain consistent 2 | # coding styles between different editors and IDEs 3 | # editorconfig.org 4 | 5 | root = true 6 | 7 | [*] 8 | 9 | # Change these settings to your own preference 10 | indent_style = space 11 | indent_size = 2 12 | 13 | # We recommend you to keep these unchanged 14 | end_of_line = lf 15 | charset = utf-8 16 | trim_trailing_whitespace = true 17 | insert_final_newline = true 18 | 19 | [*.md] 20 | trim_trailing_whitespace = false 21 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["src"], 3 | "compilerOptions": { 4 | "outDir": "dist", 5 | "lib": ["dom", "esnext"], 6 | "skipLibCheck": true, 7 | "declaration": true, 8 | "moduleResolution": "node", 9 | "sourceMap": true, 10 | "strict": true, 11 | "noImplicitReturns": true, 12 | "noFallthroughCasesInSwitch": true, 13 | "noUncheckedIndexedAccess": true, 14 | "allowImportingTsExtensions": true, 15 | "rewriteRelativeImportExtensions": true, 16 | "esModuleInterop": true 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /test/data/breakends.vcf: -------------------------------------------------------------------------------- 1 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT BAMs/caudaus.sorted.sam 2 | 11 94975747 MantaBND:0:2:3:0:0:0:1 G G]8:107653520] . 
PASS SVTYPE=BND;MATEID=MantaBND:0:2:3:0:0:0:0;CIPOS=0,2;HOMLEN=2;HOMSEQ=TT;BND_DEPTH=216;MATE_BND_DEPTH=735 PR:SR 722,9:463,15 3 | 11 94975753 MantaDEL:0:1:2:0:0:0 T . PASS END=94987865;SVTYPE=DEL;SVLEN=12112;IMPRECISE;CIPOS=-156,156;CIEND=-150,150 PR 161,13 4 | 11 94987872 MantaBND:0:0:1:0:0:0:0 T T[8:107653411[ . PASS SVTYPE=BND;MATEID=MantaBND:0:0:1:0:0:0:1;BND_DEPTH=171;MATE_BND_DEPTH=830 PR:SR 489,4:520,19 5 | -------------------------------------------------------------------------------- /test/data/weird_info_and_missing_format.vcf: -------------------------------------------------------------------------------- 1 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT BAMs/caudaus.sorted.sam 2 | lcl|Scaffald_1 80465 rs118266897 R A 29 PASS NS=3;0,14;AF=0.5;DB;112;PG2.1 3 | lcl|Scaffald_1 84818 rs118269296 R G 29 PASS NS=3;0,14;AF=0.5;DB;112;PG2.1 4 | lcl|Scaffald_1 95414 rs118218236 W T 29 PASS NS=3;0,14;AF=0.5;DB;112;PG2.1 5 | lcl|Scaffald_1 231384 rs118264755 R A 29 PASS NS=3;0,14;AF=0.5;DB;112;PG2.1 6 | lcl|Scaffald_1 236429 rs118223336 R G 29 PASS NS=3;0,14;AF=6.5;DB;112;PG2.1 7 | lcl|Scaffald_1 245378 rs118217257 R G 29 PASS NS=3;0,14;AF=0.5;DB;112;PG2.1 8 | -------------------------------------------------------------------------------- /test/__snapshots__/parseMetaString.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html 2 | 3 | exports[`array in values 1`] = ` 4 | { 5 | "ID": "Assay", 6 | "Number": ".", 7 | "Type": "String", 8 | "Values": [ 9 | "WholeGenome", 10 | "Exome", 11 | ], 12 | } 13 | `; 14 | 15 | exports[`equals in description 1`] = ` 16 | { 17 | "Description": "Allelic Probability, P(Allele=1|Haplotype)", 18 | "ID": "AP", 19 | "Number": "2", 20 | "Type": "Float", 21 | } 22 | `; 23 | 24 | exports[`quoted string with comma in description 1`] = ` 25 | { 26 | "Description": "dbSNP membership, build 129", 27 | "ID": "DB", 28 | "Number": "0", 29 
| "Type": "Flag", 30 | } 31 | `; 32 | -------------------------------------------------------------------------------- /test/data/sample2genotype.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##fileDate=2024-12-01 3 | ##source=ClinVar 4 | ##reference=GRCh37 5 | ##META= 6 | ##META= 7 | ##META= 8 | ##META= 9 | ##SAMPLE= 10 | ##SAMPLE= 11 | #CHROM POS ID REF ALT QUAL FILTER INFO 12 | -------------------------------------------------------------------------------- /.github/workflows/push.yml: -------------------------------------------------------------------------------- 1 | name: Push 2 | 3 | on: push 4 | 5 | jobs: 6 | test: 7 | name: Lint, build, and test on node 20.x and ubuntu-latest 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 11 | - name: Use Node.js 20.x 12 | uses: actions/setup-node@v4 13 | with: 14 | node-version: 20.x 15 | - name: Install deps (with cache) 16 | uses: bahmutov/npm-install@v1 17 | - name: Lint codebase 18 | run: yarn lint 19 | - name: Build codebase 20 | run: yarn build 21 | - name: Test codebase 22 | run: yarn test --coverage 23 | - name: Upload coverage 24 | run: bash <(curl -s https://codecov.io/bash) 25 | -------------------------------------------------------------------------------- /scripts/build-both-branches.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if ! git diff --quiet || ! git diff --cached --quiet; then 6 | echo "Error: Uncommitted changes detected. Please commit or stash your changes first." 7 | exit 1 8 | fi 9 | 10 | CURRENT_BRANCH=$(git branch --show-current) 11 | BRANCH1="${1:-master}" 12 | BRANCH2="${2:-$CURRENT_BRANCH}" 13 | 14 | rm -rf esm_branch1 esm_branch2 15 | 16 | echo "Building $BRANCH1 branch..." 
17 | 18 | git checkout "$BRANCH1" 19 | yarn 20 | yarn build:esm 21 | mv esm esm_branch1 22 | echo "$BRANCH1" >esm_branch1/branchname.txt 23 | 24 | echo "Building $BRANCH2 branch..." 25 | git checkout "$BRANCH2" 26 | yarn 27 | yarn build:esm 28 | mv esm esm_branch2 29 | echo "$BRANCH2" >esm_branch2/branchname.txt 30 | 31 | echo "Build complete!" 32 | echo "$BRANCH1 build: esm_branch1/index.js" 33 | echo "$BRANCH2 build: esm_branch2/index.js" 34 | -------------------------------------------------------------------------------- /test/parseMetaString.test.ts: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | 3 | import { parseMetaString } from '../src/parseMetaString' 4 | 5 | test('array in values', () => { 6 | const result1 = parseMetaString( 7 | '', 8 | ) 9 | const result2 = parseMetaString( 10 | '', 11 | ) 12 | expect(result1).toEqual(result2) 13 | expect(result1).toMatchSnapshot() 14 | }) 15 | 16 | test('quoted string with comma in description', () => { 17 | expect( 18 | parseMetaString( 19 | '', 20 | ), 21 | ).toMatchSnapshot() 22 | }) 23 | 24 | test('equals in description', () => { 25 | expect( 26 | parseMetaString( 27 | '', 28 | ), 29 | ).toMatchSnapshot() 30 | }) 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Garrett Stevens 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice 
and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Project-specific 2 | dist/ 3 | .vscode/ 4 | 5 | # Logs 6 | logs 7 | *.log 8 | npm-debug.log* 9 | yarn-debug.log* 10 | yarn-error.log* 11 | 12 | # Runtime data 13 | pids 14 | *.pid 15 | *.seed 16 | *.pid.lock 17 | 18 | # Directory for instrumented libs generated by jscoverage/JSCover 19 | lib-cov 20 | 21 | # Coverage directory used by tools like istanbul 22 | coverage 23 | 24 | # nyc test coverage 25 | .nyc_output 26 | 27 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 28 | .grunt 29 | 30 | # Bower dependency directory (https://bower.io/) 31 | bower_components 32 | 33 | # node-waf configuration 34 | .lock-wscript 35 | 36 | # Compiled binary addons (https://nodejs.org/api/addons.html) 37 | build/Release 38 | 39 | # Dependency directories 40 | node_modules/ 41 | jspm_packages/ 42 | 43 | # TypeScript v1 declaration files 44 | typings/ 45 | 46 | # Optional npm cache directory 47 | .npm 48 | 49 | # Optional eslint cache 50 | .eslintcache 51 | 52 | # Optional REPL history 53 | .node_repl_history 54 | 55 | # Output of 'npm pack' 56 | *.tgz 57 | 58 | # Yarn Integrity file 59 | .yarn-integrity 60 | 61 | # dotenv environment variables file 62 | .env 63 | 64 | 
# next.js build output 65 | .next 66 | *.swp 67 | 68 | esm-master 69 | esm-optimized 70 | esm 71 | 72 | *.cpuprofile 73 | *-results.* 74 | esm_*/ 75 | -------------------------------------------------------------------------------- /test/data/spec-example.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.3 2 | ##fileDate=20090805 3 | ##source=myImputationProgramV3.1 4 | ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta 5 | ##contig= 6 | ##phasing=partial 7 | ##INFO= 8 | ##INFO= 9 | ##INFO= 10 | ##INFO= 11 | ##INFO= 12 | ##INFO= 13 | ##INFO= 14 | ##INFO= 15 | ##FILTER= 16 | ##FILTER= 17 | ##FORMAT= 18 | ##FORMAT= 19 | ##FORMAT= 20 | ##FORMAT= 21 | ##FORMAT= 22 | ##FORMAT= 23 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 24 | -------------------------------------------------------------------------------- /test/data/multipleAltSVs.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.2 2 | ##ALT= 3 | ##ALT= 4 | ##ALT= 5 | ##ALT= 6 | ##FILTER= 7 | ##INFO= 8 | ##INFO= 9 | ##INFO= 10 | ##INFO= 11 | ##INFO= 12 | ##INFO= 13 | ##INFO= 14 | ##INFO= 15 | ##INFO= 16 | ##INFO= 17 | ##INFO= 18 | ##contig= 19 | #CHROM POS ID REF ALT QUAL FILTER INFO 20 | chr1 1 . A . . 21 | chr1 1 . A C, . . 22 | chr1 1 . A , . . 23 | chr1 1 . A A[chr1:1[,A]chr1:1] . . 
SVTYPE=BND 24 | -------------------------------------------------------------------------------- /test/data/simple.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.3 2 | ##fileDate=20090805 3 | ##source=myImputationProgramV3.1 4 | ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta 5 | ##contig= 6 | ##phasing=partial 7 | ##INFO= 8 | ##INFO= 9 | ##INFO= 10 | ##INFO= 11 | ##INFO= 12 | ##INFO= 13 | ##FILTER= 14 | ##FILTER= 15 | ##FORMAT= 16 | ##FORMAT= 17 | ##FORMAT= 18 | ##FORMAT= 19 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 20 | 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. 21 | 20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 22 | 20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 23 | 20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 24 | 20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 25 | -------------------------------------------------------------------------------- /test/data/vcf4.3_spec_snippet.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.3 2 | ##fileDate=20090805 3 | ##source=myImputationProgramV3.1 4 | ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta 5 | ##contig= 6 | ##phasing=partial 7 | ##INFO= 8 | ##INFO= 9 | ##INFO= 10 | ##INFO= 11 | ##INFO= 12 | ##INFO= 13 | ##FILTER= 14 | ##FILTER= 15 | ##FORMAT= 16 | ##FORMAT= 17 | ##FORMAT= 18 | ##FORMAT= 19 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 20 | 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. 21 | 20 17330 . 
T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 22 | 20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 23 | 20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 24 | 20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 25 | -------------------------------------------------------------------------------- /test/parseGenotypesOnly-ultrafast-edge.test.ts: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | 3 | import { parseGenotypesOnly } from '../src/parseGenotypesOnly' 4 | 5 | test('ultra-fast path should not be tricked by mixed ploidy with matching length', () => { 6 | // 4 samples with total length = 15 = 4*4-1 7 | // Mix of triploid (5 chars) and haploid (1 char) 8 | const result = parseGenotypesOnly('GT', '0/1/2\t0\t1/2/3\t1', [ 9 | 'S1', 10 | 'S2', 11 | 'S3', 12 | 'S4', 13 | ]) 14 | 15 | // Should correctly parse each genotype 16 | expect(result).toEqual({ 17 | S1: '0/1/2', 18 | S2: '0', 19 | S3: '1/2/3', 20 | S4: '1', 21 | }) 22 | }) 23 | 24 | test('another ultra-fast path edge case with >10 samples', () => { 25 | // 11 samples: mix to create length = 11*4-1 = 43 26 | // Need: 43 chars total 27 | // Try: 5 × 5-char + 6 × 1-char = 5*6 + 6*2 - 1 = 30 + 12 - 1 = 41 (not quite) 28 | // Try: 6 × 5-char + 5 × 1-char = 6*6 + 5*2 - 1 = 36 + 10 - 1 = 45 (too much) 29 | // Try: different mix... 
let's calculate properly 30 | // For 12 samples: expected = 47 31 | // 6 × 5-char + 6 × 1-char = 6*6 + 6*2 - 1 = 36 + 12 - 1 = 47 ✓ 32 | const data = '0/1/2\t0\t1/2/3\t1\t2/3/4\t2\t3/4/5\t3\t4/5/6\t4\t5/6/7\t5' 33 | const samples = [ 34 | 'S1', 35 | 'S2', 36 | 'S3', 37 | 'S4', 38 | 'S5', 39 | 'S6', 40 | 'S7', 41 | 'S8', 42 | 'S9', 43 | 'S10', 44 | 'S11', 45 | 'S12', 46 | ] 47 | const result = parseGenotypesOnly('GT', data, samples) 48 | 49 | expect(result).toEqual({ 50 | S1: '0/1/2', 51 | S2: '0', 52 | S3: '1/2/3', 53 | S4: '1', 54 | S5: '2/3/4', 55 | S6: '2', 56 | S7: '3/4/5', 57 | S8: '3', 58 | S9: '4/5/6', 59 | S10: '4', 60 | S11: '5/6/7', 61 | S12: '5', 62 | }) 63 | }) 64 | -------------------------------------------------------------------------------- /test/data/sniffles.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.2 2 | ##source=Sniffles 3 | ##fileDate=20170420 4 | ##ALT= 5 | ##ALT= 6 | ##ALT= 7 | ##ALT= 8 | ##ALT= 9 | ##ALT= 10 | ##INFO= 11 | ##INFO= 12 | ##INFO= 13 | ##INFO= 14 | ##INFO= 15 | ##INFO= 16 | ##INFO= 17 | ##INFO= 18 | ##INFO= 19 | ##FORMAT= 20 | ##FORMAT= 21 | ##FORMAT= 22 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT /seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam 23 | 8 17709115 28329_0 N . 
PASS PRECISE;SVMETHOD=Snifflesv1.0.3;CHR2=8;END=17709148;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=20.524521;Kurtosis_quant_stop=3.925926;SVTYPE=DEL;SUPTYPE=AL;SVLEN=33;STRANDS=+-;STRANDS2=20,14,20,14;RE=34;AF=0.971429 GT:DR:DV 1/1:1:34 24 | -------------------------------------------------------------------------------- /test/data/vcf44_spec.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.4 2 | ##ALT= 3 | ##ALT= 4 | ##ALT= 5 | ##ALT= 6 | ##ALT= 7 | ##ALT= 8 | ##INFO= 9 | ##INFO= 10 | ##INFO= 11 | ##INFO= 12 | ##INFO= 13 | ##INFO= 14 | ##INFO= 15 | ##INFO= 16 | ##INFO= 17 | ##contig= 18 | ##FORMAT= 19 | ##custom_header_field_containing_chr_seq=ATGCGAAAAAAATGT 20 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample 21 | chrA 2 . TGC T . . EVENT=DEL_seq GT 0/1 22 | chrA 2 . T . . SVLEN=2;SVCLAIM=DJ;EVENT=DEL_symbolic;END=4 GT 0/1 23 | chrA 2 delbp1 T T[chrA:5[ . . MATEID=delbp2;EVENT=DEL_split_bp_cn GT 0/1 24 | chrA 2 delbp2 A ]chrA:2]A . . MATEID=delbp1;EVENT=DEL_split_bp_cn GT 0/1 25 | chrA 2 . T . . SVLEN=2;SVCLAIM=D;EVENT=DEL_split_bp_cn;END=4 GT 0/1 26 | chrA 5 . G GAAA . . EVENT=homology_seq GT 1/1 27 | chrA 5 . G . . SVLEN=3;CIPOS=0,5;EVENT=homology_dup;END=8 GT 1/1 28 | chrA 14 . T . . IMPRECISE;SVLEN=100;CILEN=-50,50;CIPOS=-10,10;END=14 GT 0/1 29 | chrA 14 . G .CCCCCCG . . 
EVENT=single_breakend GT 0/1 30 | -------------------------------------------------------------------------------- /test/index.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, expect, it } from 'vitest' 2 | 3 | import { Breakend, parseBreakend } from '../src' 4 | 5 | describe('testBreakend', () => { 6 | it('can parse breakends', () => { 7 | // Breakends from https://samtools.github.io/hts-specs/VCFv4.3.pdf 8 | const breakendsAndParsed = [ 9 | [ 10 | 'G]17:198982]', 11 | { 12 | MatePosition: '17:198982', 13 | Join: 'right', 14 | Replacement: 'G', 15 | MateDirection: 'left', 16 | }, 17 | ], 18 | [ 19 | ']13:123456]T', 20 | { 21 | MatePosition: '13:123456', 22 | Join: 'left', 23 | Replacement: 'T', 24 | MateDirection: 'left', 25 | }, 26 | ], 27 | [ 28 | 'C[2:321682[', 29 | { 30 | MatePosition: '2:321682', 31 | Join: 'right', 32 | Replacement: 'C', 33 | MateDirection: 'right', 34 | }, 35 | ], 36 | [ 37 | '[17:198983[A', 38 | { 39 | MatePosition: '17:198983', 40 | Join: 'left', 41 | Replacement: 'A', 42 | MateDirection: 'right', 43 | }, 44 | ], 45 | [ 46 | 'A]2:321681]', 47 | { 48 | MatePosition: '2:321681', 49 | Join: 'right', 50 | Replacement: 'A', 51 | MateDirection: 'left', 52 | }, 53 | ], 54 | [ 55 | '[13:123457[C', 56 | { 57 | MatePosition: '13:123457', 58 | Join: 'left', 59 | Replacement: 'C', 60 | MateDirection: 'right', 61 | }, 62 | ], 63 | ] as [string, Breakend][] 64 | breakendsAndParsed.forEach(([breakend, parsedBreakend]) => { 65 | expect(parseBreakend(breakend)).toEqual(parsedBreakend) 66 | }) 67 | }) 68 | 69 | it('throws on invalid breakend', () => { 70 | expect(() => parseBreakend('[13:123457[')).toThrow(/Invalid breakend/) 71 | }) 72 | 73 | it('returns "undefined" for non-breakend', () => { 74 | expect(parseBreakend('A')).toBeUndefined() 75 | }) 76 | }) 77 | -------------------------------------------------------------------------------- /benchmark/master-vs-current.bench.ts: 
-------------------------------------------------------------------------------- 1 | import { readFileSync } from 'node:fs' 2 | import { bench, describe } from 'vitest' 3 | 4 | import { parseGenotypesOnly as branch1Fn } from '../esm_branch1/parseGenotypesOnly.js' 5 | import { parseGenotypesOnly as branch2Fn } from '../esm_branch2/parseGenotypesOnly.js' 6 | 7 | const branch1Name = readFileSync('esm_branch1/branchname.txt', 'utf8').trim() 8 | const branch2Name = readFileSync('esm_branch2/branchname.txt', 'utf8').trim() 9 | 10 | function generateTestData(numSamples: number, format: string) { 11 | const samples = Array.from({ length: numSamples }, (_, i) => `SAMPLE_${i}`) 12 | let genotypeData: string 13 | if (format === 'GT') { 14 | genotypeData = Array.from({ length: numSamples }, () => '0/1').join('\t') 15 | } else if (format === 'GT:DP:GQ') { 16 | genotypeData = Array.from({ length: numSamples }, () => '0/1:23:99').join( 17 | '\t', 18 | ) 19 | } else if (format === 'DP:GQ:GT') { 20 | genotypeData = Array.from({ length: numSamples }, () => '23:99:0/1').join( 21 | '\t', 22 | ) 23 | } else { 24 | genotypeData = Array.from({ length: numSamples }, () => '0/1').join('\t') 25 | } 26 | return { samples, genotypeData, format } 27 | } 28 | 29 | function benchParseGenotypes( 30 | name: string, 31 | numSamples: number, 32 | format: string, 33 | opts?: { iterations?: number; warmupIterations?: number }, 34 | ) { 35 | const { samples, genotypeData } = generateTestData(numSamples, format) 36 | 37 | describe(name, () => { 38 | bench( 39 | branch1Name, 40 | () => { 41 | branch1Fn(format, genotypeData, samples) 42 | }, 43 | opts, 44 | ) 45 | bench( 46 | branch2Name, 47 | () => { 48 | branch2Fn(format, genotypeData, samples) 49 | }, 50 | opts, 51 | ) 52 | }) 53 | } 54 | 55 | const formats = ['GT', 'GT:DP:GQ', 'DP:GQ:GT'] as const 56 | const sampleCounts = [10, 100, 1000, 5000] as const 57 | const opts = { iterations: 1000, warmupIterations: 100 } 58 | 59 | for (const format of 
formats) { 60 | for (const numSamples of sampleCounts) { 61 | benchParseGenotypes(`${numSamples} samples - ${format}`, numSamples, format, opts) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/parseMetaString.ts: -------------------------------------------------------------------------------- 1 | // constructed with the assistance of claude AI 2 | // 3 | // I first prompted it with a regex that splits a comma separated string with 4 | // awareness of quotation from this stackoverflow question 5 | // https://stackoverflow.com/a/18893443/2129219, and asked it to add support 6 | // for square brackets 7 | // 8 | // it undid the regex into serial logic and the result was this function 9 | function customSplit(str: string) { 10 | const result = [] 11 | const chars = [] 12 | let inQuotes = false 13 | let inBrackets = false 14 | const strLen = str.length 15 | 16 | for (let i = 0; i < strLen; i++) { 17 | const char = str[i]! 18 | if (char === '"') { 19 | inQuotes = !inQuotes 20 | chars.push(char) 21 | } else if (char === '[') { 22 | inBrackets = true 23 | chars.push(char) 24 | } else if (char === ']') { 25 | inBrackets = false 26 | chars.push(char) 27 | } else if (char === ',' && !inQuotes && !inBrackets) { 28 | result.push(chars.join('').trim()) 29 | chars.length = 0 30 | } else { 31 | chars.push(char) 32 | } 33 | } 34 | 35 | if (chars.length > 0) { 36 | result.push(chars.join('').trim()) 37 | } 38 | 39 | return result 40 | } 41 | 42 | function splitFirst(str: string, split: string) { 43 | const index = str.indexOf(split) 44 | return [str.slice(0, index), str.slice(index + 1)] 45 | } 46 | 47 | export function parseMetaString(metaString: string) { 48 | const inside = metaString.slice(1, -1) 49 | const parts = customSplit(inside) 50 | const entries: [string, any][] = [] 51 | for (let i = 0; i < parts.length; i++) { 52 | const f = parts[i]! 
53 | const [key, val] = splitFirst(f, '=') 54 | if (val && val.startsWith('[') && val.endsWith(']')) { 55 | const items = val.slice(1, -1).split(',') 56 | for (let j = 0; j < items.length; j++) { 57 | items[j] = items[j]!.trim() 58 | } 59 | entries.push([key!, items]) 60 | } else if (val && val.startsWith('"') && val.endsWith('"')) { 61 | entries.push([key!, val.slice(1, -1)]) 62 | } else { 63 | entries.push([key!, val]) 64 | } 65 | } 66 | return Object.fromEntries(entries) 67 | } 68 | -------------------------------------------------------------------------------- /src/parseGenotypesOnly.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Extracts genotype (GT) values from VCF sample data. 3 | */ 4 | export function parseGenotypesOnly( 5 | format: string, 6 | prerest: string, 7 | samples: string[], 8 | ) { 9 | const genotypes = Object.create(null) as Record 10 | 11 | const samplesLen = samples.length 12 | const prerestLen = prerest.length 13 | const TAB = 9 14 | const COLON = 58 15 | let pos = 0 16 | 17 | // Fast path: format is exactly "GT" 18 | if (format === 'GT') { 19 | for (let idx = 0; idx < samplesLen; idx++) { 20 | const start = pos 21 | while (pos < prerestLen && prerest.charCodeAt(pos) !== TAB) { 22 | pos++ 23 | } 24 | genotypes[samples[idx]!] = prerest.slice(start, pos) 25 | pos++ 26 | } 27 | return genotypes 28 | } 29 | 30 | // Check if GT field exists 31 | const gtIdx = format.indexOf('GT') 32 | if (gtIdx === -1) { 33 | return genotypes 34 | } 35 | 36 | // GT is first field but not only field 37 | if (gtIdx === 0) { 38 | for (let idx = 0; idx < samplesLen; idx++) { 39 | const start = pos 40 | while ( 41 | pos < prerestLen && 42 | prerest.charCodeAt(pos) !== COLON && 43 | prerest.charCodeAt(pos) !== TAB 44 | ) { 45 | pos++ 46 | } 47 | genotypes[samples[idx]!] 
= prerest.slice(start, pos) 48 | while (pos < prerestLen && prerest.charCodeAt(pos) !== TAB) { 49 | pos++ 50 | } 51 | pos++ 52 | } 53 | return genotypes 54 | } 55 | 56 | // GT is not first field 57 | let colonCount = 0 58 | for (let j = 0; j < gtIdx; j++) { 59 | if (format.charCodeAt(j) === COLON) { 60 | colonCount++ 61 | } 62 | } 63 | for (let idx = 0; idx < samplesLen; idx++) { 64 | const sampleStart = pos 65 | let tabIdx = pos 66 | while (tabIdx < prerestLen && prerest.charCodeAt(tabIdx) !== TAB) { 67 | tabIdx++ 68 | } 69 | 70 | let colons = 0 71 | let fieldStart = sampleStart 72 | for (let j = sampleStart; j <= tabIdx; j++) { 73 | if (j === tabIdx || prerest.charCodeAt(j) === COLON) { 74 | if (colons === colonCount) { 75 | genotypes[samples[idx]!] = prerest.slice(fieldStart, j) 76 | break 77 | } 78 | colons++ 79 | fieldStart = j + 1 80 | } 81 | } 82 | pos = tabIdx + 1 83 | } 84 | 85 | return genotypes 86 | } 87 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@gmod/vcf", 3 | "version": "6.1.0", 4 | "description": "High performance streaming Variant Call Format (VCF) parser in pure JavaScript", 5 | "type": "module", 6 | "types": "./dist/index.d.ts", 7 | "exports": { 8 | "import": { 9 | "import": "./esm/index.js" 10 | }, 11 | "require": { 12 | "require": "./dist/index.js" 13 | } 14 | }, 15 | "repository": "github:GMOD/vcf-js", 16 | "author": { 17 | "name": "Garrett Stevens", 18 | "email": "stevens.garrett.j@gmail.com", 19 | "url": "https://github.com/garrettjstevens" 20 | }, 21 | "license": "MIT", 22 | "homepage": "https://github.com/GMOD/vcf-js#vcf-js", 23 | "bugs": { 24 | "url": "https://github.com/GMOD/vcf-js/issues" 25 | }, 26 | "files": [ 27 | "dist", 28 | "esm", 29 | "src" 30 | ], 31 | "engines": { 32 | "node": ">=6" 33 | }, 34 | "scripts": { 35 | "test": "vitest", 36 | "benchonly": "vitest bench", 37 | 
/**
 * Structured form of a breakend-style ALT allele, as returned by
 * `parseBreakend`.
 */
export interface Breakend {
  /**
   * 'right' when the replacement sequence is written before the mate
   * position / dot / angle-bracket part of the ALT string, 'left' when it is
   * written after it.
   */
  Join: string
  /** The sequence part of the ALT string (e.g. the `G` in `G]17:198982]`). */
  Replacement: string
  /**
   * The text found between the square brackets (e.g. `17:198982`), or
   * `<ID>:1` for the angle-bracket forms.
   */
  MatePosition?: string
  /**
   * 'right' when the ALT string contains `[`, 'left' when it only uses `]`.
   * Always 'right' for the angle-bracket forms.
   */
  MateDirection?: string
  /** Set to true for the dot forms (`.ACGT` / `ACGT.`). */
  SingleBreakend?: boolean
}

// `<ID>SEQ`: the contig ID is written before the sequence
const ANGLE_BRACKET_START_REGEX = /<(.*)>(.*)/
// `SEQ<ID>`: the sequence is written before the contig ID
const ANGLE_BRACKET_END_REGEX = /(.*)<(.*)>/

/**
 * Parse an ALT string that uses breakend notation.
 *
 * Recognised forms:
 * - square brackets at either end: `G]17:198982]`, `]13:123456]T`,
 *   `C[2:321682[`, `[17:198983[A`
 * - a leading or trailing dot next to a sequence: `.ACGT`, `ACGT.`
 * - an angle-bracketed ID next to a sequence: `C<ctg1>`, `<ctg1>C`
 *
 * @param breakendString - a single ALT allele string
 * @returns the parsed breakend, or `undefined` when the string is not a
 * breakend (plain sequences, a bare `.`, and symbolic alleles such as `<DEL>`
 * that carry no sequence)
 * @throws Error when the string has brackets at either end but lacks a mate
 * position or a replacement sequence, or when an angle-bracket form cannot be
 * matched
 */
export function parseBreakend(breakendString: string): Breakend | undefined {
  const firstChar = breakendString[0]
  const lastChar = breakendString[breakendString.length - 1]

  if (
    firstChar === '[' ||
    firstChar === ']' ||
    lastChar === '[' ||
    lastChar === ']'
  ) {
    const tokens = breakendString.split(/[[\]]/)
    const MateDirection = breakendString.includes('[') ? 'right' : 'left'
    let Join
    let Replacement
    let MatePosition
    const tokensLen = tokens.length
    for (let i = 0; i < tokensLen; i++) {
      const tok = tokens[i]!
      if (tok) {
        if (tok.includes(':')) {
          MatePosition = tok
          // if the sequence was already seen, it sits before the mate position
          Join = Replacement ? 'right' : 'left'
        } else {
          Replacement = tok
        }
      }
    }
    if (!(MatePosition && Join && Replacement)) {
      throw new Error(`Invalid breakend: ${breakendString}`)
    }
    return { MatePosition, Join, Replacement, MateDirection }
  }

  if (firstChar === '.') {
    const Replacement = breakendString.slice(1)
    // a bare '.' carries no sequence, so it is not a single breakend; this
    // matches the angle-bracket branches below, which also return undefined
    // when there is no replacement sequence
    return Replacement
      ? {
          Join: 'left',
          SingleBreakend: true,
          Replacement,
        }
      : undefined
  }

  if (lastChar === '.') {
    const Replacement = breakendString.slice(0, -1)
    return Replacement
      ? {
          Join: 'right',
          SingleBreakend: true,
          Replacement,
        }
      : undefined
  }

  if (firstChar === '<') {
    const res = ANGLE_BRACKET_START_REGEX.exec(breakendString)
    if (!res) {
      throw new Error(`failed to parse ${breakendString}`)
    }
    const Replacement = res[2]
    // no trailing sequence means a plain symbolic allele such as <DEL>
    return Replacement
      ? {
          Join: 'left',
          Replacement,
          MateDirection: 'right',
          MatePosition: `<${res[1]!}>:1`,
        }
      : undefined
  }

  if (breakendString.includes('<')) {
    const res = ANGLE_BRACKET_END_REGEX.exec(breakendString)
    if (!res) {
      throw new Error(`failed to parse ${breakendString}`)
    }
    const Replacement = res[1]
    return Replacement
      ? {
          Join: 'right',
          Replacement,
          MateDirection: 'right',
          MatePosition: `<${res[2]!}>:1`,
        }
      : undefined
  }

  return undefined
}
// ESLint flat configuration for the project. The exported array is built by
// `defineConfig` from, in order: the ignore list, the TypeScript parser
// options, the shared presets, and finally the project-specific rule
// overrides.
import eslint from '@eslint/js'
import eslintPluginUnicorn from 'eslint-plugin-unicorn'
import { defineConfig } from 'eslint/config'
import importPlugin from 'eslint-plugin-import'
import tseslint from 'typescript-eslint'

export default defineConfig(
  {
    // paths excluded from linting: build outputs (esm/, dist/), benchmark
    // code, examples, and plain .js/.mjs files (which includes this config)
    ignores: [
      'analyze-profile.cjs',
      'esm_*/',
      'benchmark/',
      'esm/',
      'dist/',
      '*.js',
      '*.mjs',
      'example/*',
    ],
  },
  {
    languageOptions: {
      parserOptions: {
        // the type-checked presets below need a TypeScript project; the lint
        // tsconfig is resolved relative to this file's directory
        project: ['./tsconfig.lint.json'],
        tsconfigRootDir: import.meta.dirname,
      },
    },
  },
  // shared presets
  eslint.configs.recommended,
  ...tseslint.configs.recommended,
  ...tseslint.configs.stylisticTypeChecked,
  ...tseslint.configs.strictTypeChecked,
  importPlugin.flatConfigs.recommended,
  eslintPluginUnicorn.configs.recommended,
  {
    // project-specific overrides on top of the presets above
    rules: {
      // always require braces around blocks
      curly: 'error',
      // console.error and console.warn are allowed, other console calls warn
      'no-console': [
        'warn',
        {
          allow: ['error', 'warn'],
        },
      ],
      // require a space after the comment marker; '/' is listed as a marker
      // so that '///' comments are accepted
      'spaced-comment': [
        'error',
        'always',
        {
          markers: ['/'],
        },
      ],
      // unused variables only warn; arguments prefixed with '_', caught
      // errors, and rest siblings are exempt
      '@typescript-eslint/no-unused-vars': [
        'warn',
        {
          argsIgnorePattern: '^_',
          caughtErrors: 'none',
          ignoreRestSiblings: true,
        },
      ],

      // typescript-eslint rules from the presets that are switched off
      '@typescript-eslint/ban-ts-comment': 'off',
      '@typescript-eslint/no-this-alias': 'off',
      '@typescript-eslint/no-unsafe-member-access': 'off',
      '@typescript-eslint/no-unsafe-argument': 'off',
      '@typescript-eslint/no-explicit-any': 'off',
      '@typescript-eslint/no-unsafe-assignment': 'off',
      '@typescript-eslint/no-unsafe-call': 'off',
      '@typescript-eslint/no-unsafe-return': 'off',
      '@typescript-eslint/no-non-null-assertion': 'off',
      '@typescript-eslint/restrict-template-expressions': 'off',
      '@typescript-eslint/prefer-for-of': 'off',

      // unicorn rules from the recommended preset that are switched off
      'unicorn/no-new-array': 'off',
      'unicorn/no-empty-file': 'off',
      'unicorn/prefer-type-error': 'off',
      'unicorn/prefer-modern-math-apis': 'off',
      'unicorn/prefer-node-protocol': 'off',
      'unicorn/no-unreadable-array-destructuring': 'off',
      'unicorn/no-abusive-eslint-disable': 'off',
      'unicorn/no-array-callback-reference': 'off',
      'unicorn/number-literal-case': 'off',
      'unicorn/prefer-add-event-listener': 'off',
      'unicorn/prefer-top-level-await': 'off',
      'unicorn/consistent-function-scoping': 'off',
      'unicorn/no-await-expression-member': 'off',
      'unicorn/no-lonely-if': 'off',
      'unicorn/consistent-destructuring': 'off',
      'unicorn/prefer-module': 'off',
      'unicorn/prefer-optional-catch-binding': 'off',
      'unicorn/no-useless-undefined': 'off',
      'unicorn/no-null': 'off',
      'unicorn/no-nested-ternary': 'off',
      'unicorn/filename-case': 'off',
      'unicorn/catch-error-name': 'off',
      'unicorn/prevent-abbreviations': 'off',
      'unicorn/prefer-code-point': 'off',
      'unicorn/numeric-separators-style': 'off',
      'unicorn/no-array-for-each': 'off',
      'unicorn/prefer-spread': 'off',
      'unicorn/explicit-length-check': 'off',
      'unicorn/prefer-regexp-test': 'off',
      'unicorn/relative-url-style': 'off',
      'unicorn/prefer-math-trunc': 'off',
      'unicorn/prefer-query-selector': 'off',
      'unicorn/no-negated-condition': 'off',
      'unicorn/switch-case-braces': 'off',
      'unicorn/prefer-switch': 'off',
      'unicorn/better-regex': 'off',
      'unicorn/no-for-loop': 'off',
      'unicorn/escape-case': 'off',
      'unicorn/prefer-number-properties': 'off',
      'unicorn/no-process-exit': 'off',
      'unicorn/prefer-at': 'off',
      'unicorn/prefer-structured-clone': 'off',
      'unicorn/prefer-string-replace-all': 'off',

      // import plugin: resolution checking is off; import order is enforced
      // as alphabetized groups separated by blank lines
      'import/no-unresolved': 'off',
      'import/order': [
        'error',
        {
          named: true,
          'newlines-between': 'always',
          alphabetize: {
            order: 'asc',
          },
          groups: [
            'builtin',
            ['external', 'internal'],
            ['parent', 'sibling', 'index', 'object'],
            'type',
          ],
        },
      ],
    },
  },
)
## [6.0.2](https://github.com/GMOD/vcf-js/compare/v6.0.1...v6.0.2) (2025-01-07) 20 | 21 | ## [6.0.1](https://github.com/GMOD/vcf-js/compare/v6.0.0...v6.0.1) (2024-12-17) 22 | 23 | # [6.0.0](https://github.com/GMOD/vcf-js/compare/v5.0.10...v6.0.0) (2024-11-30) 24 | 25 | - Changes the default Variant object to have a SAMPLES() function call instead 26 | of a SAMPLES getter, to make it more abundantly clear that it is a lazy 27 | operation. Also adds a GENOTYPES() function that returns the raw string of 28 | genotype fields 29 | 30 | ## [5.0.10](https://github.com/GMOD/vcf-js/compare/v5.0.9...v5.0.10) (2022-12-17) 31 | 32 | - Use es2015 for nodejs build 33 | 34 | ## [5.0.9](https://github.com/GMOD/vcf-js/compare/v5.0.8...v5.0.9) (2022-11-23) 35 | 36 | - Fix erroneous parsing of symbolic alleles as breakends 37 | 38 | ## [5.0.8](https://github.com/GMOD/vcf-js/compare/v5.0.7...v5.0.8) (2022-11-20) 39 | 40 | - Parse single breakends and large insertion shorthand notation (#95) 41 | 42 | 43 | 44 | ## [5.0.7](https://github.com/GMOD/vcf-js/compare/v5.0.6...v5.0.7) (2022-08-24) 45 | 46 | - Don't throw error when there is a FORMAT column but no genotypes 47 | 48 | 49 | 50 | ## [5.0.6](https://github.com/GMOD/vcf-js/compare/v5.0.5...v5.0.6) (2022-03-30) 51 | 52 | - Include src directory for better source maps 53 | 54 | 55 | 56 | ## [5.0.5](https://github.com/GMOD/vcf-js/compare/v5.0.4...v5.0.5) (2022-01-12) 57 | 58 | - Add optimization related to better allocation of variant records, thanks to 59 | @bpow for contributing 60 | 61 | 62 | 63 | ## [5.0.4](https://github.com/GMOD/vcf-js/compare/v5.0.3...v5.0.4) (2021-12-23) 64 | 65 | - Make the strict field in the constructor optional 66 | - Export `Breakend` type for typescript users 67 | 68 | 69 | 70 | ## [5.0.3](https://github.com/GMOD/vcf-js/compare/v5.0.2...v5.0.3) (2021-12-14) 71 | 72 | - Add typescripting and esm module build 73 | 74 | 75 | 76 | ## [5.0.2](https://github.com/GMOD/vcf-js/compare/v5.0.1...v5.0.2) (2021-11-13) 
77 | 78 | - Update package description to refer to variant call format 79 | 80 | 81 | 82 | ## [5.0.1](https://github.com/GMOD/vcf-js/compare/v5.0.0...v5.0.1) (2021-11-04) 83 | 84 | - Add URI decoding to INFO field 85 | 86 | 87 | 88 | # [5.0.0](https://github.com/GMOD/vcf-js/compare/v4.0.4...v5.0.0) (2021-09-06) 89 | 90 | - Make parseBreakends an optional helper function, all ALTs are plain strings 91 | now instead of string|Breakend. This is a breaking change so a major version 92 | bump is applied 93 | 94 | 95 | 96 | ## [4.0.4](https://github.com/GMOD/vcf-js/compare/v4.0.1...v4.0.4) (2021-08-04) 97 | 98 | - Fix issue when there is extra whitespace on the header line 99 | 100 | 101 | 102 | ## [4.0.3](https://github.com/GMOD/vcf-js/compare/v4.0.1...v4.0.3) (2021-03-31) 103 | 104 | - Include github automated fixes in release from before 4.0.2 105 | 106 | 107 | 108 | ## [4.0.2](https://github.com/GMOD/vcf-js/compare/v4.0.1...v4.0.2) (2021-03-31) 109 | 110 | - Avoid modifying built-in exports with parseMetadata, fixes issue with using 111 | parseMetadata from jest tests (#63) 112 | 113 | 114 | 115 | ## [4.0.1](https://github.com/GMOD/vcf-js/compare/v4.0.0...v4.0.1) (2019-10-30) 116 | 117 | - Add toString for Breakend ALTs so they are easily interpretable 118 | 119 | ## [4.0.0](https://github.com/GMOD/vcf-js/compare/v3.0.0...v4.0.0) (2019-06-14) 120 | 121 | - Breaking change: INFO entries that are type Flag now evaluate to `true` 122 | instead of `null` 123 | 124 | ## [3.0.0](https://github.com/GMOD/vcf-js/compare/v2.0.3...v3.0.0) (2019-05-31) 125 | 126 | - Breaking change: ALT entries in breakend format now parse into a breakend 127 | object instead of a string 128 | - Performance improvements 129 | 130 | ## [2.0.3](https://github.com/GMOD/vcf-js/compare/v2.0.2...v2.0.3) (2019-02-23) 131 | 132 | - Upgrade to Babel 7 133 | 134 | ## [2.0.2](https://github.com/GMOD/vcf-js/compare/v2.0.1...v2.0.2) (2018-11-26) 135 | 136 | - Remove errant unused dependency 137 | 138 | ## 
[2.0.1](https://github.com/GMOD/vcf-js/compare/v2.0.0...v2.0.1) (2018-11-08) 139 | 140 | - Bugfix for getMetadata() 141 | 142 | ## [2.0.0](https://github.com/GMOD/vcf-js/compare/v1.0.4...v2.0.0) (2018-11-07) 143 | 144 | - Breaking change: SAMPLES attribute of the variant is now evaluated lazily 145 | 146 | ## [1.0.4](https://github.com/GMOD/vcf-js/compare/v1.0.3...v1.0.4) (2018-11-06) 147 | 148 | - Decode %-encoded entries in INFO and FORMAT 149 | 150 | ## [1.0.3](https://github.com/GMOD/vcf-js/compare/v1.0.2...v1.0.3) (2018-11-05) 151 | 152 | - Fix for parsing missing genotypes 153 | 154 | ## [1.0.2](https://github.com/GMOD/vcf-js/compare/v1.0.1...v1.0.2) (2018-10-11) 155 | 156 | - Better handle filter metadata 157 | 158 | ## [1.0.1](https://github.com/GMOD/vcf-js/compare/v1.0.0...v1.0.1) (2018-10-05) 159 | 160 | - Fix bug in interpreting "Number" in header metadata 161 | 162 | ## 1.0.0 (2018-10-05) 163 | 164 | - Initial release 165 | -------------------------------------------------------------------------------- /test/parse.test.ts: -------------------------------------------------------------------------------- 1 | import fs from 'fs' 2 | 3 | import { expect, test } from 'vitest' 4 | 5 | import VCF, { parseBreakend } from '../src' 6 | 7 | const readVcf = (file: string) => { 8 | const f = fs.readFileSync(file, 'utf8') 9 | const lines = f.split('\n') 10 | const header = [] as string[] 11 | const rest = [] as string[] 12 | lines.forEach(line => { 13 | if (line.startsWith('#')) { 14 | header.push(line) 15 | } else if (line) { 16 | rest.push(line) 17 | } 18 | }) 19 | return { 20 | header: header.join('\n'), 21 | lines: rest, 22 | } 23 | } 24 | 25 | function makeParser() { 26 | const { header } = readVcf('test/data/spec-example.vcf') 27 | return new VCF({ 28 | header, 29 | }) 30 | } 31 | 32 | test('can get metadata from the header', () => { 33 | const VCFParser = makeParser() 34 | // Note that there is a custom PL that overrides the default PL 35 | 
expect(VCFParser.getMetadata()).toMatchSnapshot() 36 | expect(VCFParser.getMetadata('nonexistent')).toBe(undefined) 37 | expect(VCFParser.getMetadata('fileDate')).toBe('20090805') 38 | expect(VCFParser.getMetadata('INFO')).toMatchSnapshot() 39 | expect(VCFParser.getMetadata('INFO', 'nonexistent')).toBe(undefined) 40 | expect(VCFParser.getMetadata('INFO', 'AA')).toEqual({ 41 | Description: 'Ancestral Allele', 42 | Number: 1, 43 | Type: 'String', 44 | }) 45 | expect(VCFParser.getMetadata('INFO', 'AA', 'nonexistent')).toBe(undefined) 46 | expect(VCFParser.getMetadata('INFO', 'AA', 'Type')).toBe('String') 47 | expect(VCFParser.getMetadata('INFO', 'AA', 'Type', 'nonexistent')).toBe( 48 | undefined, 49 | ) 50 | expect(VCFParser.getMetadata('INFO', 'TEST')).toEqual({ 51 | Description: 'Used for testing', 52 | Number: 1, 53 | Type: 'String', 54 | }) 55 | 56 | expect(VCFParser.getMetadata('INFO', 'AC')).toEqual({ 57 | Number: 'A', 58 | Type: 'Integer', 59 | Description: 60 | 'Allele count in genotypes, for each ALT allele, in the same order as listed', 61 | }) 62 | }) 63 | 64 | test('can parse a line from the VCF spec', () => { 65 | const VCFParser = makeParser() 66 | const variant = VCFParser.parseLine( 67 | '20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:.,.\n', 68 | ) 69 | expect(variant).toMatchSnapshot() 70 | expect(variant.SAMPLES()).toMatchSnapshot() 71 | }) 72 | 73 | test('can parse a line with minimal entries', () => { 74 | const VCFParser = makeParser() 75 | const variant = VCFParser.parseLine( 76 | '20\t14370\t.\tG\tA\t.\t.\t.\tGT:GQ:DP:HQ\t.\t.\t.\n', 77 | ) 78 | expect(variant).toMatchSnapshot() 79 | expect(variant.SAMPLES()).toMatchSnapshot() 80 | }) 81 | 82 | test('parses a line with a breakend ALT', () => { 83 | const VCFParser = makeParser() 84 | const variant = VCFParser.parseLine( 85 | '2\t321681\tbnd_W\tG\tG]17:198982]\t6\tPASS\tSVTYPE=BND', 86 | ) 87 | 
expect(variant.ALT?.length).toBe(1) 88 | expect(variant.INFO.SVTYPE).toEqual(['BND']) 89 | expect(variant).toMatchSnapshot() 90 | }) 91 | 92 | test(`parses a line with mix of multiple breakends and non breakends`, () => { 93 | const VCFParser = makeParser() 94 | const variant = VCFParser.parseLine( 95 | `13\t123456\tbnd_U\tC\tCTATGTCG,C[2 : 321682[,C[17 : 198983[\t6\tPASS\tSVTYPE=BND;MATEID=bnd V,bnd Z`, 96 | ) 97 | expect(variant.ALT?.length).toBe(3) 98 | expect(variant.INFO.SVTYPE).toEqual(['BND']) 99 | expect(variant).toMatchSnapshot() 100 | }) 101 | 102 | test('throws errors with bad header lines', () => { 103 | expect(() => { 104 | new VCF({ header: 'notARealHeader' }) 105 | }).toThrow('Bad line in header') 106 | expect(() => { 107 | new VCF({ 108 | header: '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\n', 109 | }) 110 | }).toThrow('VCF header missing columns') 111 | expect(() => { 112 | new VCF({ 113 | header: '#CHROM\tPS\tID\tRF\tALT\tQUAL\tFILTER\tINFO\n', 114 | }) 115 | }).toThrow('VCF column headers not correct') 116 | expect(() => { 117 | new VCF({ header: '##this=badHeader\n' }) 118 | }).toThrow(/No format line/) 119 | }) 120 | 121 | test('sniffles vcf', () => { 122 | const { header, lines } = readVcf('test/data/sniffles.vcf') 123 | const VCFParser = new VCF({ 124 | header, 125 | }) 126 | const variant = VCFParser.parseLine(lines[0]) 127 | expect(variant).toMatchSnapshot() 128 | expect(variant.SAMPLES()).toMatchSnapshot() 129 | }) 130 | 131 | test('can parse a line from the VCF spec Y chrom (haploid))', () => { 132 | const { header, lines } = readVcf('test/data/y-chrom-haploid.vcf') 133 | const VCFParser = new VCF({ 134 | header, 135 | }) 136 | const variant = VCFParser.parseLine(lines[0]) 137 | const variant2 = VCFParser.parseLine(lines[1]) 138 | expect(variant).toMatchSnapshot() 139 | expect(variant.SAMPLES()).toMatchSnapshot() 140 | expect(variant2).toMatchSnapshot() 141 | expect(variant2.SAMPLES()).toMatchSnapshot() 142 | }) 143 | 144 | test('snippet 
from VCF 4.3 spec', () => { 145 | const { header, lines } = readVcf('test/data/vcf4.3_spec_snippet.vcf') 146 | const VCFParser = new VCF({ 147 | header, 148 | }) 149 | const variants = lines.map(line => VCFParser.parseLine(line)) 150 | expect(variants).toMatchSnapshot() 151 | expect(variants.map(variant => variant.SAMPLES())).toMatchSnapshot() 152 | }) 153 | test('can parse breakends', () => { 154 | const { header, lines } = readVcf('test/data/breakends.vcf') 155 | const VCFParser = new VCF({ 156 | header, 157 | }) 158 | 159 | expect(lines.map(line => VCFParser.parseLine(line))).toMatchSnapshot() 160 | }) 161 | 162 | // from https://github.com/GMOD/jbrowse/issues/1358 163 | test('vcf lines with weird info field and missing format/genotypes', () => { 164 | const { header, lines } = readVcf( 165 | 'test/data/weird_info_and_missing_format.vcf', 166 | ) 167 | const VCFParser = new VCF({ 168 | header, 169 | }) 170 | 171 | expect(lines.map(line => VCFParser.parseLine(line))).toMatchSnapshot() 172 | }) 173 | test('test no info strict', () => { 174 | const { header, lines } = readVcf('test/data/multipleAltSVs.vcf') 175 | const VCFParser = new VCF({ 176 | header, 177 | strict: true, 178 | }) 179 | expect(() => VCFParser.parseLine(lines[0])).toThrow(/INFO/) 180 | }) 181 | 182 | test('test no info non-strict', () => { 183 | const { header, lines } = readVcf('test/data/multipleAltSVs.vcf') 184 | const VCFParser = new VCF({ 185 | header, 186 | strict: false, 187 | }) 188 | expect(VCFParser.parseLine(lines[0])).toBeTruthy() 189 | expect(VCFParser.parseLine(lines[0]).GENOTYPES()).toEqual({}) 190 | }) 191 | 192 | test('empty header lines', () => { 193 | expect(() => new VCF({ header: '\n' })).toThrow(/no non-empty/) 194 | }) 195 | 196 | test('shortcut parsing with 1000 genomes', () => { 197 | const { header, lines } = readVcf('test/data/1000genomes.vcf') 198 | 199 | const VCFParser = new VCF({ header }) 200 | expect(lines.map(line => VCFParser.parseLine(line))).toMatchSnapshot() 
201 | }) 202 | 203 | test('shortcut parsing with vcf 4.3 bnd example', () => { 204 | const { header, lines } = readVcf('test/data/vcf4.3_spec_bnd.vcf') 205 | 206 | const VCFParser = new VCF({ header }) 207 | const variants = lines.map(line => VCFParser.parseLine(line)) 208 | expect(variants.map(m => m.ALT?.[0].toString())).toEqual( 209 | lines.map(line => line.split('\t')[4]), 210 | ) 211 | 212 | expect(variants).toMatchSnapshot() 213 | }) 214 | 215 | test('vcf 4.3 single breakends', () => { 216 | // single breakend 217 | expect(parseBreakend('G.')).toMatchSnapshot() 218 | expect(parseBreakend('ACGT.')).toMatchSnapshot() 219 | expect(parseBreakend('.ACGT')).toMatchSnapshot() 220 | }) 221 | 222 | test('vcf 4.3 insertion shorthand', () => { 223 | expect(parseBreakend('G')).toMatchSnapshot() 224 | expect(parseBreakend('G')).toMatchSnapshot() 225 | expect(parseBreakend('C[:1[')).toMatchSnapshot() 226 | expect(parseBreakend(']13:123456]AGTNNNNNCAT')).toMatchSnapshot() 227 | }) 228 | 229 | test('parse breakend on symbolic alleles', () => { 230 | expect(parseBreakend('')).not.toBeTruthy() 231 | expect(parseBreakend('')).not.toBeTruthy() 232 | expect(parseBreakend('')).not.toBeTruthy() 233 | expect(parseBreakend('')).not.toBeTruthy() 234 | }) 235 | 236 | test('parse breakend on thing that looks like symbolic allele but is actually a feature', () => { 237 | expect(parseBreakend('C')).toMatchSnapshot() 238 | }) 239 | 240 | test('clinvar metadata', () => { 241 | const { header } = readVcf('test/data/clinvar.header.vcf') 242 | const VCFParser = new VCF({ 243 | header, 244 | }) 245 | expect(VCFParser.getMetadata()).toMatchSnapshot() 246 | }) 247 | 248 | test('sample to genotype information', () => { 249 | const { header } = readVcf('test/data/sample2genotype.vcf') 250 | const VCFParser = new VCF({ 251 | header, 252 | }) 253 | expect(VCFParser.getMetadata().META).toMatchSnapshot() 254 | expect(VCFParser.getMetadata().SAMPLES).toMatchSnapshot() 255 | }) 256 | 257 | 
/**
 * Default definitions for the reserved INFO, FORMAT, ALT and FILTER keys of
 * the VCF specification. Each entry mirrors the fields of a header metadata
 * line (`Number`, `Type`, `Description`).
 */
const vcfReserved = {
  // INFO fields
  InfoFields: {
    // from the VCF4.3 spec, https://samtools.github.io/hts-specs/VCFv4.3.pdf
    AA: { Number: 1, Type: 'String', Description: 'Ancestral allele' },
    AC: {
      Number: 'A',
      Type: 'Integer',
      Description:
        'Allele count in genotypes, for each ALT allele, in the same order as listed',
    },
    AD: {
      Number: 'R',
      Type: 'Integer',
      Description: 'Total read depth for each allele',
    },
    ADF: {
      Number: 'R',
      Type: 'Integer',
      Description: 'Read depth for each allele on the forward strand',
    },
    ADR: {
      Number: 'R',
      Type: 'Integer',
      Description: 'Read depth for each allele on the reverse strand',
    },
    AF: {
      Number: 'A',
      Type: 'Float',
      Description:
        'Allele frequency for each ALT allele in the same order as listed (estimated from primary data, not called genotypes)',
    },
    AN: {
      Number: 1,
      Type: 'Integer',
      Description: 'Total number of alleles in called genotypes',
    },
    BQ: {
      Number: 1,
      Type: 'Float',
      Description: 'RMS base quality',
    },
    // a CIGAR value is text such as "3M1I2M", so it must be typed as String;
    // a numeric type would corrupt it
    CIGAR: {
      Number: 1,
      Type: 'String',
      Description:
        'Cigar string describing how to align an alternate allele to the reference allele',
    },
    DB: {
      Number: 0,
      Type: 'Flag',
      Description: 'dbSNP membership',
    },
    DP: {
      Number: 1,
      Type: 'Integer',
      Description: 'combined depth across samples',
    },
    // For precise variants, END is POS + length of REF allele - 1,
    // and for imprecise variants the corresponding best estimate.
    END: {
      Number: 1,
      Type: 'Integer',
      Description: 'End position (for use with symbolic alleles)',
    },
    H2: {
      Number: 0,
      Type: 'Flag',
      Description: 'HapMap2 membership',
    },
    H3: {
      Number: 0,
      Type: 'Flag',
      Description: 'HapMap3 membership',
    },
    // the VCF spec defines the INFO MQ key as a Float
    MQ: {
      Number: 1,
      Type: 'Float',
      Description: 'RMS mapping quality',
    },
    MQ0: {
      Number: 1,
      Type: 'Integer',
      Description: 'Number of MAPQ == 0 reads',
    },
    NS: {
      Number: 1,
      Type: 'Integer',
      Description: 'Number of samples with data',
    },
    SB: {
      Number: 4,
      Type: 'Integer',
      Description: 'Strand bias',
    },
    SOMATIC: {
      Number: 0,
      Type: 'Flag',
      Description: 'Somatic mutation (for cancer genomics)',
    },
    VALIDATED: {
      Number: 0,
      Type: 'Flag',
      Description: 'Validated by follow-up experiment',
    },
    '1000G': {
      Number: 0,
      Type: 'Flag',
      Description: '1000 Genomes membership',
    },
    // specifically for structural variants
    IMPRECISE: {
      Number: 0,
      Type: 'Flag',
      Description: 'Imprecise structural variation',
    },
    NOVEL: {
      Number: 0,
      Type: 'Flag',
      Description: 'Indicates a novel structural variation',
    },
    // Value should be one of DEL, INS, DUP, INV, CNV, BND. This key can
    // be derived from the REF/ALT fields but is useful for filtering.
    SVTYPE: {
      Number: 1,
      Type: 'String',
      Description: 'Type of structural variant',
    },
    // One value for each ALT allele. Longer ALT alleles (e.g. insertions)
    // have positive values, shorter ALT alleles (e.g. deletions)
    // have negative values.
    SVLEN: {
      Number: null,
      Type: 'Integer',
      Description: 'Difference in length between REF and ALT alleles',
    },
    CIPOS: {
      Number: 2,
      Type: 'Integer',
      Description: 'Confidence interval around POS for imprecise variants',
    },
    CIEND: {
      Number: 2,
      Type: 'Integer',
      Description: 'Confidence interval around END for imprecise variants',
    },
    HOMLEN: {
      Type: 'Integer',
      Description:
        'Length of base pair identical micro-homology at event breakpoints',
    },
    HOMSEQ: {
      Type: 'String',
      Description:
        'Sequence of base pair identical micro-homology at event breakpoints',
    },
    // For precise variants, the consensus sequence of the alternate allele
    // assembly is derivable from the REF and ALT fields. However, the
    // alternate allele assembly file may contain additional information about
    // the characteristics of the alt allele contigs.
    BKPTID: {
      Type: 'String',
      Description: 'ID of the assembled alternate allele in the assembly file',
    },
    MEINFO: {
      Number: 4,
      Type: 'String',
      Description: 'Mobile element info of the form NAME,START,END,POLARITY',
    },
    METRANS: {
      Number: 4,
      Type: 'String',
      Description:
        'Mobile element transduction info of the form CHR,START,END,POLARITY',
    },
    DGVID: {
      Number: 1,
      Type: 'String',
      Description: 'ID of this element in Database of Genomic Variation',
    },
    DBVARID: {
      Number: 1,
      Type: 'String',
      Description: 'ID of this element in DBVAR',
    },
    DBRIPID: {
      Number: 1,
      Type: 'String',
      Description: 'ID of this element in DBRIP',
    },
    MATEID: {
      Number: null,
      Type: 'String',
      Description: 'ID of mate breakends',
    },
    PARID: {
      Number: 1,
      Type: 'String',
      Description: 'ID of partner breakend',
    },
    EVENT: {
      Number: 1,
      Type: 'String',
      Description: 'ID of event associated to breakend',
    },
    CILEN: {
      Number: 2,
      Type: 'Integer',
      Description:
        'Confidence interval around the inserted material between breakend',
    },
    DPADJ: { Type: 'Integer', Description: 'Read Depth of adjacency' },
    CN: {
      Number: 1,
      Type: 'Integer',
      Description: 'Copy number of segment containing breakend',
    },
    CNADJ: {
      Number: null,
      Type: 'Integer',
      Description: 'Copy number of adjacency',
    },
    CICN: {
      Number: 2,
      Type: 'Integer',
      Description: 'Confidence interval around copy number for the segment',
    },
    CICNADJ: {
      Number: null,
      Type: 'Integer',
      Description: 'Confidence interval around copy number for the adjacency',
    },
  },

  // FORMAT fields
  GenotypeFields: {
    // from the VCF4.3 spec, https://samtools.github.io/hts-specs/VCFv4.3.pdf
    AD: {
      Number: 'R',
      Type: 'Integer',
      Description: 'Read depth for each allele',
    },
    ADF: {
      Number: 'R',
      Type: 'Integer',
      Description: 'Read depth for each allele on the forward strand',
    },
    ADR: {
      Number: 'R',
      Type: 'Integer',
      Description: 'Read depth for each allele on the reverse strand',
    },
    DP: {
      Number: 1,
      Type: 'Integer',
      Description: 'Read depth',
    },
    EC: {
      Number: 'A',
      Type: 'Integer',
      Description: 'Expected alternate allele counts',
    },
    FT: {
      Number: 1,
      Type: 'String',
      Description: 'Filter indicating if this genotype was "called"',
    },
    GL: {
      Number: 'G',
      Type: 'Float',
      Description: 'Genotype likelihoods',
    },
    GP: {
      Number: 'G',
      Type: 'Float',
      Description: 'Genotype posterior probabilities',
    },
    GQ: {
      Number: 1,
      Type: 'Integer',
      Description: 'Conditional genotype quality',
    },
    GT: {
      Number: 1,
      Type: 'String',
      Description: 'Genotype',
    },
    HQ: {
      Number: 2,
      Type: 'Integer',
      Description: 'Haplotype quality',
    },
    MQ: {
      Number: 1,
      Type: 'Integer',
      Description: 'RMS mapping quality',
    },
    PL: {
      Number: 'G',
      Type: 'Integer',
      Description:
        'Phred-scaled genotype likelihoods rounded to the closest integer',
    },
    PQ: {
      Number: 1,
      Type: 'Integer',
      Description: 'Phasing quality',
    },
    PS: {
      Number: 1,
      Type: 'Integer',
      Description: 'Phase set',
    },
  },

  // ALT fields
  AltTypes: {
    DEL: {
      Description: 'Deletion relative to the reference',
    },
    INS: {
      Description: 'Insertion of novel sequence relative to the reference',
    },
    DUP: {
      Description: 'Region of elevated copy number relative to the reference',
    },
    INV: {
      Description: 'Inversion of reference sequence',
    },
    CNV: {
      Description:
        'Copy number variable region (may be both deletion and duplication)',
    },
    'DUP:TANDEM': {
      Description: 'Tandem duplication',
    },
    'DEL:ME': {
      Description: 'Deletion of mobile element relative to the reference',
    },
    'INS:ME': {
      Description: 'Insertion of a mobile element relative to the reference',
    },
    NON_REF: {
      Description:
        'Represents any possible alternative allele at this location',
    },
    '*': {
      Description:
        'Represents any possible alternative allele at this location',
    },
  },

  // FILTER fields
  FilterTypes: {
    PASS: {
      Description: 'Passed all filters',
    },
  },
}

export default vcfReserved
parser in pure JavaScript. 4 | 5 | ## Status 6 | 7 | [![NPM version](https://img.shields.io/npm/v/@gmod/vcf.svg?logo=npm&style=flat-square)](https://npmjs.org/package/@gmod/vcf) 8 | [![Coverage Status](https://img.shields.io/codecov/c/github/GMOD/vcf-js/master.svg?style=flat-square)](https://codecov.io/gh/GMOD/vcf-js/branch/master) 9 | [![Build Status](https://img.shields.io/github/actions/workflow/status/GMOD/vcf-js/push.yml?branch=master)](https://github.com/GMOD/vcf-js/actions) 10 | 11 | ## Usage 12 | 13 | This module is best used when combined with some easy way of retrieving the 14 | header and individual lines from a VCF, like the `@gmod/tabix` module. 15 | 16 | ```typescript 17 | import { TabixIndexedFile } from '@gmod/tabix' 18 | 19 | // with import 20 | import VCF, { parseBreakend } from '@gmod/vcf' 21 | 22 | // with require 23 | const { default: VCF, parseBreakend } = require('@gmod/vcf') 24 | 25 | const tbiIndexed = new TabixIndexedFile({ path: '/path/to/my.vcf.gz' }) 26 | 27 | async function doStuff() { 28 | const headerText = await tbiIndexed.getHeader() 29 | const tbiVCFParser = new VCF({ header: headerText }) 30 | const variants = [] 31 | await tbiIndexed.getLines('ctgA', 200, 300, line => 32 | variants.push(tbiVCFParser.parseLine(line)), 33 | ) 34 | console.log(variants) 35 | } 36 | ``` 37 | 38 | If you want to stream a VCF file, you can alternatively use something like this 39 | 40 | ```typescript 41 | const fs = require('fs') 42 | const VCF = require('@gmod/vcf').default 43 | const { createGunzip } = require('zlib') 44 | const readline = require('readline') 45 | 46 | const rl = readline.createInterface({ 47 | input: fs.createReadStream(process.argv[2]).pipe(createGunzip()), 48 | }) 49 | 50 | let header = [] 51 | let elts = [] 52 | let parser = undefined 53 | 54 | rl.on('line', function (line) { 55 | if (line.startsWith('#')) { 56 | header.push(line) 57 | return 58 | } else if (!parser) { 59 | parser = new VCF({ header: header.join('\n') }) 60 | } 
61 | const elt = parser.parseLine(line) 62 | elts.push(elt.INFO.AN[0]) 63 | }) 64 | 65 | rl.on('close', function () { 66 | console.log(elts.reduce((a, b) => a + b, 0) / elts.length) 67 | }) 68 | ``` 69 | 70 | This method is used to test @gmod/vcf in https://github.com/brentp/vcf-bench 71 | 72 | ## Methods 73 | 74 | Given a VCF with a single variant line 75 | 76 | ```text 77 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 78 | contigA 3000 rs17883296 G T,A 100 PASS NS=3;DP=14;AF=0.5;DB;XYZ=5 GT:AP 0|0:0.000,0.000 79 | ``` 80 | 81 | The `variant` object returned by `parseLine()` would be 82 | 83 | ```typescript 84 | { 85 | CHROM: 'contigA', 86 | POS: 3000, 87 | ID: ['rs17883296'], 88 | REF: 'G', 89 | ALT: ['T', 'A'], 90 | QUAL: 100, 91 | FILTER: 'PASS', 92 | INFO: { 93 | NS: [3], 94 | DP: [14], 95 | AF: [0.5], 96 | DB: true, 97 | XYZ: ['5'], 98 | }, 99 | SAMPLES: () => ({ 100 | HG00096: { 101 | GT: ['0|0'], 102 | AP: ['0.000', '0.000'], 103 | }, 104 | }), 105 | GENOTYPES: () => ({ 106 | HG00096: '0|0', 107 | }), 108 | } 109 | ``` 110 | 111 | The `variant.SAMPLES()` and `variant.GENOTYPES()` are functions because it does 112 | not try to eagerly parse all the genotype data, so will only do so when you call 113 | either of these which can save time especially if your VCF has a lot of samples 114 | in it. 115 | 116 | The `variant.SAMPLES()` function parses out the FORMAT fields, while 117 | `variant.GENOTYPES()` returns just the genotypes string which can be faster if 118 | that is the only information you are interested in 119 | 120 | The parser will try to convert the values in INFO and FORMAT to the proper types 121 | using the header metadata. For example, if there is a header line like 122 | 123 | ```text 124 | ##INFO= 125 | ``` 126 | 127 | The parser will expect any INFO entry ABC to be an array of two integers, so it 128 | would convert `ABC=12,20` to `{ ABC: [12, 20] }`. 
129 | 130 | Each INFO entry value will be an array unless `Type=Flag` is specified, in which 131 | case it will be `true`. If no metadata can be found for the entry, it will 132 | assume `Number=1` and `Type=String`. 133 | 134 | Some fields are pre-defined by the 135 | [VCF spec](https://samtools.github.io/hts-specs/VCFv4.3.pdf), which is why in 136 | the variant object above "DP" was parsed as an integer (it is defined in the VCF 137 | spec), but "XYZ" was left as a string (it is not defined in either the VCF spec 138 | or the header). 139 | 140 | Metadata can be accessed with the `getMetadata()` method, including all the 141 | built-in metadata from the VCF spec. With no parameters it will return all the 142 | data. Any parameters passed will further filter the metadata. For example, for a 143 | VCF with this header: 144 | 145 | ```text 146 | ##INFO= 147 | #CHROM POS ID REF ALT QUAL FILTER INFO 148 | ``` 149 | 150 | you can access the VCF's header metadata like (some output omitted for clarity): 151 | 152 | ```typescript 153 | > console.log(vcfParser.getMetadata()) 154 | { INFO: 155 | { AA: 156 | { Number: 1, Type: 'String', Description: 'Ancestral Allele' }, 157 | 158 | ... 159 | 160 | ABC: { Number: 2, Type: 'Integer', Description: 'A description' } }, 161 | FORMAT: 162 | { AD: 163 | { Number: 'R', 164 | Type: 'Integer', 165 | Description: 'Read depth for each allele' }, 166 | 167 | ... 168 | 169 | ALT: 170 | { DEL: { Description: 'Deletion relative to the reference' }, 171 | 172 | ... 
173 | 174 | FILTER: { PASS: { Description: 'Passed all filters' } } } 175 | 176 | > console.log(vcfParser.getMetadata('INFO')) 177 | { AA: 178 | { Number: 1, Type: 'String', Description: 'Ancestral Allele' }, 179 | AC: 180 | { Number: 'A', 181 | Type: 'Integer', 182 | Description: 183 | 'Allele count in genotypes, for each ALT allele, in the same order as listed' }, 184 | AD: 185 | { Number: 'R', 186 | Type: 'Integer', 187 | Description: 'Total read depth for each allele' }, 188 | 189 | ... 190 | 191 | ABC: { Number: 2, Type: 'Integer', Description: 'A description' } } 192 | 193 | > console.log(vcfParser.getMetadata('INFO', 'DP')) 194 | { Number: 1, Type: 'Integer', Description: 'Total Depth' } 195 | 196 | > console.log(vcfParser.getMetadata('INFO', 'DP', 'Number')) 197 | 1 198 | ``` 199 | 200 | A list of sample names is also available in the `samples` attribute of the 201 | parser object: 202 | 203 | ```typescript 204 | > console.log(vcfParser.samples) 205 | [ 'HG00096' ] 206 | ``` 207 | 208 | ## Breakends 209 | 210 | We offer a helper function to parse breakend strings. We used to parse these 211 | automatically but it is now a helper function 212 | 213 | ```js 214 | import { parseBreakend } from '@gmod/vcf' 215 | parseBreakend('C[2:321682[') 216 | 217 | // output 218 | // 219 | // { 220 | // "MateDirection": "right", 221 | // "Replacement": "C", 222 | // "MatePosition": "2:321682", 223 | // "Join": "right" 224 | // } 225 | ``` 226 | 227 | - The C\[2:321682\[ parses as "Join": "right" because the BND is after the C 228 | base 229 | - The C\[2:321682\[ also is given "MateDirection": "right" because the square 230 | brackets point to the right. 231 | - The spec never has the square brackets pointing in different directions. 
232 | Instead, the different types of joins can be imagined as follows 233 | 234 | For the above vcf line where chr13:123456->C\[2:321682\[ then we have this 235 | 236 | chr13:123456 237 | -------------C\ 238 | \ 239 | \ 240 | \ 241 | \ 242 | \ 243 | \ 244 | \ 245 | \-------------- 246 | chr2:321682 247 | 248 | If the alt was instead chr13:123456->\[2:321682\[C then the "Join" would be 249 | "left" since the "BND" is before "C" and then the breakend structure looks like 250 | this 251 | 252 | chr13:123456 253 | 254 | |C-------------------- 255 | | 256 | | 257 | | 258 | | 259 | | 260 | | 261 | | 262 | | 263 | | 264 | | 265 | ---------------------- 266 | chr2:321682 267 | 268 | ## API 269 | 270 | 271 | 272 | #### Table of Contents 273 | 274 | - [VCFParser](#vcfparser) 275 | - [Parameters](#parameters) 276 | - [getMetadata](#getmetadata) 277 | - [Parameters](#parameters-1) 278 | - [parseLine](#parseline) 279 | - [Parameters](#parameters-2) 280 | 281 | ### VCFParser 282 | 283 | Class representing a VCF parser, instantiated with the VCF header. 284 | 285 | #### Parameters 286 | 287 | - `args` 288 | **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** 289 | - `args.header` 290 | **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 291 | The VCF header. Supports both LF and CRLF newlines. (optional, default `''`) 292 | - `args.strict` 293 | **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** 294 | Whether to parse in strict mode or not (default true) (optional, default 295 | `true`) 296 | 297 | #### getMetadata 298 | 299 | Get metadata filtered by the elements in args.
For example, you can pass ('INFO', 300 | 'DP') to only get info on a metadata tag like "##INFO=\<ID=DP,...>" 301 | 302 | ##### Parameters 303 | 304 | - `args` 305 | **...[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 306 | List of metadata filter strings. 307 | 308 | Returns **any** An object, string, or number, depending on the filtering 309 | 310 | #### parseLine 311 | 312 | Parse a VCF line into an object like 313 | 314 | ```typescript 315 | { 316 | CHROM: 'contigA', 317 | POS: 3000, 318 | ID: ['rs17883296'], 319 | REF: 'G', 320 | ALT: ['T', 'A'], 321 | QUAL: 100, 322 | FILTER: 'PASS', 323 | INFO: { 324 | NS: [3], 325 | DP: [14], 326 | AF: [0.5], 327 | DB: true, 328 | XYZ: ['5'], 329 | }, 330 | SAMPLES: () => ({ 331 | HG00096: { 332 | GT: ['0|0'], 333 | AP: ['0.000', '0.000'], 334 | } 335 | }), 336 | GENOTYPES: () => ({ 337 | HG00096: '0|0' 338 | }) 339 | } 340 | ``` 341 | 342 | SAMPLES and GENOTYPES methods are functions instead of static data fields 343 | because it avoids parsing the potentially long list of samples from e.g. 1000 344 | genotypes data unless requested.
345 | 346 | The SAMPLES function gives all info about the samples 347 | 348 | The GENOTYPES function only extracts the raw GT string if it exists, for 349 | potentially optimized parsing by programs that need it 350 | 351 | ##### Parameters 352 | 353 | - `line` 354 | **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 355 | A string of a line from a VCF 356 | -------------------------------------------------------------------------------- /test/parseGenotypesOnly-edge-cases.test.ts: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | 3 | import { parseGenotypesOnly } from '../src/parseGenotypesOnly' 4 | 5 | test('last sample with 3-char GT and no trailing tab', () => { 6 | // Single sample, no trailing tab 7 | const result = parseGenotypesOnly('GT', '0/1', ['S1']) 8 | expect(result).toEqual({ S1: '0/1' }) 9 | }) 10 | 11 | test('last sample with 3-char GT in multi-sample', () => { 12 | // Multiple samples, last one is 3-char with no trailing tab 13 | const result = parseGenotypesOnly('GT', '0/1\t1/1', ['S1', 'S2']) 14 | expect(result).toEqual({ S1: '0/1', S2: '1/1' }) 15 | }) 16 | 17 | test('last sample with non-3-char GT', () => { 18 | const result = parseGenotypesOnly('GT', '0/1\t.', ['S1', 'S2']) 19 | expect(result).toEqual({ S1: '0/1', S2: '.' }) 20 | }) 21 | 22 | test('single sample with 1-char GT', () => { 23 | const result = parseGenotypesOnly('GT', '.', ['S1']) 24 | expect(result).toEqual({ S1: '.' 
}) 25 | }) 26 | 27 | test('GT:DP:GQ - last sample with 3-char GT', () => { 28 | const result = parseGenotypesOnly('GT:DP:GQ', '0/1:20:99', ['S1']) 29 | expect(result).toEqual({ S1: '0/1' }) 30 | }) 31 | 32 | test('GT:DP:GQ - multiple samples, last with 3-char GT', () => { 33 | const result = parseGenotypesOnly('GT:DP:GQ', '0/1:20:99\t1/1:30:99', [ 34 | 'S1', 35 | 'S2', 36 | ]) 37 | expect(result).toEqual({ S1: '0/1', S2: '1/1' }) 38 | }) 39 | 40 | test('GT:DP:GQ - last sample with 1-char GT', () => { 41 | const result = parseGenotypesOnly('GT:DP:GQ', '0/1:20:99\t.:30:99', [ 42 | 'S1', 43 | 'S2', 44 | ]) 45 | expect(result).toEqual({ S1: '0/1', S2: '.' }) 46 | }) 47 | 48 | test('empty prerest string', () => { 49 | // Returns empty string for sample when no data present 50 | const result = parseGenotypesOnly('GT', '', ['S1']) 51 | expect(result).toEqual({ S1: '' }) 52 | }) 53 | 54 | test('more samples than data', () => { 55 | // Returns empty strings for samples beyond available data 56 | const result = parseGenotypesOnly('GT', '0/1', ['S1', 'S2', 'S3']) 57 | expect(result).toEqual({ S1: '0/1', S2: '', S3: '' }) 58 | }) 59 | 60 | test('haploid genotypes - single character', () => { 61 | const result = parseGenotypesOnly('GT', '0\t1\t0\t1\t0', [ 62 | 'S1', 63 | 'S2', 64 | 'S3', 65 | 'S4', 66 | 'S5', 67 | ]) 68 | expect(result).toEqual({ S1: '0', S2: '1', S3: '0', S4: '1', S5: '0' }) 69 | }) 70 | 71 | test('haploid genotypes - with missing', () => { 72 | const result = parseGenotypesOnly('GT', '0\t.\t1\t.\t0', [ 73 | 'S1', 74 | 'S2', 75 | 'S3', 76 | 'S4', 77 | 'S5', 78 | ]) 79 | expect(result).toEqual({ S1: '0', S2: '.', S3: '1', S4: '.', S5: '0' }) 80 | }) 81 | 82 | test('haploid genotypes - multi-allelic', () => { 83 | const result = parseGenotypesOnly('GT', '0\t1\t2\t3\t4\t5', [ 84 | 'S1', 85 | 'S2', 86 | 'S3', 87 | 'S4', 88 | 'S5', 89 | 'S6', 90 | ]) 91 | expect(result).toEqual({ 92 | S1: '0', 93 | S2: '1', 94 | S3: '2', 95 | S4: '3', 96 | S5: '4', 97 | S6: '5', 98 
| }) 99 | }) 100 | 101 | test('haploid genotypes - double-digit alleles', () => { 102 | const result = parseGenotypesOnly('GT', '10\t11\t20\t99', [ 103 | 'S1', 104 | 'S2', 105 | 'S3', 106 | 'S4', 107 | ]) 108 | expect(result).toEqual({ S1: '10', S2: '11', S3: '20', S4: '99' }) 109 | }) 110 | 111 | test('haploid genotypes - triple-digit alleles', () => { 112 | const result = parseGenotypesOnly('GT', '100\t200\t999', ['S1', 'S2', 'S3']) 113 | expect(result).toEqual({ S1: '100', S2: '200', S3: '999' }) 114 | }) 115 | 116 | test('haploid genotypes - many samples', () => { 117 | const samples = Array.from({ length: 50 }, (_, i) => `S${i}`) 118 | const gts = Array.from({ length: 50 }, (_, i) => String(i % 2)) 119 | const result = parseGenotypesOnly('GT', gts.join('\t'), samples) 120 | 121 | const expected = {} as Record 122 | samples.forEach((s, i) => { 123 | expected[s] = gts[i]! 124 | }) 125 | 126 | expect(result).toEqual(expected) 127 | }) 128 | 129 | test('haploid genotypes - ending without tab', () => { 130 | const result = parseGenotypesOnly('GT', '0\t1\t2', ['S1', 'S2', 'S3']) 131 | expect(result).toEqual({ S1: '0', S2: '1', S3: '2' }) 132 | }) 133 | 134 | test('haploid genotypes - single sample', () => { 135 | const result = parseGenotypesOnly('GT', '1', ['S1']) 136 | expect(result).toEqual({ S1: '1' }) 137 | }) 138 | 139 | test('mixed diploid and haploid genotypes', () => { 140 | const result = parseGenotypesOnly('GT', '0/1\t0\t1/1\t1\t0/0\t.', [ 141 | 'S1', 142 | 'S2', 143 | 'S3', 144 | 'S4', 145 | 'S5', 146 | 'S6', 147 | ]) 148 | expect(result).toEqual({ 149 | S1: '0/1', 150 | S2: '0', 151 | S3: '1/1', 152 | S4: '1', 153 | S5: '0/0', 154 | S6: '.', 155 | }) 156 | }) 157 | 158 | test('haploid with GT:DP:GQ format', () => { 159 | const result = parseGenotypesOnly( 160 | 'GT:DP:GQ', 161 | '0:20:99\t1:25:99\t0:30:99\t.:15:50', 162 | ['S1', 'S2', 'S3', 'S4'], 163 | ) 164 | expect(result).toEqual({ S1: '0', S2: '1', S3: '0', S4: '.' 
}) 165 | }) 166 | 167 | test('many samples with 3-char diploid GTs', () => { 168 | const result = parseGenotypesOnly( 169 | 'GT', 170 | '0/1\t1/1\t0/0\t0/1\t1/0\t0/1\t1/1\t0/0\t0/1\t1/0', 171 | ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10'], 172 | ) 173 | expect(result).toEqual({ 174 | S1: '0/1', 175 | S2: '1/1', 176 | S3: '0/0', 177 | S4: '0/1', 178 | S5: '1/0', 179 | S6: '0/1', 180 | S7: '1/1', 181 | S8: '0/0', 182 | S9: '0/1', 183 | S10: '1/0', 184 | }) 185 | }) 186 | 187 | test('many samples with phased 3-char GTs', () => { 188 | const result = parseGenotypesOnly( 189 | 'GT', 190 | '0|1\t1|1\t0|0\t0|1\t1|0\t0|1\t1|1\t0|0', 191 | ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8'], 192 | ) 193 | expect(result).toEqual({ 194 | S1: '0|1', 195 | S2: '1|1', 196 | S3: '0|0', 197 | S4: '0|1', 198 | S5: '1|0', 199 | S6: '0|1', 200 | S7: '1|1', 201 | S8: '0|0', 202 | }) 203 | }) 204 | 205 | test('many samples with GT:DP:GQ format', () => { 206 | const result = parseGenotypesOnly( 207 | 'GT:DP:GQ', 208 | '0/1:20:99\t1/1:25:99\t0/0:30:99\t.:15:50\t0/1:22:99\t1/0:28:99', 209 | ['S1', 'S2', 'S3', 'S4', 'S5', 'S6'], 210 | ) 211 | expect(result).toEqual({ 212 | S1: '0/1', 213 | S2: '1/1', 214 | S3: '0/0', 215 | S4: '.', 216 | S5: '0/1', 217 | S6: '1/0', 218 | }) 219 | }) 220 | 221 | test('many samples with DP:GQ:GT format', () => { 222 | const result = parseGenotypesOnly( 223 | 'DP:GQ:GT', 224 | '20:99:0/1\t25:99:1/1\t30:99:0/0\t15:50:.\t22:99:0/1\t28:99:1/0', 225 | ['S1', 'S2', 'S3', 'S4', 'S5', 'S6'], 226 | ) 227 | expect(result).toEqual({ 228 | S1: '0/1', 229 | S2: '1/1', 230 | S3: '0/0', 231 | S4: '.', 232 | S5: '0/1', 233 | S6: '1/0', 234 | }) 235 | }) 236 | 237 | test('complex multi-allelic genotypes', () => { 238 | const result = parseGenotypesOnly('GT', '0/1\t1/2\t2/2\t0/2\t1/1\t./.', [ 239 | 'S1', 240 | 'S2', 241 | 'S3', 242 | 'S4', 243 | 'S5', 244 | 'S6', 245 | ]) 246 | expect(result).toEqual({ 247 | S1: '0/1', 248 | S2: '1/2', 249 | S3: '2/2', 250 | S4: 
'0/2', 251 | S5: '1/1', 252 | S6: './.', 253 | }) 254 | }) 255 | 256 | test('triploid genotypes', () => { 257 | const result = parseGenotypesOnly('GT', '0/0/1\t0/1/1\t1/1/1\t0/0/0', [ 258 | 'S1', 259 | 'S2', 260 | 'S3', 261 | 'S4', 262 | ]) 263 | expect(result).toEqual({ 264 | S1: '0/0/1', 265 | S2: '0/1/1', 266 | S3: '1/1/1', 267 | S4: '0/0/0', 268 | }) 269 | }) 270 | 271 | test('tetraploid genotypes', () => { 272 | const result = parseGenotypesOnly( 273 | 'GT', 274 | '0/0/0/1\t0/1/1/1\t1/1/1/1\t0/0/0/0', 275 | ['S1', 'S2', 'S3', 'S4'], 276 | ) 277 | expect(result).toEqual({ 278 | S1: '0/0/0/1', 279 | S2: '0/1/1/1', 280 | S3: '1/1/1/1', 281 | S4: '0/0/0/0', 282 | }) 283 | }) 284 | 285 | test('hexaploid genotypes', () => { 286 | const result = parseGenotypesOnly('GT', '0/0/0/0/0/1\t0/1/1/1/1/1', [ 287 | 'S1', 288 | 'S2', 289 | ]) 290 | expect(result).toEqual({ 291 | S1: '0/0/0/0/0/1', 292 | S2: '0/1/1/1/1/1', 293 | }) 294 | }) 295 | 296 | test('mixed ploidy - haploid, diploid, triploid, tetraploid', () => { 297 | const result = parseGenotypesOnly( 298 | 'GT', 299 | '0\t0/1\t0/1/2\t0/1/2/3\t1\t./.\t0/0/0', 300 | ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7'], 301 | ) 302 | expect(result).toEqual({ 303 | S1: '0', 304 | S2: '0/1', 305 | S3: '0/1/2', 306 | S4: '0/1/2/3', 307 | S5: '1', 308 | S6: './.', 309 | S7: '0/0/0', 310 | }) 311 | }) 312 | 313 | test('polyploid with phased genotypes', () => { 314 | const result = parseGenotypesOnly('GT', '0|0|1\t0|1|1\t1|1|1\t0|0|0', [ 315 | 'S1', 316 | 'S2', 317 | 'S3', 318 | 'S4', 319 | ]) 320 | expect(result).toEqual({ 321 | S1: '0|0|1', 322 | S2: '0|1|1', 323 | S3: '1|1|1', 324 | S4: '0|0|0', 325 | }) 326 | }) 327 | 328 | test('polyploid with multi-allelic variants', () => { 329 | const result = parseGenotypesOnly( 330 | 'GT', 331 | '0/1/2\t1/2/3\t2/3/4\t0/0/0\t./././.', 332 | ['S1', 'S2', 'S3', 'S4', 'S5'], 333 | ) 334 | expect(result).toEqual({ 335 | S1: '0/1/2', 336 | S2: '1/2/3', 337 | S3: '2/3/4', 338 | S4: '0/0/0', 339 | S5: 
'./././.', 340 | }) 341 | }) 342 | 343 | test('large scale mixed ploidy', () => { 344 | const samples = Array.from({ length: 20 }, (_, i) => `S${i}`) 345 | const gts = [ 346 | '0', 347 | '0/1', 348 | '0/1/2', 349 | '0/1/2/3', 350 | '1', 351 | '1/1', 352 | '0/0/0', 353 | '1/1/1/1', 354 | '.', 355 | './.', 356 | '0', 357 | '0/1', 358 | '0/1/2', 359 | '0/1/2/3', 360 | '1', 361 | '1/1', 362 | '0/0/0', 363 | '1/1/1/1', 364 | '.', 365 | './.', 366 | ] 367 | const result = parseGenotypesOnly('GT', gts.join('\t'), samples) 368 | 369 | const expected = {} as Record 370 | samples.forEach((s, i) => { 371 | expected[s] = gts[i]! 372 | }) 373 | 374 | expect(result).toEqual(expected) 375 | }) 376 | 377 | test('very long polyploid genotypes', () => { 378 | const result = parseGenotypesOnly('GT', '0/1/2/3/4/5/6/7\t0/0/0/0/0/0/0/0', [ 379 | 'S1', 380 | 'S2', 381 | ]) 382 | expect(result).toEqual({ 383 | S1: '0/1/2/3/4/5/6/7', 384 | S2: '0/0/0/0/0/0/0/0', 385 | }) 386 | }) 387 | 388 | test('polyploid with double-digit alleles', () => { 389 | const result = parseGenotypesOnly('GT', '0/10/20\t10/11/12\t0/0/0', [ 390 | 'S1', 391 | 'S2', 392 | 'S3', 393 | ]) 394 | expect(result).toEqual({ 395 | S1: '0/10/20', 396 | S2: '10/11/12', 397 | S3: '0/0/0', 398 | }) 399 | }) 400 | 401 | test('mixed ploidy ending with haploid', () => { 402 | const result = parseGenotypesOnly('GT', '0/1\t0/1/2\t0/1/2/3\t1', [ 403 | 'S1', 404 | 'S2', 405 | 'S3', 406 | 'S4', 407 | ]) 408 | expect(result).toEqual({ 409 | S1: '0/1', 410 | S2: '0/1/2', 411 | S3: '0/1/2/3', 412 | S4: '1', 413 | }) 414 | }) 415 | 416 | test('mixed ploidy ending with polyploid', () => { 417 | const result = parseGenotypesOnly('GT', '0\t0/1\t0/1/2/3/4', [ 418 | 'S1', 419 | 'S2', 420 | 'S3', 421 | ]) 422 | expect(result).toEqual({ 423 | S1: '0', 424 | S2: '0/1', 425 | S3: '0/1/2/3/4', 426 | }) 427 | }) 428 | 429 | test('alternating ploidy pattern', () => { 430 | const result = parseGenotypesOnly('GT', '0\t0/1\t0\t0/1\t0\t0/1\t0\t0/1', [ 
431 | 'S1', 432 | 'S2', 433 | 'S3', 434 | 'S4', 435 | 'S5', 436 | 'S6', 437 | 'S7', 438 | 'S8', 439 | ]) 440 | expect(result).toEqual({ 441 | S1: '0', 442 | S2: '0/1', 443 | S3: '0', 444 | S4: '0/1', 445 | S5: '0', 446 | S6: '0/1', 447 | S7: '0', 448 | S8: '0/1', 449 | }) 450 | }) 451 | -------------------------------------------------------------------------------- /src/parse.ts: -------------------------------------------------------------------------------- 1 | import { parseGenotypesOnly } from './parseGenotypesOnly.ts' 2 | import { parseMetaString } from './parseMetaString.ts' 3 | import vcfReserved from './vcfReserved.ts' 4 | 5 | function decodeURIComponentNoThrow(uri: string) { 6 | try { 7 | return decodeURIComponent(uri) 8 | } catch (_e) { 9 | // avoid throwing exception on a failure to decode URI component 10 | return uri 11 | } 12 | } 13 | 14 | /** 15 | * Class representing a VCF parser, instantiated with the VCF header. 16 | * 17 | * @param {object} args 18 | * 19 | * @param {string} args.header - The VCF header. Supports both LF and CRLF 20 | * newlines. 
21 | * 22 | * @param {boolean} args.strict - Whether to parse in strict mode or not 23 | * (default true) 24 | */ 25 | export default class VCFParser { 26 | private metadata: Record 27 | public strict: boolean 28 | public samples: string[] 29 | 30 | constructor({ 31 | header = '', 32 | strict = true, 33 | }: { 34 | header: string 35 | strict?: boolean 36 | }) { 37 | if (!header.length) { 38 | throw new Error('empty header received') 39 | } 40 | const headerLines = header.split(/[\r\n]+/).filter(Boolean) 41 | if (!headerLines.length) { 42 | throw new Error('no non-empty header lines specified') 43 | } 44 | 45 | this.strict = strict 46 | this.metadata = { 47 | INFO: { ...vcfReserved.InfoFields }, 48 | FORMAT: { ...vcfReserved.GenotypeFields }, 49 | ALT: { ...vcfReserved.AltTypes }, 50 | FILTER: { ...vcfReserved.FilterTypes }, 51 | } 52 | 53 | let lastLine: string | undefined 54 | for (let i = 0; i < headerLines.length; i++) { 55 | const line = headerLines[i]! 56 | if (!line.startsWith('#')) { 57 | throw new Error(`Bad line in header:\n${line}`) 58 | } else if (line.startsWith('##')) { 59 | this.parseMetadata(line) 60 | } else { 61 | lastLine = line 62 | } 63 | } 64 | 65 | if (!lastLine) { 66 | throw new Error('No format line found in header') 67 | } 68 | const fields = lastLine.trim().split('\t') 69 | const thisHeader = fields.slice(0, 8) 70 | const correctHeader = [ 71 | '#CHROM', 72 | 'POS', 73 | 'ID', 74 | 'REF', 75 | 'ALT', 76 | 'QUAL', 77 | 'FILTER', 78 | 'INFO', 79 | ] 80 | if (fields.length < 8) { 81 | throw new Error(`VCF header missing columns:\n${lastLine}`) 82 | } else if ( 83 | thisHeader.length !== correctHeader.length || 84 | !thisHeader.every((value, index) => value === correctHeader[index]) 85 | ) { 86 | throw new Error(`VCF column headers not correct:\n${lastLine}`) 87 | } 88 | this.samples = fields.slice(9) 89 | } 90 | 91 | private parseInfo(infoStr: string) { 92 | const result: Record = {} 93 | const hasDecode = infoStr.includes('%') 94 | const 
infoPairs = infoStr.split(';') 95 | const infoMeta = this.metadata.INFO as Record 96 | const pairsLen = infoPairs.length 97 | 98 | for (let i = 0; i < pairsLen; i++) { 99 | const pair = infoPairs[i]! 100 | const eqIdx = pair.indexOf('=') 101 | const key = eqIdx === -1 ? pair : pair.slice(0, eqIdx) 102 | const val = eqIdx === -1 ? undefined : pair.slice(eqIdx + 1) 103 | const itemType = infoMeta[key]?.Type 104 | 105 | if (itemType === 'Flag') { 106 | result[key] = true 107 | } else if (!val) { 108 | result[key] = true 109 | } else { 110 | const isNumber = itemType === 'Integer' || itemType === 'Float' 111 | const rawItems = val.split(',') 112 | const itemsLen = rawItems.length 113 | 114 | if (hasDecode) { 115 | const items: (string | number | undefined)[] = [] 116 | for (let j = 0; j < itemsLen; j++) { 117 | const v = rawItems[j]! 118 | if (v === '.') { 119 | items.push(undefined) 120 | } else { 121 | const decoded = decodeURIComponentNoThrow(v) 122 | items.push(isNumber ? Number(decoded) : decoded) 123 | } 124 | } 125 | result[key] = items 126 | } else { 127 | const items: (string | number | undefined)[] = [] 128 | for (let j = 0; j < itemsLen; j++) { 129 | const v = rawItems[j]! 130 | if (v === '.') { 131 | items.push(undefined) 132 | } else { 133 | items.push(isNumber ? 
Number(v) : v) 134 | } 135 | } 136 | result[key] = items 137 | } 138 | } 139 | } 140 | return result 141 | } 142 | 143 | private parseSamples(format: string, prerest: string) { 144 | const genotypes = {} as Record< 145 | string, 146 | Record 147 | > 148 | if (format) { 149 | const rest = prerest.split('\t') 150 | const formatKeys = format.split(':') 151 | const formatMeta = this.metadata.FORMAT as Record 152 | const isNumberType: boolean[] = [] 153 | for (let i = 0; i < formatKeys.length; i++) { 154 | const r = formatMeta[formatKeys[i]!]?.Type 155 | isNumberType.push(r === 'Integer' || r === 'Float') 156 | } 157 | const numKeys = formatKeys.length 158 | const samplesLen = this.samples.length 159 | for (let i = 0; i < samplesLen; i++) { 160 | const sample = this.samples[i]! 161 | const sampleData: Record< 162 | string, 163 | (string | number | undefined)[] | undefined 164 | > = {} 165 | const sampleStr = rest[i]! 166 | const sampleStrLen = sampleStr.length 167 | let colStart = 0 168 | let colIdx = 0 169 | 170 | for (let j = 0; j <= sampleStrLen; j++) { 171 | if (j === sampleStrLen || sampleStr[j] === ':') { 172 | const val = sampleStr.slice(colStart, j) 173 | if (val === '' || val === '.') { 174 | sampleData[formatKeys[colIdx]!] = undefined 175 | } else { 176 | const items = val.split(',') 177 | const result: (string | number | undefined)[] = [] 178 | if (isNumberType[colIdx]) { 179 | for (let k = 0; k < items.length; k++) { 180 | const ent = items[k]! 181 | result.push(ent === '.' ? undefined : +ent) 182 | } 183 | } else { 184 | for (let k = 0; k < items.length; k++) { 185 | const ent = items[k]! 186 | result.push(ent === '.' ? undefined : ent) 187 | } 188 | } 189 | sampleData[formatKeys[colIdx]!] = result 190 | } 191 | colStart = j + 1 192 | colIdx += 1 193 | if (colIdx >= numKeys) { 194 | break 195 | } 196 | } 197 | } 198 | genotypes[sample] = sampleData 199 | } 200 | } 201 | return genotypes 202 | } 203 | 204 | /** 205 | * Parse a VCF metadata line (i.e. 
a line that starts with "##") and add its 206 | * properties to the object. 207 | * 208 | * @param {string} line - A line from the VCF. Supports both LF and CRLF 209 | * newlines. 210 | */ 211 | private parseMetadata(line: string) { 212 | const match = /^##(.+?)=(.*)/.exec(line.trim()) 213 | if (!match) { 214 | throw new Error(`Line is not a valid metadata line: ${line}`) 215 | } 216 | const [metaKey, metaVal] = match.slice(1, 3) 217 | 218 | const r = metaKey! 219 | if (metaVal?.startsWith('<')) { 220 | if (!(r in this.metadata)) { 221 | this.metadata[r] = {} 222 | } 223 | const [id, keyVals] = this.parseStructuredMetaVal(metaVal) 224 | if (id) { 225 | // if there is an ID field in the <> metadata 226 | // e.g. ##INFO= 227 | ;(this.metadata[r] as Record)[id] = keyVals 228 | } else { 229 | // if there is not an ID field in the <> metadata 230 | // e.g. ##ID= 231 | this.metadata[r] = keyVals 232 | } 233 | } else { 234 | this.metadata[r] = metaVal 235 | } 236 | } 237 | 238 | /** 239 | * Parse a VCF header structured meta string (i.e. a meta value that starts 240 | * with "" 263 | * 264 | * @param {...string} args - List of metadata filter strings. 265 | * 266 | * @returns {any} An object, string, or number, depending on the filtering 267 | */ 268 | getMetadata(...args: string[]) { 269 | let filteredMetadata: any = this.metadata 270 | const argsLen = args.length 271 | for (let i = 0; i < argsLen; i++) { 272 | filteredMetadata = filteredMetadata[args[i]!] 
273 | if (!filteredMetadata) { 274 | return filteredMetadata 275 | } 276 | } 277 | return filteredMetadata 278 | } 279 | 280 | /** 281 | * Parse a VCF line into an object like 282 | * 283 | * ```typescript 284 | * { 285 | * CHROM: 'contigA', 286 | * POS: 3000, 287 | * ID: ['rs17883296'], 288 | * REF: 'G', 289 | * ALT: ['T', 'A'], 290 | * QUAL: 100, 291 | * FILTER: 'PASS', 292 | * INFO: { 293 | * NS: [3], 294 | * DP: [14], 295 | * AF: [0.5], 296 | * DB: true, 297 | * XYZ: ['5'], 298 | * }, 299 | * SAMPLES: () => ({ 300 | * HG00096: { 301 | * GT: ['0|0'], 302 | * AP: ['0.000', '0.000'], 303 | * } 304 | * }), 305 | * GENOTYPES: () => ({ 306 | * HG00096: '0|0' 307 | * }) 308 | * } 309 | * ``` 310 | * 311 | * SAMPLES and GENOTYPES methods are functions instead of static data fields 312 | * because it avoids parsing the potentially long list of samples from e.g. 313 | * 1000 genotypes data unless requested. 314 | * 315 | * The SAMPLES function gives all info about the samples 316 | * 317 | * The GENOTYPES function only extracts the raw GT string if it exists, for 318 | * potentially optimized parsing by programs that need it 319 | * 320 | * @param {string} line - A string of a line from a VCF 321 | */ 322 | parseLine(line: string) { 323 | let currChar = 0 324 | let tabCount = 0 325 | while (currChar < line.length && tabCount < 9) { 326 | if (line[currChar] === '\t') { 327 | tabCount += 1 328 | } 329 | currChar += 1 330 | } 331 | const splitPos = tabCount === 9 ? currChar - 1 : currChar 332 | const fields = line.slice(0, splitPos).split('\t') 333 | const rest = line.slice(splitPos + 1) 334 | const [CHROM, POS, ID, REF, ALT, QUAL, FILTER] = fields 335 | const chrom = CHROM 336 | const pos = +POS! 337 | const id = ID === '.' ? undefined : ID!.split(';') 338 | const ref = REF 339 | const alt = ALT === '.' ? undefined : ALT!.split(',') 340 | const qual = QUAL === '.' ? undefined : +QUAL! 341 | const filter = FILTER === '.' ? 
undefined : FILTER!.split(';') 342 | const format = fields[8] 343 | 344 | if (this.strict && !fields[7]) { 345 | throw new Error( 346 | "no INFO field specified, must contain at least a '.' (turn off strict mode to allow)", 347 | ) 348 | } 349 | const info = 350 | fields[7] === undefined || fields[7] === '.' 351 | ? {} 352 | : this.parseInfo(fields[7]) 353 | 354 | return { 355 | CHROM: chrom, 356 | POS: pos, 357 | ALT: alt, 358 | INFO: info, 359 | REF: ref, 360 | FILTER: filter?.length === 1 && filter[0] === 'PASS' ? 'PASS' : filter, 361 | ID: id, 362 | QUAL: qual, 363 | FORMAT: format, 364 | SAMPLES: () => this.parseSamples(fields[8] ?? '', rest), 365 | GENOTYPES: () => parseGenotypesOnly(fields[8] ?? '', rest, this.samples), 366 | } 367 | } 368 | } 369 | 370 | export type Variant = ReturnType 371 | -------------------------------------------------------------------------------- /test/data/1000genomes.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##FILTER= 3 | ##fileDate=20150218 4 | ##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz 5 | ##contig= 6 | ##source=freeBayes v0.9.9.2 | GT values over-written with maximum likelihood state (subject to threshold) OR phylogenetic imputation 7 | ##INFO= 8 | ##FORMAT= 9 | ##source=GenomeSTRiP_v1.04 10 | ##ALT= 11 | ##FILTER= 12 | ##FILTER= 13 | ##FILTER== 0.5 && GSDUPLICATESCORE >= 0.0"> 14 | ##FILTER== 2.0"> 15 | ##FILTER= 16 | ##FILTER== 13.0"> 17 | ##FORMAT= 18 | ##FORMAT= 19 | ##FORMAT= 20 | ##FORMAT= 21 | ##FORMAT= 22 | ##FORMAT= 23 | ##FORMAT= 24 | ##FORMAT= 25 | ##INFO= 26 | ##INFO= 27 | ##INFO= 28 | ##INFO= 29 | ##INFO= 30 | ##INFO= 31 | ##INFO= 32 | ##INFO= 33 | ##INFO= 34 | ##INFO= 35 | ##INFO= 36 | ##INFO= 37 | ##INFO= 38 | ##INFO= 39 | ##INFO= 40 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG00105 HG00107 HG00108 HG00109 HG00112 HG00113 HG00114 
HG00115 HG00116 HG00117 HG00119 HG00126 HG00129 HG00131 HG00136 HG00138 HG00139 HG00140 HG00141 HG00142 HG00143 HG00145 HG00148 HG00149 HG00151 HG00155 HG00157 HG00159 HG00160 HG00181 HG00182 HG00183 HG00185 HG00186 HG00187 HG00188 HG00189 HG00190 HG00234 HG00242 HG00243 HG00244 HG00246 HG00251 HG00252 HG00256 HG00260 HG00264 HG00265 HG00267 HG00271 HG00273 HG00277 HG00278 HG00280 HG00284 HG00290 HG00308 HG00310 HG00311 HG00321 HG00325 HG00329 HG00335 HG00336 HG00338 HG00341 HG00342 HG00345 HG00351 HG00358 HG00360 HG00366 HG00369 HG00371 HG00372 HG00375 HG00382 HG00403 HG00406 HG00409 HG00421 HG00436 HG00442 HG00445 HG00448 HG00451 HG00457 HG00463 HG00472 HG00475 HG00478 HG00500 HG00524 HG00530 HG00533 HG00536 HG00542 HG00553 HG00556 HG00559 HG00565 HG00580 HG00583 HG00589 HG00592 HG00595 HG00598 HG00607 HG00610 HG00613 HG00619 HG00622 HG00625 HG00628 HG00631 HG00634 HG00637 HG00640 HG00650 HG00653 HG00656 HG00662 HG00671 HG00674 HG00683 HG00689 HG00692 HG00698 HG00701 HG00704 HG00707 HG00728 HG00731 HG00736 HG00739 HG00742 HG00844 HG00881 HG00982 HG01028 HG01031 HG01047 HG01048 HG01051 HG01054 HG01060 HG01063 HG01066 HG01069 HG01072 HG01075 HG01079 HG01082 HG01085 HG01088 HG01094 HG01097 HG01101 HG01104 HG01107 HG01110 HG01112 HG01121 HG01124 HG01130 HG01133 HG01136 HG01139 HG01142 HG01148 HG01161 HG01164 HG01167 HG01170 HG01173 HG01176 HG01182 HG01187 HG01190 HG01197 HG01200 HG01204 HG01241 HG01247 HG01250 HG01253 HG01256 HG01259 HG01271 HG01277 HG01280 HG01286 HG01302 HG01305 HG01308 HG01311 HG01325 HG01334 HG01341 HG01344 HG01350 HG01353 HG01356 HG01359 HG01362 HG01365 HG01374 HG01377 HG01383 HG01389 HG01392 HG01395 HG01398 HG01402 HG01405 HG01412 HG01413 HG01431 HG01437 HG01440 HG01443 HG01455 HG01461 HG01464 HG01479 HG01485 HG01488 HG01491 HG01494 HG01497 HG01500 HG01503 HG01506 HG01509 HG01512 HG01515 HG01518 HG01521 HG01524 HG01527 HG01530 HG01536 HG01550 HG01556 HG01565 HG01571 HG01577 HG01583 HG01586 HG01589 HG01596 HG01603 HG01606 HG01608 HG01610 HG01615 
HG01617 HG01619 HG01624 HG01625 HG01630 HG01631 HG01669 HG01672 HG01675 HG01678 HG01680 HG01682 HG01686 HG01694 HG01699 HG01700 HG01705 HG01708 HG01709 HG01747 HG01756 HG01761 HG01765 HG01767 HG01771 HG01775 HG01777 HG01781 HG01783 HG01785 HG01789 HG01791 HG01810 HG01811 HG01816 HG01840 HG01842 HG01844 HG01846 HG01849 HG01852 HG01860 HG01861 HG01864 HG01865 HG01866 HG01867 HG01872 HG01873 HG01879 HG01882 HG01885 HG01890 HG01892 HG01912 HG01914 HG01917 HG01920 HG01923 HG01926 HG01932 HG01935 HG01938 HG01941 HG01944 HG01947 HG01950 HG01953 HG01961 HG01967 HG01970 HG01974 HG01977 HG01979 HG01982 HG01986 HG01988 HG01990 HG01991 HG02002 HG02008 HG02009 HG02013 HG02014 HG02017 HG02020 HG02023 HG02026 HG02029 HG02032 HG02035 HG02040 HG02047 HG02050 HG02051 HG02053 HG02058 HG02061 HG02064 HG02067 HG02070 HG02073 HG02076 HG02079 HG02082 HG02085 HG02088 HG02090 HG02104 HG02107 HG02116 HG02122 HG02128 HG02131 HG02134 HG02137 HG02138 HG02141 HG02143 HG02146 HG02150 HG02219 HG02221 HG02224 HG02231 HG02233 HG02236 HG02238 HG02250 HG02253 HG02255 HG02259 HG02262 HG02265 HG02271 HG02274 HG02277 HG02281 HG02283 HG02284 HG02285 HG02291 HG02299 HG02304 HG02307 HG02314 HG02317 HG02323 HG02330 HG02332 HG02334 HG02343 HG02351 HG02353 HG02355 HG02356 HG02360 HG02364 HG02367 HG02371 HG02373 HG02374 HG02375 HG02379 HG02380 HG02382 HG02383 HG02384 HG02385 HG02386 HG02389 HG02390 HG02391 HG02392 HG02394 HG02395 HG02396 HG02397 HG02398 HG02399 HG02401 HG02402 HG02406 HG02407 HG02408 HG02409 HG02410 HG02420 HG02429 HG02433 HG02439 HG02442 HG02445 HG02449 HG02455 HG02461 HG02464 HG02470 HG02481 HG02484 HG02489 HG02490 HG02493 HG02496 HG02501 HG02512 HG02521 HG02536 HG02541 HG02545 HG02554 HG02557 HG02561 HG02570 HG02573 HG02582 HG02585 HG02588 HG02594 HG02597 HG02600 HG02603 HG02610 HG02613 HG02620 HG02623 HG02628 HG02634 HG02642 HG02645 HG02648 HG02651 HG02654 HG02657 HG02660 HG02666 HG02675 HG02678 HG02681 HG02684 HG02687 HG02690 HG02696 HG02699 HG02702 HG02715 HG02721 HG02724 HG02727 HG02733 
HG02736 HG02756 HG02759 HG02768 HG02771 HG02774 HG02780 HG02783 HG02786 HG02789 HG02792 HG02798 HG02804 HG02807 HG02810 HG02813 HG02816 HG02819 HG02836 HG02839 HG02851 HG02854 HG02860 HG02878 HG02881 HG02884 HG02887 HG02890 HG02895 HG02923 HG02938 HG02941 HG02944 HG02947 HG02953 HG02968 HG02971 HG02973 HG02977 HG02981 HG02982 HG03006 HG03009 HG03012 HG03015 HG03018 HG03021 HG03024 HG03027 HG03039 HG03045 HG03048 HG03054 HG03057 HG03060 HG03063 HG03066 HG03069 HG03072 HG03074 HG03077 HG03078 HG03081 HG03084 HG03096 HG03100 HG03103 HG03109 HG03112 HG03115 HG03118 HG03120 HG03124 HG03127 HG03130 HG03133 HG03136 HG03139 HG03157 HG03160 HG03163 HG03166 HG03169 HG03172 HG03175 HG03190 HG03193 HG03196 HG03199 HG03202 HG03209 HG03224 HG03225 HG03228 HG03234 HG03237 HG03240 HG03246 HG03258 HG03265 HG03268 HG03271 HG03280 HG03295 HG03298 HG03301 HG03304 HG03311 HG03313 HG03343 HG03352 HG03367 HG03370 HG03376 HG03382 HG03385 HG03388 HG03391 HG03394 HG03397 HG03432 HG03433 HG03436 HG03439 HG03442 HG03445 HG03451 HG03457 HG03460 HG03469 HG03472 HG03478 HG03484 HG03490 HG03515 HG03518 HG03521 HG03538 HG03547 HG03556 HG03559 HG03565 HG03571 HG03577 HG03585 HG03593 HG03594 HG03600 HG03603 HG03615 HG03624 HG03629 HG03636 HG03644 HG03646 HG03649 HG03652 HG03660 HG03663 HG03667 HG03672 HG03679 HG03680 HG03681 HG03685 HG03686 HG03687 HG03691 HG03693 HG03694 HG03695 HG03696 HG03697 HG03702 HG03705 HG03708 HG03711 HG03713 HG03716 HG03718 HG03720 HG03727 HG03729 HG03738 HG03740 HG03742 HG03743 HG03744 HG03745 HG03746 HG03750 HG03753 HG03755 HG03767 HG03771 HG03773 HG03775 HG03777 HG03778 HG03779 HG03785 HG03786 HG03788 HG03790 HG03792 HG03800 HG03803 HG03809 HG03812 HG03815 HG03821 HG03824 HG03830 HG03833 HG03837 HG03844 HG03846 HG03848 HG03850 HG03851 HG03854 HG03856 HG03864 HG03866 HG03867 HG03869 HG03870 HG03871 HG03872 HG03875 HG03885 HG03887 HG03890 HG03896 HG03899 HG03900 HG03902 HG03905 HG03908 HG03911 HG03914 HG03917 HG03920 HG03926 HG03941 HG03943 HG03950 HG03953 HG03960 HG03963 
HG03965 HG03967 HG03969 HG03971 HG03974 HG03976 HG03978 HG03985 HG03990 HG03991 HG03998 HG03999 HG04002 HG04003 HG04006 HG04015 HG04017 HG04019 HG04020 HG04022 HG04023 HG04033 HG04039 HG04056 HG04060 HG04061 HG04080 HG04093 HG04094 HG04096 HG04098 HG04100 HG04107 HG04131 HG04134 HG04140 HG04146 HG04152 HG04155 HG04158 HG04161 HG04164 HG04173 HG04176 HG04182 HG04185 HG04188 HG04194 HG04198 HG04206 HG04210 HG04211 HG04219 HG04222 HG04225 HG04229 HG04235 HG04238 HG04239 NA06984 NA06986 NA06994 NA07048 NA07051 NA07347 NA07357 NA10851 NA11829 NA11831 NA11843 NA11881 NA11893 NA11919 NA11930 NA11932 NA11992 NA11994 NA12003 NA12005 NA12043 NA12045 NA12144 NA12154 NA12155 NA12272 NA12282 NA12286 NA12340 NA12342 NA12347 NA12399 NA12413 NA12546 NA12716 NA12748 NA12750 NA12760 NA12762 NA12775 NA12777 NA12812 NA12814 NA12827 NA12829 NA12842 NA12872 NA12874 NA12889 NA18486 NA18498 NA18501 NA18504 NA18507 NA18510 NA18516 NA18519 NA18522 NA18530 NA18534 NA18536 NA18543 NA18544 NA18546 NA18548 NA18549 NA18557 NA18558 NA18559 NA18561 NA18562 NA18563 NA18572 NA18603 NA18605 NA18606 NA18608 NA18609 NA18611 NA18612 NA18613 NA18620 NA18621 NA18622 NA18623 NA18624 NA18629 NA18632 NA18633 NA18635 NA18636 NA18637 NA18638 NA18639 NA18643 NA18645 NA18647 NA18648 NA18740 NA18745 NA18747 NA18748 NA18749 NA18757 NA18853 NA18856 NA18865 NA18868 NA18871 NA18874 NA18877 NA18879 NA18908 NA18910 NA18915 NA18917 NA18923 NA18934 NA18940 NA18943 NA18944 NA18945 NA18948 NA18952 NA18953 NA18959 NA18960 NA18961 NA18962 NA18965 NA18966 NA18967 NA18970 NA18971 NA18974 NA18977 NA18982 NA18983 NA18984 NA18985 NA18986 NA18988 NA18989 NA18990 NA18994 NA18995 NA19000 NA19004 NA19005 NA19006 NA19007 NA19009 NA19012 NA19020 NA19025 NA19026 NA19027 NA19028 NA19031 NA19035 NA19041 NA19043 NA19055 NA19056 NA19058 NA19060 NA19062 NA19063 NA19066 NA19067 NA19068 NA19070 NA19072 NA19075 NA19076 NA19079 NA19082 NA19083 NA19085 NA19086 NA19088 NA19089 NA19091 NA19092 NA19096 NA19098 NA19107 NA19113 NA19117 NA19119 NA19121 
NA19130 NA19138 NA19141 NA19144 NA19146 NA19153 NA19160 NA19171 NA19175 NA19184 NA19189 NA19198 NA19200 NA19207 NA19210 NA19213 NA19223 NA19236 NA19239 NA19248 NA19256 NA19307 NA19308 NA19309 NA19312 NA19317 NA19318 NA19319 NA19331 NA19334 NA19346 NA19347 NA19350 NA19360 NA19372 NA19374 NA19375 NA19376 NA19380 NA19383 NA19384 NA19385 NA19393 NA19394 NA19397 NA19428 NA19429 NA19430 NA19443 NA19448 NA19451 NA19452 NA19454 NA19455 NA19461 NA19466 NA19649 NA19652 NA19655 NA19658 NA19661 NA19664 NA19670 NA19676 NA19679 NA19682 NA19700 NA19703 NA19711 NA19717 NA19720 NA19723 NA19726 NA19729 NA19732 NA19735 NA19741 NA19747 NA19750 NA19756 NA19759 NA19762 NA19771 NA19774 NA19777 NA19780 NA19783 NA19786 NA19789 NA19792 NA19795 NA19818 NA19834 NA19900 NA19904 NA19908 NA19916 NA19920 NA19922 NA19982 NA19984 NA20126 NA20278 NA20281 NA20291 NA20298 NA20318 NA20340 NA20342 NA20346 NA20348 NA20351 NA20356 NA20362 NA20509 NA20510 NA20511 NA20512 NA20513 NA20515 NA20516 NA20518 NA20519 NA20520 NA20521 NA20524 NA20525 NA20527 NA20528 NA20532 NA20534 NA20536 NA20538 NA20539 NA20543 NA20544 NA20581 NA20586 NA20588 NA20752 NA20754 NA20755 NA20758 NA20759 NA20762 NA20763 NA20765 NA20767 NA20770 NA20778 NA20783 NA20785 NA20787 NA20792 NA20796 NA20798 NA20801 NA20803 NA20805 NA20806 NA20809 NA20810 NA20811 NA20812 NA20814 NA20815 NA20827 NA20845 NA20846 NA20850 NA20852 NA20858 NA20861 NA20863 NA20864 NA20866 NA20867 NA20870 NA20884 NA20885 NA20887 NA20889 NA20890 NA20891 NA20895 NA20897 NA20901 NA20903 NA20904 NA20905 NA20911 NA21087 NA21090 NA21091 NA21092 NA21093 NA21094 NA21095 NA21098 NA21099 NA21100 NA21104 NA21105 NA21107 NA21109 NA21111 NA21112 NA21113 NA21114 NA21115 NA21116 NA21117 NA21118 NA21119 NA21123 NA21124 NA21126 NA21127 NA21128 NA21129 NA21130 NA21133 NA21135 41 | Y 2655180 rs11575897 G A 100 PASS AA=G;AC=22;AF=0.0178427;AN=1233;DP=84761;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0902;VT=SNP;EX_TARGET GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 1 0 0 0 1 1 0 0 1 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42 | Y 2655471 . A C 100 PASS AA=A;AC=5;AF=0.00405515;AN=1233;DP=72067;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0205;VT=SNP;EX_TARGET GT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 43 | --------------------------------------------------------------------------------