├── tsconfig.lint.json
├── codecov.yml
├── src
    ├── index.ts
    ├── parseMetaString.ts
    ├── parseGenotypesOnly.ts
    ├── parseBreakend.ts
    ├── vcfReserved.ts
    └── parse.ts
├── .prettierrc.json
├── test
    ├── data
    │   ├── pedigree.vcf
    │   ├── vcf4.3_spec_bnd.vcf
    │   ├── breakends.vcf
    │   ├── weird_info_and_missing_format.vcf
    │   ├── sample2genotype.vcf
    │   ├── spec-example.vcf
    │   ├── multipleAltSVs.vcf
    │   ├── simple.vcf
    │   ├── vcf4.3_spec_snippet.vcf
    │   ├── sniffles.vcf
    │   ├── vcf44_spec.vcf
    │   ├── y-chrom-haploid.vcf
    │   ├── clinvar.header.vcf
    │   └── 1000genomes.vcf
    ├── __snapshots__
    │   ├── parseGenotypesOnly.test.ts.snap
    │   └── parseMetaString.test.ts.snap
    ├── parseGenotypesOnly.test.ts
    ├── parseMetaString.test.ts
    ├── parseGenotypesOnly-ultrafast-edge.test.ts
    ├── index.test.ts
    ├── parse.test.ts
    └── parseGenotypesOnly-edge-cases.test.ts
├── .editorconfig
├── tsconfig.json
├── .github
    └── workflows
    │   └── push.yml
├── scripts
    └── build-both-branches.sh
├── LICENSE
├── .gitignore
├── benchmark
    └── master-vs-current.bench.ts
├── package.json
├── eslint.config.mjs
├── CHANGELOG.md
└── README.md


/tsconfig.lint.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "./tsconfig",
3 |   "include": ["src", "test"]
4 | }
5 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | coverage:
2 |   status:
3 |     patch: false
4 |     project: false
5 | github_checks:
6 |   annotations: false
7 | 


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | export type { Variant } from './parse.ts'
2 | 
3 | export { default } from './parse.ts'
4 | export * from './parseBreakend.ts'
5 | 


--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 |   "semi": false,
3 |   "trailingComma": "all",
4 |   "singleQuote": true,
5 |   "arrowParens": "avoid",
6 |   "proseWrap": "always"
7 | }
8 | 


--------------------------------------------------------------------------------
/test/data/pedigree.vcf:
--------------------------------------------------------------------------------
1 | ##PEDIGREE=<ID=TumourSample,Original=GermlineID>
2 | ##PEDIGREE=<ID=SomaticNonTumour,Original=GermlineID>
3 | ##PEDIGREE=<ID=ChildID,Father=FatherID,Mother=MotherID>
4 | ##PEDIGREE=<ID=SampleID,Name_1=Ancestor_1,Name_2=Ancestor_N>
5 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
6 | 


--------------------------------------------------------------------------------
/test/data/vcf4.3_spec_bnd.vcf:
--------------------------------------------------------------------------------
1 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
2 | 2	321681	bnd_W	G	G]17:198982]	6	PASS	SVTYPE=BND
3 | 2	321682	bnd_V	T	]13:123456]T	6	PASS	SVTYPE=BND
4 | 13	123456	bnd_U	C	C[2:321682[	6	PASS	SVTYPE=BND
5 | 13	123457	bnd_X	A	[17:198983[A	6	PASS	SVTYPE=BND
6 | 17	198982	bnd_Y	A	A]2:321681]	6	PASS	SVTYPE=BND
7 | 17	198983	bnd_Z	C	[13:123457[C	6	PASS	SVTYPE=BND
8 | 


--------------------------------------------------------------------------------
/test/__snapshots__/parseGenotypesOnly.test.ts.snap:
--------------------------------------------------------------------------------
 1 | // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
 2 | 
 3 | exports[`parse genotypes 1`] = `
 4 | {
 5 |   "h1": "./.",
 6 |   "h2": "./.",
 7 | }
 8 | `;
 9 | 
10 | exports[`parse genotypes 2`] = `
11 | {
12 |   "h1": "./.",
13 |   "h2": "./.",
14 | }
15 | `;
16 | 
17 | exports[`parse genotypes 3`] = `
18 | {
19 |   "h1": "./.",
20 |   "h2": "./.",
21 | }
22 | `;
23 | 


--------------------------------------------------------------------------------
/test/parseGenotypesOnly.test.ts:
--------------------------------------------------------------------------------
 1 | import { expect, test } from 'vitest'
 2 | 
 3 | import { parseGenotypesOnly } from '../src/parseGenotypesOnly'
 4 | 
 5 | test('parse genotypes', () => {
 6 |   expect(parseGenotypesOnly('GT', './.\t./.', ['h1', 'h2'])).toMatchSnapshot()
 7 |   expect(
 8 |     parseGenotypesOnly('GT:RT', './.:1\t./.', ['h1', 'h2']),
 9 |   ).toMatchSnapshot()
10 |   expect(
11 |     parseGenotypesOnly('RT:GT', '1:./.\t2:./.', ['h1', 'h2']),
12 |   ).toMatchSnapshot()
13 | })
14 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # EditorConfig helps developers define and maintain consistent
 2 | # coding styles between different editors and IDEs
 3 | # editorconfig.org
 4 | 
 5 | root = true
 6 | 
 7 | [*]
 8 | 
 9 | # Change these settings to your own preference
10 | indent_style = space
11 | indent_size = 2
12 | 
13 | # We recommend you to keep these unchanged
14 | end_of_line = lf
15 | charset = utf-8
16 | trim_trailing_whitespace = true
17 | insert_final_newline = true
18 | 
19 | [*.md]
20 | trim_trailing_whitespace = false
21 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "include": ["src"],
 3 |   "compilerOptions": {
 4 |     "outDir": "dist",
 5 |     "lib": ["dom", "esnext"],
 6 |     "skipLibCheck": true,
 7 |     "declaration": true,
 8 |     "moduleResolution": "node",
 9 |     "sourceMap": true,
10 |     "strict": true,
11 |     "noImplicitReturns": true,
12 |     "noFallthroughCasesInSwitch": true,
13 |     "noUncheckedIndexedAccess": true,
14 |     "allowImportingTsExtensions": true,
15 |     "rewriteRelativeImportExtensions": true,
16 |     "esModuleInterop": true
17 |   }
18 | }
19 | 


--------------------------------------------------------------------------------
/test/data/breakends.vcf:
--------------------------------------------------------------------------------
1 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	BAMs/caudaus.sorted.sam
2 | 11	94975747	MantaBND:0:2:3:0:0:0:1	G	G]8:107653520]	.	PASS	SVTYPE=BND;MATEID=MantaBND:0:2:3:0:0:0:0;CIPOS=0,2;HOMLEN=2;HOMSEQ=TT;BND_DEPTH=216;MATE_BND_DEPTH=735	PR:SR	722,9:463,15
3 | 11	94975753	MantaDEL:0:1:2:0:0:0	T	<DEL>	.	PASS	END=94987865;SVTYPE=DEL;SVLEN=12112;IMPRECISE;CIPOS=-156,156;CIEND=-150,150	PR	161,13
4 | 11	94987872	MantaBND:0:0:1:0:0:0:0	T	T[8:107653411[	.	PASS	SVTYPE=BND;MATEID=MantaBND:0:0:1:0:0:0:1;BND_DEPTH=171;MATE_BND_DEPTH=830	PR:SR	489,4:520,19
5 | 


--------------------------------------------------------------------------------
/test/data/weird_info_and_missing_format.vcf:
--------------------------------------------------------------------------------
1 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	BAMs/caudaus.sorted.sam
2 | lcl|Scaffald_1	80465	rs118266897	R	A	29	PASS	NS=3;0,14;AF=0.5;DB;112;PG2.1
3 | lcl|Scaffald_1	84818	rs118269296	R	G	29	PASS	NS=3;0,14;AF=0.5;DB;112;PG2.1
4 | lcl|Scaffald_1	95414	rs118218236	W	T	29	PASS	NS=3;0,14;AF=0.5;DB;112;PG2.1
5 | lcl|Scaffald_1	231384	rs118264755	R	A	29	PASS	NS=3;0,14;AF=0.5;DB;112;PG2.1
6 | lcl|Scaffald_1	236429	rs118223336	R	G	29	PASS	NS=3;0,14;AF=6.5;DB;112;PG2.1
7 | lcl|Scaffald_1	245378	rs118217257	R	G	29	PASS	NS=3;0,14;AF=0.5;DB;112;PG2.1
8 | 


--------------------------------------------------------------------------------
/test/__snapshots__/parseMetaString.test.ts.snap:
--------------------------------------------------------------------------------
 1 | // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
 2 | 
 3 | exports[`array in values 1`] = `
 4 | {
 5 |   "ID": "Assay",
 6 |   "Number": ".",
 7 |   "Type": "String",
 8 |   "Values": [
 9 |     "WholeGenome",
10 |     "Exome",
11 |   ],
12 | }
13 | `;
14 | 
15 | exports[`equals in description 1`] = `
16 | {
17 |   "Description": "Allelic Probability, P(Allele=1|Haplotype)",
18 |   "ID": "AP",
19 |   "Number": "2",
20 |   "Type": "Float",
21 | }
22 | `;
23 | 
24 | exports[`quoted string with comma in description 1`] = `
25 | {
26 |   "Description": "dbSNP membership, build 129",
27 |   "ID": "DB",
28 |   "Number": "0",
29 |   "Type": "Flag",
30 | }
31 | `;
32 | 


--------------------------------------------------------------------------------
/test/data/sample2genotype.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.1
 2 | ##fileDate=2024-12-01
 3 | ##source=ClinVar
 4 | ##reference=GRCh37
 5 | ##META=<ID=Assay,Type=String,Number=.,Values=[WholeGenome, Exome]>
 6 | ##META=<ID=Disease,Type=String,Number=.,Values=[None, Cancer]>
 7 | ##META=<ID=Ethnicity,Type=String,Number=.,Values=[AFR, CEU, ASN, MEX]>
 8 | ##META=<ID=Tissue,Type=String,Number=.,Values=[Blood, Breast, Colon, Lung, ?]>
 9 | ##SAMPLE=<ID=Sample1,Assay=WholeGenome,Ethnicity=AFR,Disease=None,Description="Patient germline genome from unaffected",DOI=url>
10 | ##SAMPLE=<ID=Sample2,Assay=Exome,Ethnicity=CEU,Disease=Cancer,Tissue=Breast,Description="European patient exome from breast cancer">
11 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
12 | 


--------------------------------------------------------------------------------
/.github/workflows/push.yml:
--------------------------------------------------------------------------------
 1 | name: Push
 2 | 
 3 | on: push
 4 | 
 5 | jobs:
 6 |   test:
 7 |     name: Lint, build, and test on node 20.x and ubuntu-latest
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - uses: actions/checkout@v4
11 |       - name: Use Node.js 20.x
12 |         uses: actions/setup-node@v4
13 |         with:
14 |           node-version: 20.x
15 |       - name: Install deps (with cache)
16 |         uses: bahmutov/npm-install@v1
17 |       - name: Lint codebase
18 |         run: yarn lint
19 |       - name: Build codebase
20 |         run: yarn build
21 |       - name: Test codebase
22 |         run: yarn test --coverage
23 |       - name: Upload coverage
24 |         run: bash <(curl -s https://codecov.io/bash)
25 | 


--------------------------------------------------------------------------------
/scripts/build-both-branches.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | if ! git diff --quiet || ! git diff --cached --quiet; then
 6 |   echo "Error: Uncommitted changes detected. Please commit or stash your changes first."
 7 |   exit 1
 8 | fi
 9 | 
10 | CURRENT_BRANCH=$(git branch --show-current)
11 | BRANCH1="${1:-master}"
12 | BRANCH2="${2:-$CURRENT_BRANCH}"
13 | 
14 | rm -rf esm_branch1 esm_branch2
15 | 
16 | echo "Building $BRANCH1 branch..."
17 | 
18 | git checkout "$BRANCH1"
19 | yarn
20 | yarn build:esm
21 | mv esm esm_branch1
22 | echo "$BRANCH1" >esm_branch1/branchname.txt
23 | 
24 | echo "Building $BRANCH2 branch..."
25 | git checkout "$BRANCH2"
26 | yarn
27 | yarn build:esm
28 | mv esm esm_branch2
29 | echo "$BRANCH2" >esm_branch2/branchname.txt
30 | 
31 | echo "Build complete!"
32 | echo "$BRANCH1 build: esm_branch1/index.js"
33 | echo "$BRANCH2 build: esm_branch2/index.js"
34 | 


--------------------------------------------------------------------------------
/test/parseMetaString.test.ts:
--------------------------------------------------------------------------------
 1 | import { expect, test } from 'vitest'
 2 | 
 3 | import { parseMetaString } from '../src/parseMetaString'
 4 | 
 5 | test('array in values', () => {
 6 |   const result1 = parseMetaString(
 7 |     '<ID=Assay,Type=String,Number=.,Values=[WholeGenome, Exome]>',
 8 |   )
 9 |   const result2 = parseMetaString(
10 |     '<Values=[WholeGenome, Exome],ID=Assay,Type=String,Number=.>',
11 |   )
12 |   expect(result1).toEqual(result2)
13 |   expect(result1).toMatchSnapshot()
14 | })
15 | 
test('quoted string with comma in description', () => {
  // The quoted Description contains a comma, which is also the key separator
  const header =
    '<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">'
  const parsed = parseMetaString(header)
  expect(parsed).toMatchSnapshot()
})
23 | 
test('equals in description', () => {
  // The quoted Description contains both "," and "=", the two characters that
  // otherwise delimit keys and values
  const header =
    '<ID=AP,Number=2,Type=Float,Description="Allelic Probability, P(Allele=1|Haplotype)">'
  const parsed = parseMetaString(header)
  expect(parsed).toMatchSnapshot()
})
31 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Garrett Stevens
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Project-specific
 2 | dist/
 3 | .vscode/
 4 | 
 5 | # Logs
 6 | logs
 7 | *.log
 8 | npm-debug.log*
 9 | yarn-debug.log*
10 | yarn-error.log*
11 | 
12 | # Runtime data
13 | pids
14 | *.pid
15 | *.seed
16 | *.pid.lock
17 | 
18 | # Directory for instrumented libs generated by jscoverage/JSCover
19 | lib-cov
20 | 
21 | # Coverage directory used by tools like istanbul
22 | coverage
23 | 
24 | # nyc test coverage
25 | .nyc_output
26 | 
27 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
28 | .grunt
29 | 
30 | # Bower dependency directory (https://bower.io/)
31 | bower_components
32 | 
33 | # node-waf configuration
34 | .lock-wscript
35 | 
36 | # Compiled binary addons (https://nodejs.org/api/addons.html)
37 | build/Release
38 | 
39 | # Dependency directories
40 | node_modules/
41 | jspm_packages/
42 | 
43 | # TypeScript v1 declaration files
44 | typings/
45 | 
46 | # Optional npm cache directory
47 | .npm
48 | 
49 | # Optional eslint cache
50 | .eslintcache
51 | 
52 | # Optional REPL history
53 | .node_repl_history
54 | 
55 | # Output of 'npm pack'
56 | *.tgz
57 | 
58 | # Yarn Integrity file
59 | .yarn-integrity
60 | 
61 | # dotenv environment variables file
62 | .env
63 | 
64 | # next.js build output
65 | .next
66 | *.swp
67 | 
68 | esm-master
69 | esm-optimized
70 | esm
71 | 
72 | *.cpuprofile
73 | *-results.*
74 | esm_*/
75 | 


--------------------------------------------------------------------------------
/test/data/spec-example.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.3
 2 | ##fileDate=20090805
 3 | ##source=myImputationProgramV3.1
 4 | ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
 5 | ##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
 6 | ##phasing=partial
 7 | ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
 8 | ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
 9 | ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
10 | ##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
11 | ##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
12 | ##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
13 | ##INFO=<ID=TEST,Number=1,Type=String,Description="Used for testing">
14 | ##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
15 | ##FILTER=<ID=q10,Description="Quality below 10">
16 | ##FILTER=<ID=s50,Description="Less than 50% of samples have data">
17 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
18 | ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
19 | ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
20 | ##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
21 | ##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods">
22 | ##FORMAT=<ID=TEST,Number=1,Type=String,Description="Used for testing">
23 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
24 | 


--------------------------------------------------------------------------------
/test/data/multipleAltSVs.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.2
 2 | ##ALT=<ID=DEL,Description="">
 3 | ##ALT=<ID=INS,Description="">
 4 | ##ALT=<ID=INV,Description="">
 5 | ##ALT=<ID=INV,Description="">
 6 | ##FILTER=<ID=FILTER,Description="">
 7 | ##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
 8 | ##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
 9 | ##INFO=<ID=CIRPOS,Number=2,Type=Integer,Description="Confidence interval around remote breakend POS for imprecise variants">
10 | ##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
11 | ##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
12 | ##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
13 | ##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
14 | ##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
15 | ##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
16 | ##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
17 | ##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
18 | ##contig=<ID=chr1,length=249250621>
19 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
20 | chr1	1	.	A	<DEL>	.	.	
21 | chr1	1	.	A	C,<DEL>	.	.	
22 | chr1	1	.	A	<DEL>,<INS>	.	.	
23 | chr1	1	.	A	A[chr1:1[,A]chr1:1]	.	.	SVTYPE=BND
24 | 


--------------------------------------------------------------------------------
/test/data/simple.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.3
 2 | ##fileDate=20090805
 3 | ##source=myImputationProgramV3.1
 4 | ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
 5 | ##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
 6 | ##phasing=partial
 7 | ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
 8 | ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
 9 | ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
10 | ##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
11 | ##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
12 | ##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
13 | ##FILTER=<ID=q10,Description="Quality below 10">
14 | ##FILTER=<ID=s50,Description="Less than 50% of samples have data">
15 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
16 | ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
17 | ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
18 | ##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
19 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
20 | 20	14370	rs6054257	G	A	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
21 | 20	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3
22 | 20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4
23 | 20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:7:56,60	0|0:48:4:51,51	0/0:61:2
24 | 20	1234567	microsat1	GTC	G,GTCT	50	PASS	NS=3;DP=9;AA=G	GT:GQ:DP	0/1:35:4	0/2:17:2	1/1:40:3
25 | 


--------------------------------------------------------------------------------
/test/data/vcf4.3_spec_snippet.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.3
 2 | ##fileDate=20090805
 3 | ##source=myImputationProgramV3.1
 4 | ##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
 5 | ##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
 6 | ##phasing=partial
 7 | ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
 8 | ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
 9 | ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
10 | ##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
11 | ##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
12 | ##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
13 | ##FILTER=<ID=q10,Description="Quality below 10">
14 | ##FILTER=<ID=s50,Description="Less than 50% of samples have data">
15 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
16 | ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
17 | ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
18 | ##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
19 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
20 | 20	14370	rs6054257	G	A	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
21 | 20	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3
22 | 20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4
23 | 20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:7:56,60	0|0:48:4:51,51	0/0:61:2
24 | 20	1234567	microsat1	GTC	G,GTCT	50	PASS	NS=3;DP=9;AA=G	GT:GQ:DP	0/1:35:4	0/2:17:2	1/1:40:3
25 | 


--------------------------------------------------------------------------------
/test/parseGenotypesOnly-ultrafast-edge.test.ts:
--------------------------------------------------------------------------------
 1 | import { expect, test } from 'vitest'
 2 | 
 3 | import { parseGenotypesOnly } from '../src/parseGenotypesOnly'
 4 | 
 5 | test('ultra-fast path should not be tricked by mixed ploidy with matching length', () => {
 6 |   // 4 samples with total length = 15 = 4*4-1
 7 |   // Mix of triploid (5 chars) and haploid (1 char)
 8 |   const result = parseGenotypesOnly('GT', '0/1/2\t0\t1/2/3\t1', [
 9 |     'S1',
10 |     'S2',
11 |     'S3',
12 |     'S4',
13 |   ])
14 | 
15 |   // Should correctly parse each genotype
16 |   expect(result).toEqual({
17 |     S1: '0/1/2',
18 |     S2: '0',
19 |     S3: '1/2/3',
20 |     S4: '1',
21 |   })
22 | })
23 | 
test('another ultra-fast path edge case with >10 samples', () => {
  // 12 samples alternating triploid (5 chars) and haploid (1 char) calls:
  // 6 * 5 + 6 * 1 + 11 tabs = 47 characters, i.e. 12 * 4 - 1 -- the same total
  // length twelve 3-character genotypes would produce.
  const genotypes = [
    '0/1/2',
    '0',
    '1/2/3',
    '1',
    '2/3/4',
    '2',
    '3/4/5',
    '3',
    '4/5/6',
    '4',
    '5/6/7',
    '5',
  ]
  // S1 .. S12
  const sampleNames = Array.from(
    { length: genotypes.length },
    (_, i) => `S${i + 1}`,
  )

  const parsed = parseGenotypesOnly('GT', genotypes.join('\t'), sampleNames)

  expect(parsed).toEqual({
    S1: '0/1/2',
    S2: '0',
    S3: '1/2/3',
    S4: '1',
    S5: '2/3/4',
    S6: '2',
    S7: '3/4/5',
    S8: '3',
    S9: '4/5/6',
    S10: '4',
    S11: '5/6/7',
    S12: '5',
  })
})
64 | 


--------------------------------------------------------------------------------
/test/data/sniffles.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.2
 2 | ##source=Sniffles
 3 | ##fileDate=20170420
 4 | ##ALT=<ID=DEL,Description="Deletion">
 5 | ##ALT=<ID=DUP,Description="Duplication">
 6 | ##ALT=<ID=INV,Description="Inversion">
 7 | ##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
 8 | ##ALT=<ID=TRA,Description="Translocation">
 9 | ##ALT=<ID=INS,Description="Insertion">
10 | ##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
11 | ##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
12 | ##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
13 | ##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
14 | ##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
15 | ##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
16 | ##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
17 | ##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
18 | ##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
19 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
20 | ##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
21 | ##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
22 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	/seq/schatz/fritz/sv-paper/real/Nanopore_NA12878/mapped/ngm_Nanopore_human_ngmlr-0.2.3_mapped.bam
23 | 8	17709115	28329_0	N	<DEL>	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.3;CHR2=8;END=17709148;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=20.524521;Kurtosis_quant_stop=3.925926;SVTYPE=DEL;SUPTYPE=AL;SVLEN=33;STRANDS=+-;STRANDS2=20,14,20,14;RE=34;AF=0.971429	GT:DR:DV	1/1:1:34
24 | 


--------------------------------------------------------------------------------
/test/data/vcf44_spec.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.4
 2 | ##ALT=<ID=INV,Description="Inversion">
 3 | ##ALT=<ID=INS,Description="Insertion">
 4 | ##ALT=<ID=DUP,Description="Duplication">
 5 | ##ALT=<ID=DUP:TANDEM,Description="Tandem Duplication">
 6 | ##ALT=<ID=DEL,Description="Deletion">
 7 | ##ALT=<ID=CNV,Description="Copy number variable region">
 8 | ##INFO=<ID=MATEID,Number=A,Type=String,Description="ID of mate breakend">
 9 | ##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the longest variant described in this record">
10 | ##INFO=<ID=CIPOS,Number=.,Type=Integer,Description="Confidence interval around POS for symbolic structural variants">
11 | ##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
12 | ##INFO=<ID=CILEN,Number=.,Type=Integer,Description="Confidence interval for the SVLEN field">
13 | ##INFO=<ID=EVENT,Number=A,Type=String,Description="ID of associated event">
14 | ##INFO=<ID=EVENTTYPE,Number=A,Type=String,Description="Type of associated event">
15 | ##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ for abundance, adjacency and both respectively">
16 | ##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
17 | ##contig=<ID=chrA,length=1000000>
18 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
19 | ##custom_header_field_containing_chr_seq=ATGCGAAAAAAATGT
20 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	sample
21 | chrA	2	.	TGC	T	.	.	EVENT=DEL_seq	GT	0/1
22 | chrA	2	.	T	<DEL>	.	.	SVLEN=2;SVCLAIM=DJ;EVENT=DEL_symbolic;END=4	GT	0/1
23 | chrA	2	delbp1	T	T[chrA:5[	.	.	MATEID=delbp2;EVENT=DEL_split_bp_cn	GT	0/1
24 | chrA	2	delbp2	A	]chrA:2]A	.	.	MATEID=delbp1;EVENT=DEL_split_bp_cn	GT	0/1
25 | chrA	2	.	T	<DEL>	.	.	SVLEN=2;SVCLAIM=D;EVENT=DEL_split_bp_cn;END=4	GT	0/1
26 | chrA	5	.	G	GAAA	.	.	EVENT=homology_seq	GT	1/1
27 | chrA	5	.	G	<DUP>	.	.	SVLEN=3;CIPOS=0,5;EVENT=homology_dup;END=8	GT	1/1
28 | chrA	14	.	T	<INS>	.	.	IMPRECISE;SVLEN=100;CILEN=-50,50;CIPOS=-10,10;END=14	GT	0/1
29 | chrA	14	.	G	.CCCCCCG	.	.	EVENT=single_breakend	GT	0/1
30 | 


--------------------------------------------------------------------------------
/test/index.test.ts:
--------------------------------------------------------------------------------
 1 | import { describe, expect, it } from 'vitest'
 2 | 
 3 | import { Breakend, parseBreakend } from '../src'
 4 | 
 5 | describe('testBreakend', () => {
 6 |   it('can parse breakends', () => {
 7 |     // Breakends from https://samtools.github.io/hts-specs/VCFv4.3.pdf
 8 |     const breakendsAndParsed = [
 9 |       [
10 |         'G]17:198982]',
11 |         {
12 |           MatePosition: '17:198982',
13 |           Join: 'right',
14 |           Replacement: 'G',
15 |           MateDirection: 'left',
16 |         },
17 |       ],
18 |       [
19 |         ']13:123456]T',
20 |         {
21 |           MatePosition: '13:123456',
22 |           Join: 'left',
23 |           Replacement: 'T',
24 |           MateDirection: 'left',
25 |         },
26 |       ],
27 |       [
28 |         'C[2:321682[',
29 |         {
30 |           MatePosition: '2:321682',
31 |           Join: 'right',
32 |           Replacement: 'C',
33 |           MateDirection: 'right',
34 |         },
35 |       ],
36 |       [
37 |         '[17:198983[A',
38 |         {
39 |           MatePosition: '17:198983',
40 |           Join: 'left',
41 |           Replacement: 'A',
42 |           MateDirection: 'right',
43 |         },
44 |       ],
45 |       [
46 |         'A]2:321681]',
47 |         {
48 |           MatePosition: '2:321681',
49 |           Join: 'right',
50 |           Replacement: 'A',
51 |           MateDirection: 'left',
52 |         },
53 |       ],
54 |       [
55 |         '[13:123457[C',
56 |         {
57 |           MatePosition: '13:123457',
58 |           Join: 'left',
59 |           Replacement: 'C',
60 |           MateDirection: 'right',
61 |         },
62 |       ],
63 |     ] as [string, Breakend][]
64 |     breakendsAndParsed.forEach(([breakend, parsedBreakend]) => {
65 |       expect(parseBreakend(breakend)).toEqual(parsedBreakend)
66 |     })
67 |   })
68 | 
69 |   it('throws on invalid breakend', () => {
70 |     expect(() => parseBreakend('[13:123457[')).toThrow(/Invalid breakend/)
71 |   })
72 | 
73 |   it('returns "undefined" for non-breakend', () => {
74 |     expect(parseBreakend('A')).toBeUndefined()
75 |   })
76 | })
77 | 


--------------------------------------------------------------------------------
/benchmark/master-vs-current.bench.ts:
--------------------------------------------------------------------------------
 1 | import { readFileSync } from 'node:fs'
 2 | import { bench, describe } from 'vitest'
 3 | 
 4 | import { parseGenotypesOnly as branch1Fn } from '../esm_branch1/parseGenotypesOnly.js'
 5 | import { parseGenotypesOnly as branch2Fn } from '../esm_branch2/parseGenotypesOnly.js'
 6 | 
 7 | const branch1Name = readFileSync('esm_branch1/branchname.txt', 'utf8').trim()
 8 | const branch2Name = readFileSync('esm_branch2/branchname.txt', 'utf8').trim()
 9 | 
/**
 * Builds synthetic input for the genotype-parsing benchmarks.
 *
 * Every sample column is identical and is assembled from the FORMAT keys in
 * the order given: GT -> `0/1`, DP -> `23`, GQ -> `99`, any other key -> `.`
 * (the VCF missing value). Deriving the column from the keys keeps the data
 * consistent with the format string, so a new layout added to the benchmark
 * matrix never runs against columns with the wrong number of fields.
 *
 * @param numSamples - number of sample columns to generate
 * @param format - colon-separated FORMAT string, e.g. `GT:DP:GQ`
 * @returns the sample names (`SAMPLE_0`, `SAMPLE_1`, ...), the tab-joined
 *   sample columns, and the format string that was passed in
 */
function generateTestData(numSamples: number, format: string) {
  const placeholderByKey = new Map([
    ['GT', '0/1'],
    ['DP', '23'],
    ['GQ', '99'],
  ])
  const perSample = format
    .split(':')
    .map(key => placeholderByKey.get(key) ?? '.')
    .join(':')

  const samples = Array.from({ length: numSamples }, (_, i) => `SAMPLE_${i}`)
  const genotypeData = Array.from(
    { length: numSamples },
    () => perSample,
  ).join('\t')
  return { samples, genotypeData, format }
}
28 | 
/**
 * Registers one benchmark group in which both builds parse the same
 * generated input.
 *
 * @param name - label of the `describe` group
 * @param numSamples - number of sample columns in the generated input
 * @param format - FORMAT string handed to both implementations
 * @param opts - optional bench settings, forwarded unchanged to each `bench`
 */
function benchParseGenotypes(
  name: string,
  numSamples: number,
  format: string,
  opts?: { iterations?: number; warmupIterations?: number },
) {
  const input = generateTestData(numSamples, format)
  // Registration order is kept: branch 1 first, then branch 2.
  const contenders = [
    [branch1Name, branch1Fn],
    [branch2Name, branch2Fn],
  ] as const

  describe(name, () => {
    for (const [label, parse] of contenders) {
      bench(
        label,
        () => {
          parse(format, input.genotypeData, input.samples)
        },
        opts,
      )
    }
  })
}
54 | 
// Benchmark matrix: every FORMAT layout is measured at every sample count,
// all with the same iteration settings.
const formats = ['GT', 'GT:DP:GQ', 'DP:GQ:GT'] as const
const sampleCounts = [10, 100, 1000, 5000] as const
const opts = { iterations: 1000, warmupIterations: 100 }

formats.forEach(format => {
  sampleCounts.forEach(numSamples => {
    const label = `${numSamples} samples - ${format}`
    benchParseGenotypes(label, numSamples, format, opts)
  })
})
64 | 


--------------------------------------------------------------------------------
/src/parseMetaString.ts:
--------------------------------------------------------------------------------
 1 | // constructed with the assistance of claude AI
 2 | //
 3 | // I first prompted it with a regex that splits a comma separated string with
 4 | // awareness of quotation from this stackoverflow question
 5 | // https://stackoverflow.com/a/18893443/2129219, and asked it to add support
 6 | // for square brackets
 7 | //
 8 | // it undid the regex into serial logic and the result was this function
 9 | function customSplit(str: string) {
10 |   const result = []
11 |   const chars = []
12 |   let inQuotes = false
13 |   let inBrackets = false
14 |   const strLen = str.length
15 | 
16 |   for (let i = 0; i < strLen; i++) {
17 |     const char = str[i]!
18 |     if (char === '"') {
19 |       inQuotes = !inQuotes
20 |       chars.push(char)
21 |     } else if (char === '[') {
22 |       inBrackets = true
23 |       chars.push(char)
24 |     } else if (char === ']') {
25 |       inBrackets = false
26 |       chars.push(char)
27 |     } else if (char === ',' && !inQuotes && !inBrackets) {
28 |       result.push(chars.join('').trim())
29 |       chars.length = 0
30 |     } else {
31 |       chars.push(char)
32 |     }
33 |   }
34 | 
35 |   if (chars.length > 0) {
36 |     result.push(chars.join('').trim())
37 |   }
38 | 
39 |   return result
40 | }
41 | 
/**
 * Splits `str` at the first occurrence of `split`.
 *
 * @param str - text to split
 * @param split - separator to look for
 * @returns `[before, after]` when the separator is present, or the
 *   one-element array `[str]` when it is not, so the second slot reads as
 *   `undefined` instead of a mangled copy of the input
 */
function splitFirst(str: string, split: string): string[] {
  const index = str.indexOf(split)
  if (index === -1) {
    // no separator: slicing with -1 would cut the last character off the key
    return [str]
  }
  // skip the whole separator, not just its first character
  return [str.slice(0, index), str.slice(index + split.length)]
}
46 | 
/**
 * Parses the body of a structured meta value into a key/value object.
 *
 * The first and last characters of `metaString` are dropped (they are expected
 * to be the enclosing `<` and `>`), the remainder is split into `key=value`
 * fields, and each value is converted as follows:
 * - `[a, b]` becomes an array of trimmed strings
 * - `"text"` becomes the text without the surrounding quotes
 * - anything else is kept as is
 *
 * @param metaString - the bracketed value, e.g. `<ID=DP,Number=1>`
 * @returns an object with one property per field
 */
export function parseMetaString(metaString: string) {
  const fields = customSplit(metaString.slice(1, -1))
  const entries = fields.map((field): [string, any] => {
    const [key, val] = splitFirst(field, '=')
    if (!val) {
      return [key!, val]
    }
    if (val.startsWith('[') && val.endsWith(']')) {
      const items = val
        .slice(1, -1)
        .split(',')
        .map(item => item.trim())
      return [key!, items]
    }
    if (val.startsWith('"') && val.endsWith('"')) {
      return [key!, val.slice(1, -1)]
    }
    return [key!, val]
  })
  return Object.fromEntries(entries)
}
68 | 


--------------------------------------------------------------------------------
/src/parseGenotypesOnly.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Extracts genotype (GT) values from VCF sample data.
 3 |  */
 4 | export function parseGenotypesOnly(
 5 |   format: string,
 6 |   prerest: string,
 7 |   samples: string[],
 8 | ) {
 9 |   const genotypes = Object.create(null) as Record<string, string>
10 | 
11 |   const samplesLen = samples.length
12 |   const prerestLen = prerest.length
13 |   const TAB = 9
14 |   const COLON = 58
15 |   let pos = 0
16 | 
17 |   // Fast path: format is exactly "GT"
18 |   if (format === 'GT') {
19 |     for (let idx = 0; idx < samplesLen; idx++) {
20 |       const start = pos
21 |       while (pos < prerestLen && prerest.charCodeAt(pos) !== TAB) {
22 |         pos++
23 |       }
24 |       genotypes[samples[idx]!] = prerest.slice(start, pos)
25 |       pos++
26 |     }
27 |     return genotypes
28 |   }
29 | 
30 |   // Check if GT field exists
31 |   const gtIdx = format.indexOf('GT')
32 |   if (gtIdx === -1) {
33 |     return genotypes
34 |   }
35 | 
36 |   // GT is first field but not only field
37 |   if (gtIdx === 0) {
38 |     for (let idx = 0; idx < samplesLen; idx++) {
39 |       const start = pos
40 |       while (
41 |         pos < prerestLen &&
42 |         prerest.charCodeAt(pos) !== COLON &&
43 |         prerest.charCodeAt(pos) !== TAB
44 |       ) {
45 |         pos++
46 |       }
47 |       genotypes[samples[idx]!] = prerest.slice(start, pos)
48 |       while (pos < prerestLen && prerest.charCodeAt(pos) !== TAB) {
49 |         pos++
50 |       }
51 |       pos++
52 |     }
53 |     return genotypes
54 |   }
55 | 
56 |   // GT is not first field
57 |   let colonCount = 0
58 |   for (let j = 0; j < gtIdx; j++) {
59 |     if (format.charCodeAt(j) === COLON) {
60 |       colonCount++
61 |     }
62 |   }
63 |   for (let idx = 0; idx < samplesLen; idx++) {
64 |     const sampleStart = pos
65 |     let tabIdx = pos
66 |     while (tabIdx < prerestLen && prerest.charCodeAt(tabIdx) !== TAB) {
67 |       tabIdx++
68 |     }
69 | 
70 |     let colons = 0
71 |     let fieldStart = sampleStart
72 |     for (let j = sampleStart; j <= tabIdx; j++) {
73 |       if (j === tabIdx || prerest.charCodeAt(j) === COLON) {
74 |         if (colons === colonCount) {
75 |           genotypes[samples[idx]!] = prerest.slice(fieldStart, j)
76 |           break
77 |         }
78 |         colons++
79 |         fieldStart = j + 1
80 |       }
81 |     }
82 |     pos = tabIdx + 1
83 |   }
84 | 
85 |   return genotypes
86 | }
87 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@gmod/vcf",
 3 |   "version": "6.1.0",
 4 |   "description": "High performance streaming Variant Call Format (VCF) parser in pure JavaScript",
 5 |   "type": "module",
 6 |   "types": "./dist/index.d.ts",
 7 |   "exports": {
 8 |     "import": {
 9 |       "import": "./esm/index.js"
10 |     },
11 |     "require": {
12 |       "require": "./dist/index.js"
13 |     }
14 |   },
15 |   "repository": "github:GMOD/vcf-js",
16 |   "author": {
17 |     "name": "Garrett Stevens",
18 |     "email": "stevens.garrett.j@gmail.com",
19 |     "url": "https://github.com/garrettjstevens"
20 |   },
21 |   "license": "MIT",
22 |   "homepage": "https://github.com/GMOD/vcf-js#vcf-js",
23 |   "bugs": {
24 |     "url": "https://github.com/GMOD/vcf-js/issues"
25 |   },
26 |   "files": [
27 |     "dist",
28 |     "esm",
29 |     "src"
30 |   ],
31 |   "engines": {
32 |     "node": ">=6"
33 |   },
34 |   "scripts": {
35 |     "test": "vitest",
36 |     "benchonly": "vitest bench",
37 |     "bench": "./scripts/build-both-branches.sh \"$BRANCH1\" \"$BRANCH2\" && vitest bench",
38 |     "lint": "eslint --report-unused-disable-directives --max-warnings 0",
39 |     "docs": "documentation readme src/parse.ts --section=API  --shallow",
40 |     "format": "prettier --write .",
41 |     "postdocs": "prettier --write README.md",
42 |     "clean": "rimraf dist esm",
43 |     "build:esm": "tsc --target es2020 --outDir esm",
44 |     "build:es5": "tsc --target es2020 --module commonjs --outDir dist",
45 |     "build": "yarn build:esm && yarn build:es5",
46 |     "prebuild": "yarn clean",
47 |     "postbuild:es5": "echo '{\"type\": \"commonjs\"}' > dist/package.json",
48 |     "preversion": "yarn lint && yarn test --run && yarn build",
49 |     "version": "standard-changelog && git add CHANGELOG.md",
50 |     "postversion": "git push --follow-tags"
51 |   },
52 |   "devDependencies": {
53 |     "@babel/core": "^7.20.5",
54 |     "@eslint/js": "^9.7.0",
55 |     "@types/node": "^24.10.1",
56 |     "@typescript-eslint/eslint-plugin": "^8.48.0",
57 |     "@typescript-eslint/parser": "^8.48.0",
58 |     "@vitest/coverage-v8": "^4.0.14",
59 |     "documentation": "^14.0.1",
60 |     "eslint": "^9.7.0",
61 |     "eslint-plugin-import": "^2.32.0",
62 |     "eslint-plugin-unicorn": "^62.0.0",
63 |     "prettier": "^3.2.4",
64 |     "rimraf": "^6.0.1",
65 |     "standard-changelog": "^7.0.1",
66 |     "typescript": "^5.3.3",
67 |     "typescript-eslint": "^8.48.0",
68 |     "vitest": "^4.0.14"
69 |   },
70 |   "keywords": [
71 |     "vcf",
72 |     "genomics",
73 |     "bionode",
74 |     "biojs"
75 |   ],
76 |   "publishConfig": {
77 |     "access": "public"
78 |   }
79 | }
80 | 


--------------------------------------------------------------------------------
/src/parseBreakend.ts:
--------------------------------------------------------------------------------
 1 | export interface Breakend {
 2 |   Join: string
 3 |   Replacement: string
 4 |   MatePosition?: string
 5 |   MateDirection?: string
 6 |   SingleBreakend?: boolean
 7 | }
 8 | 
 9 | const ANGLE_BRACKET_START_REGEX = /<(.*)>(.*)/
10 | const ANGLE_BRACKET_END_REGEX = /(.*)<(.*)>/
11 | 
12 | export function parseBreakend(breakendString: string): Breakend | undefined {
13 |   const firstChar = breakendString[0]
14 |   const lastChar = breakendString[breakendString.length - 1]
15 | 
16 |   if (
17 |     firstChar === '[' ||
18 |     firstChar === ']' ||
19 |     lastChar === '[' ||
20 |     lastChar === ']'
21 |   ) {
22 |     const tokens = breakendString.split(/[[\]]/)
23 |     const MateDirection = breakendString.includes('[') ? 'right' : 'left'
24 |     let Join
25 |     let Replacement
26 |     let MatePosition
27 |     const tokensLen = tokens.length
28 |     for (let i = 0; i < tokensLen; i++) {
29 |       const tok = tokens[i]!
30 |       if (tok) {
31 |         if (tok.includes(':')) {
32 |           MatePosition = tok
33 |           Join = Replacement ? 'right' : 'left'
34 |         } else {
35 |           Replacement = tok
36 |         }
37 |       }
38 |     }
39 |     if (!(MatePosition && Join && Replacement)) {
40 |       throw new Error(`Invalid breakend: ${breakendString}`)
41 |     }
42 |     return { MatePosition, Join, Replacement, MateDirection }
43 |   }
44 | 
45 |   if (firstChar === '.') {
46 |     return {
47 |       Join: 'left',
48 |       SingleBreakend: true,
49 |       Replacement: breakendString.slice(1),
50 |     }
51 |   }
52 | 
53 |   if (lastChar === '.') {
54 |     return {
55 |       Join: 'right',
56 |       SingleBreakend: true,
57 |       Replacement: breakendString.slice(0, -1),
58 |     }
59 |   }
60 | 
61 |   if (firstChar === '<') {
62 |     const res = ANGLE_BRACKET_START_REGEX.exec(breakendString)
63 |     if (!res) {
64 |       throw new Error(`failed to parse ${breakendString}`)
65 |     }
66 |     const Replacement = res[2]
67 |     return Replacement
68 |       ? {
69 |           Join: 'left',
70 |           Replacement,
71 |           MateDirection: 'right',
72 |           MatePosition: `<${res[1]!}>:1`,
73 |         }
74 |       : undefined
75 |   }
76 | 
77 |   if (breakendString.includes('<')) {
78 |     const res = ANGLE_BRACKET_END_REGEX.exec(breakendString)
79 |     if (!res) {
80 |       throw new Error(`failed to parse ${breakendString}`)
81 |     }
82 |     const Replacement = res[1]
83 |     return Replacement
84 |       ? {
85 |           Join: 'right',
86 |           Replacement,
87 |           MateDirection: 'right',
88 |           MatePosition: `<${res[2]!}>:1`,
89 |         }
90 |       : undefined
91 |   }
92 | 
93 |   return undefined
94 | }
95 | 


--------------------------------------------------------------------------------
/test/data/y-chrom-haploid.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.1
 2 | ##FILTER=<ID=PASS,Description="All filters passed">
 3 | ##fileDate=20150218
 4 | ##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz
 5 | ##contig=<ID=Y,length=59373566,assembly=b37>
 6 | ##source=freeBayes v0.9.9.2 | GT values over-written with maximum likelihood state (subject to threshold) OR phylogenetic imputation
 7 | ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
 8 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 9 | ##source=GenomeSTRiP_v1.04
10 | ##ALT=<ID=CNV,Description="Copy number polymorphism">
11 | ##FILTER=<ID=ALIGNLENGTH,Description="GSELENGTH < 200">
12 | ##FILTER=<ID=CLUSTERSEP,Description="GSCLUSTERSEP == NA || GSCLUSTERSEP <= 2.0">
13 | ##FILTER=<ID=DUPLICATE,Description="GSDUPLICATESCORE != NA && GSDUPLICATEOVERLAP >= 0.5 && GSDUPLICATESCORE >= 0.0">
14 | ##FILTER=<ID=GTDEPTH,Description="GSM1 == NA || GSM1 <= 0.5 || GSM1 >= 2.0">
15 | ##FILTER=<ID=INBREEDINGCOEFF,Description="GLINBREEDINGCOEFF != NA && GLINBREEDINGCOEFF < -0.15">
16 | ##FILTER=<ID=NONVARIANT,Description="GSNONVARSCORE != NA && GSNONVARSCORE >= 13.0">
17 | ##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
18 | ##FORMAT=<ID=CNL,Number=.,Type=Float,Description="Copy number likelihoods with no frequency prior">
19 | ##FORMAT=<ID=CNP,Number=.,Type=Float,Description="Copy number likelihoods">
20 | ##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events">
21 | ##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype likelihoods">
22 | ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
23 | ##FORMAT=<ID=FT,Number=1,Type=String,Description="Per-sample genotype filter">
24 | ##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
25 | ##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of this variant">
26 | ##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
27 | ##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral allele">
28 | ##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
29 | ##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
30 | ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
31 | ##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
32 | ##INFO=<ID=SAS_AF,Number=A,Type=Float,Description="Allele frequency in the SAS populations calculated from AC and AN, in the range (0,1)">
33 | ##INFO=<ID=EUR_AF,Number=A,Type=Float,Description="Allele frequency in the EUR populations calculated from AC and AN, in the range (0,1)">
34 | ##INFO=<ID=AFR_AF,Number=A,Type=Float,Description="Allele frequency in the AFR populations calculated from AC and AN, in the range (0,1)">
35 | ##INFO=<ID=AMR_AF,Number=A,Type=Float,Description="Allele frequency in the AMR populations calculated from AC and AN, in the range (0,1)">
36 | ##INFO=<ID=EAS_AF,Number=A,Type=Float,Description="Allele frequency in the EAS populations calculated from AC and AN, in the range (0,1)">
37 | ##INFO=<ID=VT,Number=.,Type=String,Description="indicates what type of variant the line represents">
38 | ##INFO=<ID=EX_TARGET,Number=0,Type=Flag,Description="indicates whether a variant is within the exon pull down target boundaries">
39 | ##INFO=<ID=MULTI_ALLELIC,Number=0,Type=Flag,Description="indicates whether a site is multi-allelic">
40 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HG00096	HG00101	HG00103	HG001055
41 | Y	14483990	CNV_Y_14483990_15232198	C	<CN0>	100	PASS	AC=1;AF=0.000817661;AN=1223;END=15232198;NS=1233;SVTYPE=CNV;AMR_AF=0;AFR_AF=0;EUR_AF=0.0042;SAS_AF=0;EAS_AF=0;VT=SV;EX_TARGET	GT:CN:CNL:CNP:CNQ:GP:GQ:PL	0:1:-1000,0,-119.08:-1000,0,-218.16:99:0,-1000:99:0,10000	0:1:-1000,0,-43.56:-1000,0,-142.64:99:0,-1000:99:0,10000	.:.:.:.:.:.:.:.	.:.:.:.:.:.:.:.
42 | Y	2655180	rs11575897	G	A	100	PASS	AA=G;AC=22;AF=0.0178427;AN=1233;DP=84761;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0902;VT=SNP;EX_TARGET	GT	0	0	0	.
43 | 


--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
  1 | import eslint from '@eslint/js'
  2 | import eslintPluginUnicorn from 'eslint-plugin-unicorn'
  3 | import { defineConfig } from 'eslint/config'
  4 | import importPlugin from 'eslint-plugin-import'
  5 | import tseslint from 'typescript-eslint'
  6 | 
  7 | export default defineConfig(
  8 |   {
  9 |     ignores: [
 10 |       'analyze-profile.cjs',
 11 |       'esm_*/',
 12 |       'benchmark/',
 13 |       'esm/',
 14 |       'dist/',
 15 |       '*.js',
 16 |       '*.mjs',
 17 |       'example/*',
 18 |     ],
 19 |   },
 20 |   {
 21 |     languageOptions: {
 22 |       parserOptions: {
 23 |         project: ['./tsconfig.lint.json'],
 24 |         tsconfigRootDir: import.meta.dirname,
 25 |       },
 26 |     },
 27 |   },
 28 |   eslint.configs.recommended,
 29 |   ...tseslint.configs.recommended,
 30 |   ...tseslint.configs.stylisticTypeChecked,
 31 |   ...tseslint.configs.strictTypeChecked,
 32 |   importPlugin.flatConfigs.recommended,
 33 |   eslintPluginUnicorn.configs.recommended,
 34 |   {
 35 |     rules: {
 36 |       curly: 'error',
 37 |       'no-console': [
 38 |         'warn',
 39 |         {
 40 |           allow: ['error', 'warn'],
 41 |         },
 42 |       ],
 43 |       'spaced-comment': [
 44 |         'error',
 45 |         'always',
 46 |         {
 47 |           markers: ['/'],
 48 |         },
 49 |       ],
 50 |       '@typescript-eslint/no-unused-vars': [
 51 |         'warn',
 52 |         {
 53 |           argsIgnorePattern: '^_',
 54 |           caughtErrors: 'none',
 55 |           ignoreRestSiblings: true,
 56 |         },
 57 |       ],
 58 | 
 59 |       '@typescript-eslint/ban-ts-comment': 'off',
 60 |       '@typescript-eslint/no-this-alias': 'off',
 61 |       '@typescript-eslint/no-unsafe-member-access': 'off',
 62 |       '@typescript-eslint/no-unsafe-argument': 'off',
 63 |       '@typescript-eslint/no-explicit-any': 'off',
 64 |       '@typescript-eslint/no-unsafe-assignment': 'off',
 65 |       '@typescript-eslint/no-unsafe-call': 'off',
 66 |       '@typescript-eslint/no-unsafe-return': 'off',
 67 |       '@typescript-eslint/no-non-null-assertion': 'off',
 68 |       '@typescript-eslint/restrict-template-expressions': 'off',
 69 |       '@typescript-eslint/prefer-for-of': 'off',
 70 | 
 71 |       'unicorn/no-new-array': 'off',
 72 |       'unicorn/no-empty-file': 'off',
 73 |       'unicorn/prefer-type-error': 'off',
 74 |       'unicorn/prefer-modern-math-apis': 'off',
 75 |       'unicorn/prefer-node-protocol': 'off',
 76 |       'unicorn/no-unreadable-array-destructuring': 'off',
 77 |       'unicorn/no-abusive-eslint-disable': 'off',
 78 |       'unicorn/no-array-callback-reference': 'off',
 79 |       'unicorn/number-literal-case': 'off',
 80 |       'unicorn/prefer-add-event-listener': 'off',
 81 |       'unicorn/prefer-top-level-await': 'off',
 82 |       'unicorn/consistent-function-scoping': 'off',
 83 |       'unicorn/no-await-expression-member': 'off',
 84 |       'unicorn/no-lonely-if': 'off',
 85 |       'unicorn/consistent-destructuring': 'off',
 86 |       'unicorn/prefer-module': 'off',
 87 |       'unicorn/prefer-optional-catch-binding': 'off',
 88 |       'unicorn/no-useless-undefined': 'off',
 89 |       'unicorn/no-null': 'off',
 90 |       'unicorn/no-nested-ternary': 'off',
 91 |       'unicorn/filename-case': 'off',
 92 |       'unicorn/catch-error-name': 'off',
 93 |       'unicorn/prevent-abbreviations': 'off',
 94 |       'unicorn/prefer-code-point': 'off',
 95 |       'unicorn/numeric-separators-style': 'off',
 96 |       'unicorn/no-array-for-each': 'off',
 97 |       'unicorn/prefer-spread': 'off',
 98 |       'unicorn/explicit-length-check': 'off',
 99 |       'unicorn/prefer-regexp-test': 'off',
100 |       'unicorn/relative-url-style': 'off',
101 |       'unicorn/prefer-math-trunc': 'off',
102 |       'unicorn/prefer-query-selector': 'off',
103 |       'unicorn/no-negated-condition': 'off',
104 |       'unicorn/switch-case-braces': 'off',
105 |       'unicorn/prefer-switch': 'off',
106 |       'unicorn/better-regex': 'off',
107 |       'unicorn/no-for-loop': 'off',
108 |       'unicorn/escape-case': 'off',
109 |       'unicorn/prefer-number-properties': 'off',
110 |       'unicorn/no-process-exit': 'off',
111 |       'unicorn/prefer-at': 'off',
112 |       'unicorn/prefer-structured-clone': 'off',
113 |       'unicorn/prefer-string-replace-all': 'off',
114 | 
115 |       'import/no-unresolved': 'off',
116 |       'import/order': [
117 |         'error',
118 |         {
119 |           named: true,
120 |           'newlines-between': 'always',
121 |           alphabetize: {
122 |             order: 'asc',
123 |           },
124 |           groups: [
125 |             'builtin',
126 |             ['external', 'internal'],
127 |             ['parent', 'sibling', 'index', 'object'],
128 |             'type',
129 |           ],
130 |         },
131 |       ],
132 |     },
133 |   },
134 | )
135 | 


--------------------------------------------------------------------------------
/test/data/clinvar.header.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.1
 2 | ##fileDate=2024-12-01
 3 | ##source=ClinVar
 4 | ##reference=GRCh37
 5 | ##ID=<Description="ClinVar Variation ID">
 6 | ##INFO=<ID=AF_ESP,Number=1,Type=Float,Description="allele frequencies from GO-ESP">
 7 | ##INFO=<ID=AF_EXAC,Number=1,Type=Float,Description="allele frequencies from ExAC">
 8 | ##INFO=<ID=AF_TGP,Number=1,Type=Float,Description="allele frequencies from TGP">
 9 | ##INFO=<ID=ALLELEID,Number=1,Type=Integer,Description="the ClinVar Allele ID">
10 | ##INFO=<ID=CLNDN,Number=.,Type=String,Description="ClinVar's preferred disease name for the concept specified by disease identifiers in CLNDISDB">
11 | ##INFO=<ID=CLNDNINCL,Number=.,Type=String,Description="For included Variant : ClinVar's preferred disease name for the concept specified by disease identifiers in CLNDISDB">
12 | ##INFO=<ID=CLNDISDB,Number=.,Type=String,Description="Tag-value pairs of disease database name and identifier submitted for germline classifications, e.g. OMIM:NNNNNN">
13 | ##INFO=<ID=CLNDISDBINCL,Number=.,Type=String,Description="For included Variant: Tag-value pairs of disease database name and identifier for germline classifications, e.g. OMIM:NNNNNN">
14 | ##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Top-level (primary assembly, alt, or patch) HGVS expression.">
15 | ##INFO=<ID=CLNREVSTAT,Number=.,Type=String,Description="ClinVar review status of germline classification for the Variation ID">
16 | ##INFO=<ID=CLNSIG,Number=.,Type=String,Description="Aggregate germline classification for this single variant; multiple values are separated by a vertical bar">
17 | ##INFO=<ID=CLNSIGCONF,Number=.,Type=String,Description="Conflicting germline classification for this single variant; multiple values are separated by a vertical bar">
18 | ##INFO=<ID=CLNSIGINCL,Number=.,Type=String,Description="Germline classification for a haplotype or genotype that includes this variant. Reported as pairs of VariationID:classification; multiple values are separated by a vertical bar">
19 | ##INFO=<ID=CLNVC,Number=1,Type=String,Description="Variant type">
20 | ##INFO=<ID=CLNVCSO,Number=1,Type=String,Description="Sequence Ontology id for variant type">
21 | ##INFO=<ID=CLNVI,Number=.,Type=String,Description="the variant's clinical sources reported as tag-value pairs of database and variant identifier">
22 | ##INFO=<ID=DBVARID,Number=.,Type=String,Description="nsv accessions from dbVar for the variant">
23 | ##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Gene(s) for the variant reported as gene symbol:gene id. The gene symbol and id are delimited by a colon (:) and each pair is delimited by a vertical bar (|)">
24 | ##INFO=<ID=MC,Number=.,Type=String,Description="comma separated list of molecular consequence in the form of Sequence Ontology ID|molecular_consequence">
25 | ##INFO=<ID=ONCDN,Number=.,Type=String,Description="ClinVar's preferred disease name for the concept specified by disease identifiers in ONCDISDB">
26 | ##INFO=<ID=ONCDNINCL,Number=.,Type=String,Description="For included variant: ClinVar's preferred disease name for the concept specified by disease identifiers in ONCDISDBINCL">
27 | ##INFO=<ID=ONCDISDB,Number=.,Type=String,Description="Tag-value pairs of disease database name and identifier submitted for oncogenicity classifications, e.g. MedGen:NNNNNN">
28 | ##INFO=<ID=ONCDISDBINCL,Number=.,Type=String,Description="For included variant: Tag-value pairs of disease database name and identifier for oncogenicity classifications, e.g. OMIM:NNNNNN">
29 | ##INFO=<ID=ONC,Number=.,Type=String,Description="Aggregate oncogenicity classification for this single variant; multiple values are separated by a vertical bar">
30 | ##INFO=<ID=ONCINCL,Number=.,Type=String,Description="Oncogenicity classification for a haplotype or genotype that includes this variant. Reported as pairs of VariationID:classification; multiple values are separated by a vertical bar">
31 | ##INFO=<ID=ONCREVSTAT,Number=.,Type=String,Description="ClinVar review status of oncogenicity classification for the Variation ID">
32 | ##INFO=<ID=ONCCONF,Number=.,Type=String,Description="Conflicting oncogenicity classification for this single variant; multiple values are separated by a vertical bar">
33 | ##INFO=<ID=ORIGIN,Number=.,Type=String,Description="Allele origin. One or more of the following values may be added: 0 - unknown; 1 - germline; 2 - somatic; 4 - inherited; 8 - paternal; 16 - maternal; 32 - de-novo; 64 - biparental; 128 - uniparental; 256 - not-tested; 512 - tested-inconclusive; 1073741824 - other">
34 | ##INFO=<ID=RS,Number=.,Type=String,Description="dbSNP ID (i.e. rs number)">
35 | ##INFO=<ID=SCIDN,Number=.,Type=String,Description="ClinVar's preferred disease name for the concept specified by disease identifiers in SCIDISDB">
36 | ##INFO=<ID=SCIDNINCL,Number=.,Type=String,Description="For included variant: ClinVar's preferred disease name for the concept specified by disease identifiers in SCIDISDBINCL">
37 | ##INFO=<ID=SCIDISDB,Number=.,Type=String,Description="Tag-value pairs of disease database name and identifier submitted for somatic clinial impact classifications, e.g. MedGen:NNNNNN">
38 | ##INFO=<ID=SCIDISDBINCL,Number=.,Type=String,Description="For included variant: Tag-value pairs of disease database name and identifier for somatic clinical impact classifications, e.g. OMIM:NNNNNN">
39 | ##INFO=<ID=SCIREVSTAT,Number=.,Type=String,Description="ClinVar review status of somatic clinical impact for the Variation ID">
40 | ##INFO=<ID=SCI,Number=.,Type=String,Description="Aggregate somatic clinical impact for this single variant; multiple values are separated by a vertical bar">
41 | ##INFO=<ID=SCIINCL,Number=.,Type=String,Description="Somatic clinical impact classification for a haplotype or genotype that includes this variant. Reported as pairs of VariationID:classification; multiple values are separated by a vertical bar">
42 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
43 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # [6.1.0](https://github.com/GMOD/vcf-js/compare/v6.0.9...v6.1.0) (2025-11-26)
  2 | 
  3 | 
  4 | 
  5 | ## [6.0.9](https://github.com/GMOD/vcf-js/compare/v6.0.8...v6.0.9) (2025-04-01)
  6 | 
  7 | ## [6.0.8](https://github.com/GMOD/vcf-js/compare/v6.0.7...v6.0.8) (2025-02-13)
  8 | 
  9 | ## [6.0.7](https://github.com/GMOD/vcf-js/compare/v6.0.6...v6.0.7) (2025-01-29)
 10 | 
 11 | ## [6.0.6](https://github.com/GMOD/vcf-js/compare/v6.0.5...v6.0.6) (2025-01-16)
 12 | 
 13 | ## [6.0.5](https://github.com/GMOD/vcf-js/compare/v6.0.4...v6.0.5) (2025-01-16)
 14 | 
 15 | ## [6.0.4](https://github.com/GMOD/vcf-js/compare/v6.0.3...v6.0.4) (2025-01-16)
 16 | 
 17 | ## [6.0.3](https://github.com/GMOD/vcf-js/compare/v6.0.2...v6.0.3) (2025-01-16)
 18 | 
 19 | ## [6.0.2](https://github.com/GMOD/vcf-js/compare/v6.0.1...v6.0.2) (2025-01-07)
 20 | 
 21 | ## [6.0.1](https://github.com/GMOD/vcf-js/compare/v6.0.0...v6.0.1) (2024-12-17)
 22 | 
 23 | # [6.0.0](https://github.com/GMOD/vcf-js/compare/v5.0.10...v6.0.0) (2024-11-30)
 24 | 
 25 | - Changes the default Variant object to have a SAMPLES() function call instead
 26 |   of a SAMPLES getter, to make it more abundantly clear that it is a lazy
 27 |   operation. Also adds a GENOTYPES() function that returns the raw string of
 28 |   genotype fields
 29 | 
 30 | ## [5.0.10](https://github.com/GMOD/vcf-js/compare/v5.0.9...v5.0.10) (2022-12-17)
 31 | 
 32 | - Use es2015 for nodejs build
 33 | 
 34 | ## [5.0.9](https://github.com/GMOD/vcf-js/compare/v5.0.8...v5.0.9) (2022-11-23)
 35 | 
 36 | - Fix erroneous parsing of symbolic alleles as breakends
 37 | 
 38 | ## [5.0.8](https://github.com/GMOD/vcf-js/compare/v5.0.7...v5.0.8) (2022-11-20)
 39 | 
 40 | - Parse single breakends and large insertion shorthand notation (#95)
 41 | 
 42 | <a name="5.0.7"></a>
 43 | 
 44 | ## [5.0.7](https://github.com/GMOD/vcf-js/compare/v5.0.6...v5.0.7) (2022-08-24)
 45 | 
 46 | - Don't throw error when there is a FORMAT column but no genotypes
 47 | 
 48 | <a name="5.0.6"></a>
 49 | 
 50 | ## [5.0.6](https://github.com/GMOD/vcf-js/compare/v5.0.5...v5.0.6) (2022-03-30)
 51 | 
 52 | - Include src directory for better source maps
 53 | 
 54 | <a name="5.0.5"></a>
 55 | 
 56 | ## [5.0.5](https://github.com/GMOD/vcf-js/compare/v5.0.4...v5.0.5) (2022-01-12)
 57 | 
 58 | - Add optimization related to better allocation of variant records, thanks to
 59 |   @bpow for contributing
 60 | 
 61 | <a name="5.0.4"></a>
 62 | 
 63 | ## [5.0.4](https://github.com/GMOD/vcf-js/compare/v5.0.3...v5.0.4) (2021-12-23)
 64 | 
 65 | - Make the strict field in the constructor optional
 66 | - Export `Breakend` type for typescript users
 67 | 
 68 | <a name="5.0.3"></a>
 69 | 
 70 | ## [5.0.3](https://github.com/GMOD/vcf-js/compare/v5.0.2...v5.0.3) (2021-12-14)
 71 | 
 72 | - Add typescripting and esm module build
 73 | 
 74 | <a name="5.0.2"></a>
 75 | 
 76 | ## [5.0.2](https://github.com/GMOD/vcf-js/compare/v5.0.1...v5.0.2) (2021-11-13)
 77 | 
 78 | - Update package description to refer to variant call format
 79 | 
 80 | <a name="5.0.1"></a>
 81 | 
 82 | ## [5.0.1](https://github.com/GMOD/vcf-js/compare/v5.0.0...v5.0.1) (2021-11-04)
 83 | 
 84 | - Add URI decoding to INFO field
 85 | 
 86 | <a name="5.0.0"></a>
 87 | 
 88 | # [5.0.0](https://github.com/GMOD/vcf-js/compare/v4.0.4...v5.0.0) (2021-09-06)
 89 | 
 90 | - Make `parseBreakend` an optional helper function; all ALTs are plain strings
 91 |   now instead of string|Breakend. This is a breaking change so a major version
 92 |   bump is applied
 93 | 
 94 | <a name="4.0.4"></a>
 95 | 
 96 | ## [4.0.4](https://github.com/GMOD/vcf-js/compare/v4.0.1...v4.0.4) (2021-08-04)
 97 | 
 98 | - Fix issue when there is extra whitespace on the header line
 99 | 
100 | <a name="4.0.3"></a>
101 | 
102 | ## [4.0.3](https://github.com/GMOD/vcf-js/compare/v4.0.1...v4.0.3) (2021-03-31)
103 | 
104 | - Include github automated fixes in release from before 4.0.2
105 | 
106 | <a name="4.0.2"></a>
107 | 
108 | ## [4.0.2](https://github.com/GMOD/vcf-js/compare/v4.0.1...v4.0.2) (2021-03-31)
109 | 
110 | - Avoid modifying built-in exports with parseMetadata, fixes issue with using
111 |   parseMetadata from jest tests (#63)
112 | 
113 | <a name="4.0.1"></a>
114 | 
115 | ## [4.0.1](https://github.com/GMOD/vcf-js/compare/v4.0.0...v4.0.1) (2019-10-30)
116 | 
117 | - Add toString for Breakend ALTs so they are easily interpretable
118 | 
119 | ## [4.0.0](https://github.com/GMOD/vcf-js/compare/v3.0.0...v4.0.0) (2019-06-14)
120 | 
121 | - Breaking change: INFO entries that are type Flag now evaluate to `true`
122 |   instead of `null`
123 | 
124 | ## [3.0.0](https://github.com/GMOD/vcf-js/compare/v2.0.3...v3.0.0) (2019-05-31)
125 | 
126 | - Breaking change: ALT entries in breakend format now parse into a breakend
127 |   object instead of a string
128 | - Performance improvements
129 | 
130 | ## [2.0.3](https://github.com/GMOD/vcf-js/compare/v2.0.2...v2.0.3) (2019-02-23)
131 | 
132 | - Upgrade to Babel 7
133 | 
134 | ## [2.0.2](https://github.com/GMOD/vcf-js/compare/v2.0.1...v2.0.2) (2018-11-26)
135 | 
136 | - Remove errant unused dependency
137 | 
138 | ## [2.0.1](https://github.com/GMOD/vcf-js/compare/v2.0.0...v2.0.1) (2018-11-08)
139 | 
140 | - Bugfix for getMetadata()
141 | 
142 | ## [2.0.0](https://github.com/GMOD/vcf-js/compare/v1.0.4...v2.0.0) (2018-11-07)
143 | 
144 | - Breaking change: SAMPLES attribute of the variant is now evaluated lazily
145 | 
146 | ## [1.0.4](https://github.com/GMOD/vcf-js/compare/v1.0.3...v1.0.4) (2018-11-06)
147 | 
148 | - Decode %-encoded entries in INFO and FORMAT
149 | 
150 | ## [1.0.3](https://github.com/GMOD/vcf-js/compare/v1.0.2...v1.0.3) (2018-11-05)
151 | 
152 | - Fix for parsing missing genotypes
153 | 
154 | ## [1.0.2](https://github.com/GMOD/vcf-js/compare/v1.0.1...v1.0.2) (2018-10-11)
155 | 
156 | - Better handle filter metadata
157 | 
158 | ## [1.0.1](https://github.com/GMOD/vcf-js/compare/v1.0.0...v1.0.1) (2018-10-05)
159 | 
160 | - Fix bug in interpreting "Number" in header metadata
161 | 
162 | ## 1.0.0 (2018-10-05)
163 | 
164 | - Initial release
165 | 


--------------------------------------------------------------------------------
/test/parse.test.ts:
--------------------------------------------------------------------------------
  1 | import fs from 'fs'
  2 | 
  3 | import { expect, test } from 'vitest'
  4 | 
  5 | import VCF, { parseBreakend } from '../src'
  6 | 
  7 | /**
  8 |  * Reads a VCF file from disk and separates it into its header text
  9 |  * (every line starting with '#', re-joined with newlines) and the
 10 |  * remaining non-empty lines.
 11 |  */
 12 | const readVcf = (file: string) => {
 13 |   const headerLines: string[] = []
 14 |   const dataLines: string[] = []
 15 |   for (const text of fs.readFileSync(file, 'utf8').split('\n')) {
 16 |     if (text.startsWith('#')) {
 17 |       headerLines.push(text)
 18 |     } else if (text.length > 0) {
 19 |       dataLines.push(text)
 20 |     }
 21 |   }
 22 |   return { header: headerLines.join('\n'), lines: dataLines }
 23 | }
 24 | 
 25 | /** Creates a parser from the header of the VCF spec example fixture. */
 26 | function makeParser() {
 27 |   const specExample = readVcf('test/data/spec-example.vcf')
 28 |   const parser = new VCF({ header: specExample.header })
 29 |   return parser
 30 | }
 31 | 
 32 | test('can get metadata from the header', () => {
 33 |   const VCFParser = makeParser()
 34 |   // Note that there is a custom PL that overrides the default PL
 35 |   expect(VCFParser.getMetadata()).toMatchSnapshot()
 36 |   expect(VCFParser.getMetadata('nonexistent')).toBe(undefined)
 37 |   expect(VCFParser.getMetadata('fileDate')).toBe('20090805')
 38 |   expect(VCFParser.getMetadata('INFO')).toMatchSnapshot()
 39 |   expect(VCFParser.getMetadata('INFO', 'nonexistent')).toBe(undefined)
 40 |   expect(VCFParser.getMetadata('INFO', 'AA')).toEqual({
 41 |     Description: 'Ancestral Allele',
 42 |     Number: 1,
 43 |     Type: 'String',
 44 |   })
 45 |   expect(VCFParser.getMetadata('INFO', 'AA', 'nonexistent')).toBe(undefined)
 46 |   expect(VCFParser.getMetadata('INFO', 'AA', 'Type')).toBe('String')
 47 |   expect(VCFParser.getMetadata('INFO', 'AA', 'Type', 'nonexistent')).toBe(
 48 |     undefined,
 49 |   )
 50 |   expect(VCFParser.getMetadata('INFO', 'TEST')).toEqual({
 51 |     Description: 'Used for testing',
 52 |     Number: 1,
 53 |     Type: 'String',
 54 |   })
 55 | 
 56 |   expect(VCFParser.getMetadata('INFO', 'AC')).toEqual({
 57 |     Number: 'A',
 58 |     Type: 'Integer',
 59 |     Description:
 60 |       'Allele count in genotypes, for each ALT allele, in the same order as listed',
 61 |   })
 62 | })
 63 | 
 64 | test('can parse a line from the VCF spec', () => {
 65 |   const VCFParser = makeParser()
 66 |   const variant = VCFParser.parseLine(
 67 |     '20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:.,.\n',
 68 |   )
 69 |   expect(variant).toMatchSnapshot()
 70 |   expect(variant.SAMPLES()).toMatchSnapshot()
 71 | })
 72 | 
 73 | test('can parse a line with minimal entries', () => {
 74 |   const VCFParser = makeParser()
 75 |   const variant = VCFParser.parseLine(
 76 |     '20\t14370\t.\tG\tA\t.\t.\t.\tGT:GQ:DP:HQ\t.\t.\t.\n',
 77 |   )
 78 |   expect(variant).toMatchSnapshot()
 79 |   expect(variant.SAMPLES()).toMatchSnapshot()
 80 | })
 81 | 
 82 | test('parses a line with a breakend ALT', () => {
 83 |   const VCFParser = makeParser()
 84 |   const variant = VCFParser.parseLine(
 85 |     '2\t321681\tbnd_W\tG\tG]17:198982]\t6\tPASS\tSVTYPE=BND',
 86 |   )
 87 |   expect(variant.ALT?.length).toBe(1)
 88 |   expect(variant.INFO.SVTYPE).toEqual(['BND'])
 89 |   expect(variant).toMatchSnapshot()
 90 | })
 91 | 
 92 | test(`parses a line with mix of multiple breakends and non breakends`, () => {
 93 |   const VCFParser = makeParser()
 94 |   const variant = VCFParser.parseLine(
 95 |     `13\t123456\tbnd_U\tC\tCTATGTCG,C[2 : 321682[,C[17 : 198983[\t6\tPASS\tSVTYPE=BND;MATEID=bnd V,bnd Z`,
 96 |   )
 97 |   expect(variant.ALT?.length).toBe(3)
 98 |   expect(variant.INFO.SVTYPE).toEqual(['BND'])
 99 |   expect(variant).toMatchSnapshot()
100 | })
101 | 
102 | test('throws errors with bad header lines', () => {
103 |   expect(() => {
104 |     new VCF({ header: 'notARealHeader' })
105 |   }).toThrow('Bad line in header')
106 |   expect(() => {
107 |     new VCF({
108 |       header: '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\n',
109 |     })
110 |   }).toThrow('VCF header missing columns')
111 |   expect(() => {
112 |     new VCF({
113 |       header: '#CHROM\tPS\tID\tRF\tALT\tQUAL\tFILTER\tINFO\n',
114 |     })
115 |   }).toThrow('VCF column headers not correct')
116 |   expect(() => {
117 |     new VCF({ header: '##this=badHeader\n' })
118 |   }).toThrow(/No format line/)
119 | })
120 | 
121 | test('sniffles vcf', () => {
122 |   const { header, lines } = readVcf('test/data/sniffles.vcf')
123 |   const VCFParser = new VCF({
124 |     header,
125 |   })
126 |   const variant = VCFParser.parseLine(lines[0])
127 |   expect(variant).toMatchSnapshot()
128 |   expect(variant.SAMPLES()).toMatchSnapshot()
129 | })
130 | 
131 | test('can parse a line from the VCF spec Y chrom (haploid))', () => {
132 |   const { header, lines } = readVcf('test/data/y-chrom-haploid.vcf')
133 |   const VCFParser = new VCF({
134 |     header,
135 |   })
136 |   const variant = VCFParser.parseLine(lines[0])
137 |   const variant2 = VCFParser.parseLine(lines[1])
138 |   expect(variant).toMatchSnapshot()
139 |   expect(variant.SAMPLES()).toMatchSnapshot()
140 |   expect(variant2).toMatchSnapshot()
141 |   expect(variant2.SAMPLES()).toMatchSnapshot()
142 | })
143 | 
144 | test('snippet from VCF 4.3 spec', () => {
145 |   const { header, lines } = readVcf('test/data/vcf4.3_spec_snippet.vcf')
146 |   const VCFParser = new VCF({
147 |     header,
148 |   })
149 |   const variants = lines.map(line => VCFParser.parseLine(line))
150 |   expect(variants).toMatchSnapshot()
151 |   expect(variants.map(variant => variant.SAMPLES())).toMatchSnapshot()
152 | })
153 | test('can parse breakends', () => {
154 |   const { header, lines } = readVcf('test/data/breakends.vcf')
155 |   const VCFParser = new VCF({
156 |     header,
157 |   })
158 | 
159 |   expect(lines.map(line => VCFParser.parseLine(line))).toMatchSnapshot()
160 | })
161 | 
162 | // from https://github.com/GMOD/jbrowse/issues/1358
163 | test('vcf lines with weird info field and missing format/genotypes', () => {
164 |   const { header, lines } = readVcf(
165 |     'test/data/weird_info_and_missing_format.vcf',
166 |   )
167 |   const VCFParser = new VCF({
168 |     header,
169 |   })
170 | 
171 |   expect(lines.map(line => VCFParser.parseLine(line))).toMatchSnapshot()
172 | })
173 | test('test no info strict', () => {
174 |   const { header, lines } = readVcf('test/data/multipleAltSVs.vcf')
175 |   const VCFParser = new VCF({
176 |     header,
177 |     strict: true,
178 |   })
179 |   expect(() => VCFParser.parseLine(lines[0])).toThrow(/INFO/)
180 | })
181 | 
182 | test('test no info non-strict', () => {
183 |   const { header, lines } = readVcf('test/data/multipleAltSVs.vcf')
184 |   const VCFParser = new VCF({
185 |     header,
186 |     strict: false,
187 |   })
188 |   expect(VCFParser.parseLine(lines[0])).toBeTruthy()
189 |   expect(VCFParser.parseLine(lines[0]).GENOTYPES()).toEqual({})
190 | })
191 | 
192 | test('empty header lines', () => {
193 |   expect(() => new VCF({ header: '\n' })).toThrow(/no non-empty/)
194 | })
195 | 
196 | test('shortcut parsing with 1000 genomes', () => {
197 |   const { header, lines } = readVcf('test/data/1000genomes.vcf')
198 | 
199 |   const VCFParser = new VCF({ header })
200 |   expect(lines.map(line => VCFParser.parseLine(line))).toMatchSnapshot()
201 | })
202 | 
203 | test('shortcut parsing with vcf 4.3 bnd example', () => {
204 |   const { header, lines } = readVcf('test/data/vcf4.3_spec_bnd.vcf')
205 | 
206 |   const VCFParser = new VCF({ header })
207 |   const variants = lines.map(line => VCFParser.parseLine(line))
208 |   expect(variants.map(m => m.ALT?.[0].toString())).toEqual(
209 |     lines.map(line => line.split('\t')[4]),
210 |   )
211 | 
212 |   expect(variants).toMatchSnapshot()
213 | })
214 | 
215 | test('vcf 4.3 single breakends', () => {
216 |   // single breakend
217 |   expect(parseBreakend('G.')).toMatchSnapshot()
218 |   expect(parseBreakend('ACGT.')).toMatchSnapshot()
219 |   expect(parseBreakend('.ACGT')).toMatchSnapshot()
220 | })
221 | 
222 | test('vcf 4.3 insertion shorthand', () => {
223 |   expect(parseBreakend('G<ctgA>')).toMatchSnapshot()
224 |   expect(parseBreakend('<ctgA>G')).toMatchSnapshot()
225 |   expect(parseBreakend('C[<ctg1>:1[')).toMatchSnapshot()
226 |   expect(parseBreakend(']13:123456]AGTNNNNNCAT')).toMatchSnapshot()
227 | })
228 | 
229 | test('parse breakend on symbolic alleles', () => {
230 |   expect(parseBreakend('<TRA>')).not.toBeTruthy()
231 |   expect(parseBreakend('<INS>')).not.toBeTruthy()
232 |   expect(parseBreakend('<DEL>')).not.toBeTruthy()
233 |   expect(parseBreakend('<INV>')).not.toBeTruthy()
234 | })
235 | 
236 | test('parse breakend on thing that looks like symbolic allele but is actually a feature', () => {
237 |   expect(parseBreakend('<INV>C')).toMatchSnapshot()
238 | })
239 | 
240 | test('clinvar metadata', () => {
241 |   const { header } = readVcf('test/data/clinvar.header.vcf')
242 |   const VCFParser = new VCF({
243 |     header,
244 |   })
245 |   expect(VCFParser.getMetadata()).toMatchSnapshot()
246 | })
247 | 
248 | test('sample to genotype information', () => {
249 |   const { header } = readVcf('test/data/sample2genotype.vcf')
250 |   const VCFParser = new VCF({
251 |     header,
252 |   })
253 |   expect(VCFParser.getMetadata().META).toMatchSnapshot()
254 |   expect(VCFParser.getMetadata().SAMPLES).toMatchSnapshot()
255 | })
256 | 
257 | test('pedigree', () => {
258 |   const { header } = readVcf('test/data/pedigree.vcf')
259 |   const VCFParser = new VCF({
260 |     header,
261 |   })
262 |   expect(VCFParser.getMetadata()).toMatchSnapshot()
263 | })
264 | 
265 | // https://github.com/samtools/hts-specs/blob/master/examples/vcf/sv44.vcf
266 | test('x vcf44 spec', () => {
267 |   const { header, lines } = readVcf('test/data/vcf44_spec.vcf')
268 |   const VCFParser = new VCF({
269 |     header,
270 |   })
271 |   expect(VCFParser.getMetadata()).toMatchSnapshot()
272 |   expect(
273 |     lines.map(l => {
274 |       const entry = VCFParser.parseLine(l)
275 |       return {
276 |         ...entry,
277 |         SAMPLES: entry.SAMPLES(),
278 |       }
279 |     }),
280 |   ).toMatchSnapshot()
281 | })
282 | 
283 | // https://github.com/samtools/hts-specs/blob/master/examples/vcf/simple.vcf
284 | test('x simple spec', () => {
285 |   const { header, lines } = readVcf('test/data/simple.vcf')
286 |   const VCFParser = new VCF({
287 |     header,
288 |   })
289 |   expect(VCFParser.getMetadata()).toMatchSnapshot()
290 |   expect(
291 |     lines.map(l => {
292 |       const entry = VCFParser.parseLine(l)
293 |       return {
294 |         ...entry,
295 |         SAMPLES: entry.SAMPLES(),
296 |       }
297 |     }),
298 |   ).toMatchSnapshot()
299 | })
300 | 
301 | test('pedigree', () => {
302 |   const { header } = readVcf('test/data/pedigree.vcf')
303 |   const VCFParser = new VCF({
304 |     header,
305 |   })
306 |   expect(VCFParser.getMetadata()).toMatchSnapshot()
307 | })
308 | 


--------------------------------------------------------------------------------
/src/vcfReserved.ts:
--------------------------------------------------------------------------------
  1 | export default {
  2 |   // INFO fields
  3 |   InfoFields: {
  4 |     // from the VCF4.3 spec, https://samtools.github.io/hts-specs/VCFv4.3.pdf
  5 |     AA: { Number: 1, Type: 'String', Description: 'Ancestral allele' },
  6 |     AC: {
  7 |       Number: 'A',
  8 |       Type: 'Integer',
  9 |       Description:
 10 |         'Allele count in genotypes, for each ALT allele, in the same order as listed',
 11 |     },
 12 |     AD: {
 13 |       Number: 'R',
 14 |       Type: 'Integer',
 15 |       Description: 'Total read depth for each allele',
 16 |     },
 17 |     ADF: {
 18 |       Number: 'R',
 19 |       Type: 'Integer',
 20 |       Description: 'Read depth for each allele on the forward strand',
 21 |     },
 22 |     ADR: {
 23 |       Number: 'R',
 24 |       Type: 'Integer',
 25 |       Description: 'Read depth for each allele on the reverse strand',
 26 |     },
 27 |     AF: {
 28 |       Number: 'A',
 29 |       Type: 'Float',
 30 |       Description:
 31 |         'Allele frequency for each ALT allele in the same order as listed (estimated from primary data, not called genotypes)',
 32 |     },
 33 |     AN: {
 34 |       Number: 1,
 35 |       Type: 'Integer',
 36 |       Description: 'Total number of alleles in called genotypes',
 37 |     },
 38 |     BQ: {
 39 |       Number: 1,
 40 |       Type: 'Float',
 41 |       Description: 'RMS base quality',
 42 |     },
 43 |     CIGAR: {
 44 |       Number: 'A', // one CIGAR per ALT allele
 45 |       Type: 'String', // CIGAR values such as "1M2D" are text, not numbers
 46 |       Description:
 47 |         'Cigar string describing how to align an alternate allele to the reference allele',
 48 |     },
 49 |     DB: {
 50 |       Number: 0,
 51 |       Type: 'Flag',
 52 |       Description: 'dbSNP membership',
 53 |     },
 54 |     DP: {
 55 |       Number: 1,
 56 |       Type: 'Integer',
 57 |       Description: 'combined depth across samples',
 58 |     },
 59 |     END: {
 60 |       Number: 1,
 61 |       Type: 'Integer',
 62 |       Description: 'End position (for use with symbolic alleles)',
 63 |     },
 64 |     H2: {
 65 |       Number: 0,
 66 |       Type: 'Flag',
 67 |       Description: 'HapMap2 membership',
 68 |     },
 69 |     H3: {
 70 |       Number: 0,
 71 |       Type: 'Flag',
 72 |       Description: 'HapMap3 membership',
 73 |     },
 74 |     MQ: {
 75 |       Number: 1,
 76 |       Type: null,
 77 |       Description: 'RMS mapping quality',
 78 |     },
 79 |     MQ0: {
 80 |       Number: 1,
 81 |       Type: 'Integer',
 82 |       Description: 'Number of MAPQ == 0 reads',
 83 |     },
 84 |     NS: {
 85 |       Number: 1,
 86 |       Type: 'Integer',
 87 |       Description: 'Number of samples with data',
 88 |     },
 89 |     SB: {
 90 |       Number: 4,
 91 |       Type: 'Integer',
 92 |       Description: 'Strand bias',
 93 |     },
 94 |     SOMATIC: {
 95 |       Number: 0,
 96 |       Type: 'Flag',
 97 |       Description: 'Somatic mutation (for cancer genomics)',
 98 |     },
 99 |     VALIDATED: {
100 |       Number: 0,
101 |       Type: 'Flag',
102 |       Description: 'Validated by follow-up experiment',
103 |     },
104 |     '1000G': {
105 |       Number: 0,
106 |       Type: 'Flag',
107 |       Description: '1000 Genomes membership',
108 |     },
109 |     // specifically for structural variants
110 |     IMPRECISE: {
111 |       Number: 0,
112 |       Type: 'Flag',
113 |       Description: 'Imprecise structural variation',
114 |     },
115 |     NOVEL: {
116 |       Number: 0,
117 |       Type: 'Flag',
118 |       Description: 'Indicates a novel structural variation',
119 |     },
120 |     // Note on END (above): for precise variants, END is POS + length of REF
121 |     // allele - 1, and for imprecise variants the corresponding best estimate.
122 |     SVTYPE: {
123 |       Number: 1,
124 |       Type: 'String',
125 |       Description: 'Type of structural variant',
126 |     },
127 |     // SVTYPE value should be one of DEL, INS, DUP, INV, CNV, BND. This key can
128 |     // be derived from the REF/ALT fields but is useful for filtering.
129 |     SVLEN: {
130 |       Number: null,
131 |       Type: 'Integer',
132 |       Description: 'Difference in length between REF and ALT alleles',
133 |     },
134 |     // SVLEN: one value for each ALT allele. Longer ALT alleles (e.g. insertions)
135 |     // have positive values, shorter ALT alleles (e.g. deletions)
136 |     // have negative values.
137 |     CIPOS: {
138 |       Number: 2,
139 |       Type: 'Integer',
140 |       Description: 'Confidence interval around POS for imprecise variants',
141 |     },
142 |     CIEND: {
143 |       Number: 2,
144 |       Type: 'Integer',
145 |       Description: 'Confidence interval around END for imprecise variants',
146 |     },
147 |     HOMLEN: {
148 |       Type: 'Integer',
149 |       Description:
150 |         'Length of base pair identical micro-homology at event breakpoints',
151 |     },
152 |     HOMSEQ: {
153 |       Type: 'String',
154 |       Description:
155 |         'Sequence of base pair identical micro-homology at event breakpoints',
156 |     },
157 |     BKPTID: {
158 |       Type: 'String',
159 |       Description: 'ID of the assembled alternate allele in the assembly file',
160 |     },
161 |     // BKPTID: for precise variants, the consensus sequence of the alternate
162 |     // allele assembly is derivable from the REF and ALT fields. However, the
163 |     // alternate allele assembly file may contain additional information about
164 |     // the characteristics of the alt allele contigs.
165 |     MEINFO: {
166 |       Number: 4,
167 |       Type: 'String',
168 |       Description: 'Mobile element info of the form NAME,START,END,POLARITY',
169 |     },
170 |     METRANS: {
171 |       Number: 4,
172 |       Type: 'String',
173 |       Description:
174 |         'Mobile element transduction info of the form CHR,START,END,POLARITY',
175 |     },
176 |     DGVID: {
177 |       Number: 1,
178 |       Type: 'String',
179 |       Description: 'ID of this element in Database of Genomic Variation',
180 |     },
181 |     DBVARID: {
182 |       Number: 1,
183 |       Type: 'String',
184 |       Description: 'ID of this element in DBVAR',
185 |     },
186 |     DBRIPID: {
187 |       Number: 1,
188 |       Type: 'String',
189 |       Description: 'ID of this element in DBRIP',
190 |     },
191 |     MATEID: {
192 |       Number: null,
193 |       Type: 'String',
194 |       Description: 'ID of mate breakends',
195 |     },
196 |     PARID: {
197 |       Number: 1,
198 |       Type: 'String',
199 |       Description: 'ID of partner breakend',
200 |     },
201 |     EVENT: {
202 |       Number: 1,
203 |       Type: 'String',
204 |       Description: 'ID of event associated to breakend',
205 |     },
206 |     CILEN: {
207 |       Number: 2,
208 |       Type: 'Integer',
209 |       Description:
210 |         'Confidence interval around the inserted material between breakend',
211 |     },
212 |     DPADJ: { Type: 'Integer', Description: 'Read Depth of adjacency' },
213 |     CN: {
214 |       Number: 1,
215 |       Type: 'Integer',
216 |       Description: 'Copy number of segment containing breakend',
217 |     },
218 |     CNADJ: {
219 |       Number: null,
220 |       Type: 'Integer',
221 |       Description: 'Copy number of adjacency',
222 |     },
223 |     CICN: {
224 |       Number: 2,
225 |       Type: 'Integer',
226 |       Description: 'Confidence interval around copy number for the segment',
227 |     },
228 |     CICNADJ: {
229 |       Number: null,
230 |       Type: 'Integer',
231 |       Description: 'Confidence interval around copy number for the adjacency',
232 |     },
233 |   },
234 | 
235 |   // FORMAT fields
236 |   GenotypeFields: {
237 |     // from the VCF4.3 spec, https://samtools.github.io/hts-specs/VCFv4.3.pdf
238 |     AD: {
239 |       Number: 'R',
240 |       Type: 'Integer',
241 |       Description: 'Read depth for each allele',
242 |     },
243 |     ADF: {
244 |       Number: 'R',
245 |       Type: 'Integer',
246 |       Description: 'Read depth for each allele on the forward strand',
247 |     },
248 |     ADR: {
249 |       Number: 'R',
250 |       Type: 'Integer',
251 |       Description: 'Read depth for each allele on the reverse strand',
252 |     },
253 |     DP: {
254 |       Number: 1,
255 |       Type: 'Integer',
256 |       Description: 'Read depth',
257 |     },
258 |     EC: {
259 |       Number: 'A',
260 |       Type: 'Integer',
261 |       Description: 'Expected alternate allele counts',
262 |     },
263 |     FT: {
264 |       Number: 1,
265 |       Type: 'String',
266 |       Description: 'Filter indicating if this genotype was "called"',
267 |     },
268 |     GL: {
269 |       Number: 'G',
270 |       Type: 'Float',
271 |       Description: 'Genotype likelihoods',
272 |     },
273 |     GP: {
274 |       Number: 'G',
275 |       Type: 'Float',
276 |       Description: 'Genotype posterior probabilities',
277 |     },
278 |     GQ: {
279 |       Number: 1,
280 |       Type: 'Integer',
281 |       Description: 'Conditional genotype quality',
282 |     },
283 |     GT: {
284 |       Number: 1,
285 |       Type: 'String',
286 |       Description: 'Genotype',
287 |     },
288 |     HQ: {
289 |       Number: 2,
290 |       Type: 'Integer',
291 |       Description: 'Haplotype quality',
292 |     },
293 |     MQ: {
294 |       Number: 1,
295 |       Type: 'Integer',
296 |       Description: 'RMS mapping quality',
297 |     },
298 |     PL: {
299 |       Number: 'G',
300 |       Type: 'Integer',
301 |       Description:
302 |         'Phred-scaled genotype likelihoods rounded to the closest integer',
303 |     },
304 |     PQ: {
305 |       Number: 1,
306 |       Type: 'Integer',
307 |       Description: 'Phasing quality',
308 |     },
309 |     PS: {
310 |       Number: 1,
311 |       Type: 'Integer',
312 |       Description: 'Phase set',
313 |     },
314 |   },
315 | 
316 |   // ALT fields
317 |   AltTypes: {
318 |     DEL: {
319 |       Description: 'Deletion relative to the reference',
320 |     },
321 |     INS: {
322 |       Description: 'Insertion of novel sequence relative to the reference',
323 |     },
324 |     DUP: {
325 |       Description: 'Region of elevated copy number relative to the reference',
326 |     },
327 |     INV: {
328 |       Description: 'Inversion of reference sequence',
329 |     },
330 |     CNV: {
331 |       Description:
332 |         'Copy number variable region (may be both deletion and duplication)',
333 |     },
334 |     'DUP:TANDEM': {
335 |       Description: 'Tandem duplication',
336 |     },
337 |     'DEL:ME': {
338 |       Description: 'Deletion of mobile element relative to the reference',
339 |     },
340 |     'INS:ME': {
341 |       Description: 'Insertion of a mobile element relative to the reference',
342 |     },
343 |     NON_REF: {
344 |       Description:
345 |         'Represents any possible alternative allele at this location',
346 |     },
347 |     '*': {
348 |       Description:
349 |         'Represents any possible alternative allele at this location',
350 |     },
351 |   },
352 | 
353 |   // FILTER fields
354 |   FilterTypes: {
355 |     PASS: {
356 |       Description: 'Passed all filters',
357 |     },
358 |   },
359 | }
360 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # vcf-js
  2 | 
  3 | High performance Variant Call Format (VCF) parser in pure JavaScript.
  4 | 
  5 | ## Status
  6 | 
  7 | [![NPM version](https://img.shields.io/npm/v/@gmod/vcf.svg?logo=npm&style=flat-square)](https://npmjs.org/package/@gmod/vcf)
  8 | [![Coverage Status](https://img.shields.io/codecov/c/github/GMOD/vcf-js/master.svg?style=flat-square)](https://codecov.io/gh/GMOD/vcf-js/branch/master)
  9 | [![Build Status](https://img.shields.io/github/actions/workflow/status/GMOD/vcf-js/push.yml?branch=master)](https://github.com/GMOD/vcf-js/actions)
 10 | 
 11 | ## Usage
 12 | 
 13 | This module is best used when combined with some easy way of retrieving the
 14 | header and individual lines from a VCF, like the `@gmod/tabix` module.
 15 | 
 16 | ```typescript
 17 | import { TabixIndexedFile } from '@gmod/tabix'
 18 | 
 19 | // with import
 20 | import VCF, { parseBreakend } from '@gmod/vcf'
 21 | 
 22 | // with require
 23 | const { default: VCF, parseBreakend } = require('@gmod/vcf')
 24 | 
 25 | const tbiIndexed = new TabixIndexedFile({ path: '/path/to/my.vcf.gz' })
 26 | 
 27 | async function doStuff() {
 28 |   const headerText = await tbiIndexed.getHeader()
 29 |   const tbiVCFParser = new VCF({ header: headerText })
 30 |   const variants = []
 31 |   await tbiIndexed.getLines('ctgA', 200, 300, line =>
 32 |     variants.push(tbiVCFParser.parseLine(line)),
 33 |   )
 34 |   console.log(variants)
 35 | }
 36 | ```
 37 | 
 38 | If you want to stream a VCF file, you can alternatively use something like this:
 39 | 
 40 | ```typescript
 41 | const fs = require('fs')
 42 | const VCF = require('@gmod/vcf').default
 43 | const { createGunzip } = require('zlib')
 44 | const readline = require('readline')
 45 | 
 46 | const rl = readline.createInterface({
 47 |   input: fs.createReadStream(process.argv[2]).pipe(createGunzip()),
 48 | })
 49 | 
 50 | let header = []
 51 | let elts = []
 52 | let parser = undefined
 53 | 
 54 | rl.on('line', function (line) {
 55 |   if (line.startsWith('#')) {
 56 |     header.push(line)
 57 |     return
 58 |   } else if (!parser) {
 59 |     parser = new VCF({ header: header.join('\n') })
 60 |   }
 61 |   const elt = parser.parseLine(line)
 62 |   elts.push(elt.INFO.AN[0])
 63 | })
 64 | 
 65 | rl.on('close', function () {
 66 |   console.log(elts.reduce((a, b) => a + b, 0) / elts.length)
 67 | })
 68 | ```
 69 | 
 70 | This method is used to test @gmod/vcf in https://github.com/brentp/vcf-bench
 71 | 
 72 | ## Methods
 73 | 
 74 | Given a VCF with a single variant line
 75 | 
 76 | ```text
 77 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HG00096
 78 | contigA	3000	rs17883296	G	T,A	100	PASS	NS=3;DP=14;AF=0.5;DB;XYZ=5	GT:AP	0|0:0.000,0.000
 79 | ```
 80 | 
 81 | The `variant` object returned by `parseLine()` would be
 82 | 
 83 | ```typescript
 84 | {
 85 |   CHROM: 'contigA',
 86 |   POS: 3000,
 87 |   ID: ['rs17883296'],
 88 |   REF: 'G',
 89 |   ALT: ['T', 'A'],
 90 |   QUAL: 100,
 91 |   FILTER: 'PASS',
 92 |   INFO: {
 93 |     NS: [3],
 94 |     DP: [14],
 95 |     AF: [0.5],
 96 |     DB: true,
 97 |     XYZ: ['5'],
 98 |   },
 99 |   SAMPLES: () => ({
100 |     HG00096: {
101 |       GT: ['0|0'],
102 |       AP: ['0.000', '0.000'],
103 |     },
104 |   }),
105 |   GENOTYPES: () => ({
106 |     HG00096: '0|0',
107 |   }),
108 | }
109 | ```
110 | 
111 | `variant.SAMPLES()` and `variant.GENOTYPES()` are functions because the parser
112 | does not eagerly parse the genotype data; it is parsed only when you call one
113 | of them, which can save time, especially if your VCF has a lot of samples in
114 | it.
115 | 
116 | The `variant.SAMPLES()` function parses out the FORMAT fields, while
117 | `variant.GENOTYPES()` returns just the genotypes string, which can be faster if
118 | that is the only information you are interested in.
119 | 
120 | The parser will try to convert the values in INFO and FORMAT to the proper types
121 | using the header metadata. For example, if there is a header line like
122 | 
123 | ```text
124 | ##INFO=<ID=ABC,Number=2,Type=Integer,Description="A description">
125 | ```
126 | 
127 | The parser will expect any INFO entry ABC to be an array of two integers, so it
128 | would convert `ABC=12,20` to `{ ABC: [12, 20] }`.
129 | 
130 | Each INFO entry value will be an array unless `Type=Flag` is specified, in which
131 | case it will be `true`. If no metadata can be found for the entry, it will
132 | assume `Number=1` and `Type=String`.
133 | 
134 | Some fields are pre-defined by the
135 | [VCF spec](https://samtools.github.io/hts-specs/VCFv4.3.pdf), which is why in
136 | the variant object above "DP" was parsed as an integer (it is defined in the VCF
137 | spec), but "XYZ" was left as a string (it is not defined in either the VCF spec
138 | or the header).
139 | 
140 | Metadata can be accessed with the `getMetadata()` method, including all the
141 | built-in metadata from the VCF spec. With no parameters it will return all the
142 | data. Any parameters passed will further filter the metadata. For example, for a
143 | VCF with this header:
144 | 
145 | ```text
146 | ##INFO=<ID=ABC,Number=2,Type=Integer,Description="A description">
147 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
148 | ```
149 | 
150 | you can access the VCF's header metadata like (some output omitted for clarity):
151 | 
152 | ```typescript
153 | > console.log(vcfParser.getMetadata())
154 | { INFO:
155 |    { AA:
156 |       { Number: 1, Type: 'String', Description: 'Ancestral allele' },
157 | 
158 | ...
159 | 
160 |      ABC: { Number: 2, Type: 'Integer', Description: 'A description' } },
161 |   FORMAT:
162 |    { AD:
163 |       { Number: 'R',
164 |         Type: 'Integer',
165 |         Description: 'Read depth for each allele' },
166 | 
167 | ...
168 | 
169 |   ALT:
170 |    { DEL: { Description: 'Deletion relative to the reference' },
171 | 
172 | ...
173 | 
174 |   FILTER: { PASS: { Description: 'Passed all filters' } } }
175 | 
176 | > console.log(vcfParser.getMetadata('INFO'))
177 | { AA:
178 |    { Number: 1, Type: 'String', Description: 'Ancestral Allele' },
179 |   AC:
180 |    { Number: 'A',
181 |      Type: 'Integer',
182 |      Description:
183 |       'Allele count in genotypes, for each ALT allele, in the same order as listed' },
184 |   AD:
185 |    { Number: 'R',
186 |      Type: 'Integer',
187 |      Description: 'Total read depth for each allele' },
188 | 
189 | ...
190 | 
191 |   ABC: { Number: 2, Type: 'Integer', Description: 'A description' } }
192 | 
193 | > console.log(vcfParser.getMetadata('INFO', 'DP'))
194 | { Number: 1, Type: 'Integer', Description: 'Total Depth' }
195 | 
196 | > console.log(vcfParser.getMetadata('INFO', 'DP', 'Number'))
197 | 1
198 | ```
199 | 
200 | A list of sample names is also available in the `samples` attribute of the
201 | parser object:
202 | 
203 | ```typescript
204 | > console.log(vcfParser.samples)
205 | [ 'HG00096' ]
206 | ```
207 | 
208 | ## Breakends
209 | 
210 | We offer a helper function to parse breakend strings. We used to parse these
211 | automatically, but this is now done with a separate helper function:
212 | 
213 | ```js
214 | import { parseBreakend } from '@gmod/vcf'
215 | parseBreakend('C[2:321682[')
216 | 
217 | // output
218 | //
219 | //     {
220 | //       "MateDirection": "right",
221 | //       "Replacement": "C",
222 | //       "MatePosition": "2:321682",
223 | //       "Join": "right"
224 | //     }
225 | ```
226 | 
227 | - The C\[2:321682\[ parses as "Join": "right" because the BND is after the C
228 |   base
229 | - The C\[2:321682\[ also is given "MateDirection": "right" because the square
230 |   brackets point to the right.
231 | - The spec never has the square brackets pointing in different directions.
232 |   Instead, the different types of joins can be imagined as follows
233 | 
234 | For the above vcf line where chr13:123456->C\[2:321682\[ then we have this
235 | 
236 |         chr13:123456
237 |       -------------C\
238 |                      \
239 |                       \
240 |                        \
241 |                         \
242 |                          \
243 |                           \
244 |                            \
245 |                             \--------------
246 |                              chr2:321682
247 | 
248 | If the alt was instead chr13:123456->\[2:321682\[C then the "Join" would be
249 | "left" since the "BND" is before "C" and then the breakend structure looks like
250 | this
251 | 
252 |           chr13:123456
253 | 
254 |           |C--------------------
255 |           |
256 |           |
257 |           |
258 |           |
259 |           |
260 |           |
261 |           |
262 |           |
263 |           |
264 |           |
265 |           ----------------------
266 |            chr2:321682
267 | 
268 | ## API
269 | 
270 | <!-- Generated by documentation.js. Update this documentation by updating the source code. -->
271 | 
272 | #### Table of Contents
273 | 
274 | - [VCFParser](#vcfparser)
275 |   - [Parameters](#parameters)
276 |   - [getMetadata](#getmetadata)
277 |     - [Parameters](#parameters-1)
278 |   - [parseLine](#parseline)
279 |     - [Parameters](#parameters-2)
280 | 
281 | ### VCFParser
282 | 
283 | Class representing a VCF parser, instantiated with the VCF header.
284 | 
285 | #### Parameters
286 | 
287 | - `args`
288 |   **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)**&#x20;
289 |   - `args.header`
290 |     **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**
291 |     The VCF header. Supports both LF and CRLF newlines. (optional, default `''`)
292 |   - `args.strict`
293 |     **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)**
294 |     Whether to parse in strict mode or not (default true) (optional, default
295 |     `true`)
296 | 
297 | #### getMetadata
298 | 
299 | Get metadata filtered by the elements in args. For example, can pass ('INFO',
300 | 'DP') to only get info on a metadata tag that was like "##INFO=\<ID=DP,...>"
301 | 
302 | ##### Parameters
303 | 
304 | - `args`
305 |   **...[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**
306 |   List of metadata filter strings.
307 | 
308 | Returns **any** An object, string, or number, depending on the filtering
309 | 
310 | #### parseLine
311 | 
312 | Parse a VCF line into an object like
313 | 
314 | ```typescript
315 | {
316 |   CHROM: 'contigA',
317 |   POS: 3000,
318 |   ID: ['rs17883296'],
319 |   REF: 'G',
320 |   ALT: ['T', 'A'],
321 |   QUAL: 100,
322 |   FILTER: 'PASS',
323 |   INFO: {
324 |     NS: [3],
325 |     DP: [14],
326 |     AF: [0.5],
327 |     DB: true,
328 |     XYZ: ['5'],
329 |   },
330 |   SAMPLES: () => ({
331 |     HG00096: {
332 |       GT: ['0|0'],
333 |       AP: ['0.000', '0.000'],
334 |     }
335 |   }),
336 |   GENOTYPES: () => ({
337 |     HG00096: '0|0'
338 |   })
339 | }
340 | ```
341 | 
342 | SAMPLES and GENOTYPES methods are functions instead of static data fields
343 | because it avoids parsing the potentially long list of samples from e.g. 1000
344 | Genomes data unless requested.
345 | 
346 | The SAMPLES function gives all info about the samples
347 | 
348 | The GENOTYPES function only extracts the raw GT string if it exists, for
349 | potentially optimized parsing by programs that need it
350 | 
351 | ##### Parameters
352 | 
353 | - `line`
354 |   **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**
355 |   A string of a line from a VCF
356 | 


--------------------------------------------------------------------------------
/test/parseGenotypesOnly-edge-cases.test.ts:
--------------------------------------------------------------------------------
  1 | import { expect, test } from 'vitest'
  2 | 
  3 | import { parseGenotypesOnly } from '../src/parseGenotypesOnly'
  4 | 
  5 | test('last sample with 3-char GT and no trailing tab', () => {
  6 |   // Single sample, no trailing tab
  7 |   const result = parseGenotypesOnly('GT', '0/1', ['S1'])
  8 |   expect(result).toEqual({ S1: '0/1' })
  9 | })
 10 | 
 11 | test('last sample with 3-char GT in multi-sample', () => {
 12 |   // Multiple samples, last one is 3-char with no trailing tab
 13 |   const result = parseGenotypesOnly('GT', '0/1\t1/1', ['S1', 'S2'])
 14 |   expect(result).toEqual({ S1: '0/1', S2: '1/1' })
 15 | })
 16 | 
 17 | test('last sample with non-3-char GT', () => {
 18 |   const result = parseGenotypesOnly('GT', '0/1\t.', ['S1', 'S2'])
 19 |   expect(result).toEqual({ S1: '0/1', S2: '.' })
 20 | })
 21 | 
 22 | test('single sample with 1-char GT', () => {
 23 |   const result = parseGenotypesOnly('GT', '.', ['S1'])
 24 |   expect(result).toEqual({ S1: '.' })
 25 | })
 26 | 
 27 | test('GT:DP:GQ - last sample with 3-char GT', () => {
 28 |   const result = parseGenotypesOnly('GT:DP:GQ', '0/1:20:99', ['S1'])
 29 |   expect(result).toEqual({ S1: '0/1' })
 30 | })
 31 | 
 32 | test('GT:DP:GQ - multiple samples, last with 3-char GT', () => {
 33 |   const result = parseGenotypesOnly('GT:DP:GQ', '0/1:20:99\t1/1:30:99', [
 34 |     'S1',
 35 |     'S2',
 36 |   ])
 37 |   expect(result).toEqual({ S1: '0/1', S2: '1/1' })
 38 | })
 39 | 
 40 | test('GT:DP:GQ - last sample with 1-char GT', () => {
 41 |   const result = parseGenotypesOnly('GT:DP:GQ', '0/1:20:99\t.:30:99', [
 42 |     'S1',
 43 |     'S2',
 44 |   ])
 45 |   expect(result).toEqual({ S1: '0/1', S2: '.' })
 46 | })
 47 | 
 48 | test('empty prerest string', () => {
 49 |   // Returns empty string for sample when no data present
 50 |   const result = parseGenotypesOnly('GT', '', ['S1'])
 51 |   expect(result).toEqual({ S1: '' })
 52 | })
 53 | 
 54 | test('more samples than data', () => {
 55 |   // Returns empty strings for samples beyond available data
 56 |   const result = parseGenotypesOnly('GT', '0/1', ['S1', 'S2', 'S3'])
 57 |   expect(result).toEqual({ S1: '0/1', S2: '', S3: '' })
 58 | })
 59 | 
 60 | test('haploid genotypes - single character', () => {
 61 |   const result = parseGenotypesOnly('GT', '0\t1\t0\t1\t0', [
 62 |     'S1',
 63 |     'S2',
 64 |     'S3',
 65 |     'S4',
 66 |     'S5',
 67 |   ])
 68 |   expect(result).toEqual({ S1: '0', S2: '1', S3: '0', S4: '1', S5: '0' })
 69 | })
 70 | 
 71 | test('haploid genotypes - with missing', () => {
 72 |   const result = parseGenotypesOnly('GT', '0\t.\t1\t.\t0', [
 73 |     'S1',
 74 |     'S2',
 75 |     'S3',
 76 |     'S4',
 77 |     'S5',
 78 |   ])
 79 |   expect(result).toEqual({ S1: '0', S2: '.', S3: '1', S4: '.', S5: '0' })
 80 | })
 81 | 
 82 | test('haploid genotypes - multi-allelic', () => {
 83 |   const result = parseGenotypesOnly('GT', '0\t1\t2\t3\t4\t5', [
 84 |     'S1',
 85 |     'S2',
 86 |     'S3',
 87 |     'S4',
 88 |     'S5',
 89 |     'S6',
 90 |   ])
 91 |   expect(result).toEqual({
 92 |     S1: '0',
 93 |     S2: '1',
 94 |     S3: '2',
 95 |     S4: '3',
 96 |     S5: '4',
 97 |     S6: '5',
 98 |   })
 99 | })
100 | 
101 | test('haploid genotypes - double-digit alleles', () => {
102 |   const result = parseGenotypesOnly('GT', '10\t11\t20\t99', [
103 |     'S1',
104 |     'S2',
105 |     'S3',
106 |     'S4',
107 |   ])
108 |   expect(result).toEqual({ S1: '10', S2: '11', S3: '20', S4: '99' })
109 | })
110 | 
111 | test('haploid genotypes - triple-digit alleles', () => {
112 |   const result = parseGenotypesOnly('GT', '100\t200\t999', ['S1', 'S2', 'S3'])
113 |   expect(result).toEqual({ S1: '100', S2: '200', S3: '999' })
114 | })
115 | 
116 | test('haploid genotypes - many samples', () => {
117 |   const samples = Array.from({ length: 50 }, (_, i) => `S${i}`)
118 |   const gts = Array.from({ length: 50 }, (_, i) => String(i % 2))
119 |   const result = parseGenotypesOnly('GT', gts.join('\t'), samples)
120 | 
121 |   const expected = {} as Record<string, string>
122 |   samples.forEach((s, i) => {
123 |     expected[s] = gts[i]!
124 |   })
125 | 
126 |   expect(result).toEqual(expected)
127 | })
128 | 
129 | test('haploid genotypes - ending without tab', () => {
130 |   const result = parseGenotypesOnly('GT', '0\t1\t2', ['S1', 'S2', 'S3'])
131 |   expect(result).toEqual({ S1: '0', S2: '1', S3: '2' })
132 | })
133 | 
134 | test('haploid genotypes - single sample', () => {
135 |   const result = parseGenotypesOnly('GT', '1', ['S1'])
136 |   expect(result).toEqual({ S1: '1' })
137 | })
138 | 
139 | test('mixed diploid and haploid genotypes', () => {
140 |   const result = parseGenotypesOnly('GT', '0/1\t0\t1/1\t1\t0/0\t.', [
141 |     'S1',
142 |     'S2',
143 |     'S3',
144 |     'S4',
145 |     'S5',
146 |     'S6',
147 |   ])
148 |   expect(result).toEqual({
149 |     S1: '0/1',
150 |     S2: '0',
151 |     S3: '1/1',
152 |     S4: '1',
153 |     S5: '0/0',
154 |     S6: '.',
155 |   })
156 | })
157 | 
158 | test('haploid with GT:DP:GQ format', () => {
159 |   const result = parseGenotypesOnly(
160 |     'GT:DP:GQ',
161 |     '0:20:99\t1:25:99\t0:30:99\t.:15:50',
162 |     ['S1', 'S2', 'S3', 'S4'],
163 |   )
164 |   expect(result).toEqual({ S1: '0', S2: '1', S3: '0', S4: '.' })
165 | })
166 | 
167 | test('many samples with 3-char diploid GTs', () => {
168 |   const result = parseGenotypesOnly(
169 |     'GT',
170 |     '0/1\t1/1\t0/0\t0/1\t1/0\t0/1\t1/1\t0/0\t0/1\t1/0',
171 |     ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10'],
172 |   )
173 |   expect(result).toEqual({
174 |     S1: '0/1',
175 |     S2: '1/1',
176 |     S3: '0/0',
177 |     S4: '0/1',
178 |     S5: '1/0',
179 |     S6: '0/1',
180 |     S7: '1/1',
181 |     S8: '0/0',
182 |     S9: '0/1',
183 |     S10: '1/0',
184 |   })
185 | })
186 | 
187 | test('many samples with phased 3-char GTs', () => {
188 |   const result = parseGenotypesOnly(
189 |     'GT',
190 |     '0|1\t1|1\t0|0\t0|1\t1|0\t0|1\t1|1\t0|0',
191 |     ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8'],
192 |   )
193 |   expect(result).toEqual({
194 |     S1: '0|1',
195 |     S2: '1|1',
196 |     S3: '0|0',
197 |     S4: '0|1',
198 |     S5: '1|0',
199 |     S6: '0|1',
200 |     S7: '1|1',
201 |     S8: '0|0',
202 |   })
203 | })
204 | 
205 | test('many samples with GT:DP:GQ format', () => {
206 |   const result = parseGenotypesOnly(
207 |     'GT:DP:GQ',
208 |     '0/1:20:99\t1/1:25:99\t0/0:30:99\t.:15:50\t0/1:22:99\t1/0:28:99',
209 |     ['S1', 'S2', 'S3', 'S4', 'S5', 'S6'],
210 |   )
211 |   expect(result).toEqual({
212 |     S1: '0/1',
213 |     S2: '1/1',
214 |     S3: '0/0',
215 |     S4: '.',
216 |     S5: '0/1',
217 |     S6: '1/0',
218 |   })
219 | })
220 | 
221 | test('many samples with DP:GQ:GT format', () => {
222 |   const result = parseGenotypesOnly(
223 |     'DP:GQ:GT',
224 |     '20:99:0/1\t25:99:1/1\t30:99:0/0\t15:50:.\t22:99:0/1\t28:99:1/0',
225 |     ['S1', 'S2', 'S3', 'S4', 'S5', 'S6'],
226 |   )
227 |   expect(result).toEqual({
228 |     S1: '0/1',
229 |     S2: '1/1',
230 |     S3: '0/0',
231 |     S4: '.',
232 |     S5: '0/1',
233 |     S6: '1/0',
234 |   })
235 | })
236 | 
237 | test('complex multi-allelic genotypes', () => {
238 |   const result = parseGenotypesOnly('GT', '0/1\t1/2\t2/2\t0/2\t1/1\t./.', [
239 |     'S1',
240 |     'S2',
241 |     'S3',
242 |     'S4',
243 |     'S5',
244 |     'S6',
245 |   ])
246 |   expect(result).toEqual({
247 |     S1: '0/1',
248 |     S2: '1/2',
249 |     S3: '2/2',
250 |     S4: '0/2',
251 |     S5: '1/1',
252 |     S6: './.',
253 |   })
254 | })
255 | 
256 | test('triploid genotypes', () => {
257 |   const result = parseGenotypesOnly('GT', '0/0/1\t0/1/1\t1/1/1\t0/0/0', [
258 |     'S1',
259 |     'S2',
260 |     'S3',
261 |     'S4',
262 |   ])
263 |   expect(result).toEqual({
264 |     S1: '0/0/1',
265 |     S2: '0/1/1',
266 |     S3: '1/1/1',
267 |     S4: '0/0/0',
268 |   })
269 | })
270 | 
271 | test('tetraploid genotypes', () => {
272 |   const result = parseGenotypesOnly(
273 |     'GT',
274 |     '0/0/0/1\t0/1/1/1\t1/1/1/1\t0/0/0/0',
275 |     ['S1', 'S2', 'S3', 'S4'],
276 |   )
277 |   expect(result).toEqual({
278 |     S1: '0/0/0/1',
279 |     S2: '0/1/1/1',
280 |     S3: '1/1/1/1',
281 |     S4: '0/0/0/0',
282 |   })
283 | })
284 | 
285 | test('hexaploid genotypes', () => {
286 |   const result = parseGenotypesOnly('GT', '0/0/0/0/0/1\t0/1/1/1/1/1', [
287 |     'S1',
288 |     'S2',
289 |   ])
290 |   expect(result).toEqual({
291 |     S1: '0/0/0/0/0/1',
292 |     S2: '0/1/1/1/1/1',
293 |   })
294 | })
295 | 
296 | test('mixed ploidy - haploid, diploid, triploid, tetraploid', () => {
297 |   const result = parseGenotypesOnly(
298 |     'GT',
299 |     '0\t0/1\t0/1/2\t0/1/2/3\t1\t./.\t0/0/0',
300 |     ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7'],
301 |   )
302 |   expect(result).toEqual({
303 |     S1: '0',
304 |     S2: '0/1',
305 |     S3: '0/1/2',
306 |     S4: '0/1/2/3',
307 |     S5: '1',
308 |     S6: './.',
309 |     S7: '0/0/0',
310 |   })
311 | })
312 | 
313 | test('polyploid with phased genotypes', () => {
314 |   const result = parseGenotypesOnly('GT', '0|0|1\t0|1|1\t1|1|1\t0|0|0', [
315 |     'S1',
316 |     'S2',
317 |     'S3',
318 |     'S4',
319 |   ])
320 |   expect(result).toEqual({
321 |     S1: '0|0|1',
322 |     S2: '0|1|1',
323 |     S3: '1|1|1',
324 |     S4: '0|0|0',
325 |   })
326 | })
327 | 
328 | test('polyploid with multi-allelic variants', () => {
329 |   const result = parseGenotypesOnly(
330 |     'GT',
331 |     '0/1/2\t1/2/3\t2/3/4\t0/0/0\t./././.',
332 |     ['S1', 'S2', 'S3', 'S4', 'S5'],
333 |   )
334 |   expect(result).toEqual({
335 |     S1: '0/1/2',
336 |     S2: '1/2/3',
337 |     S3: '2/3/4',
338 |     S4: '0/0/0',
339 |     S5: './././.',
340 |   })
341 | })
342 | 
343 | test('large scale mixed ploidy', () => {
344 |   const samples = Array.from({ length: 20 }, (_, i) => `S${i}`)
345 |   const gts = [
346 |     '0',
347 |     '0/1',
348 |     '0/1/2',
349 |     '0/1/2/3',
350 |     '1',
351 |     '1/1',
352 |     '0/0/0',
353 |     '1/1/1/1',
354 |     '.',
355 |     './.',
356 |     '0',
357 |     '0/1',
358 |     '0/1/2',
359 |     '0/1/2/3',
360 |     '1',
361 |     '1/1',
362 |     '0/0/0',
363 |     '1/1/1/1',
364 |     '.',
365 |     './.',
366 |   ]
367 |   const result = parseGenotypesOnly('GT', gts.join('\t'), samples)
368 | 
369 |   const expected = {} as Record<string, string>
370 |   samples.forEach((s, i) => {
371 |     expected[s] = gts[i]!
372 |   })
373 | 
374 |   expect(result).toEqual(expected)
375 | })
376 | 
377 | test('very long polyploid genotypes', () => {
378 |   const result = parseGenotypesOnly('GT', '0/1/2/3/4/5/6/7\t0/0/0/0/0/0/0/0', [
379 |     'S1',
380 |     'S2',
381 |   ])
382 |   expect(result).toEqual({
383 |     S1: '0/1/2/3/4/5/6/7',
384 |     S2: '0/0/0/0/0/0/0/0',
385 |   })
386 | })
387 | 
388 | test('polyploid with double-digit alleles', () => {
389 |   const result = parseGenotypesOnly('GT', '0/10/20\t10/11/12\t0/0/0', [
390 |     'S1',
391 |     'S2',
392 |     'S3',
393 |   ])
394 |   expect(result).toEqual({
395 |     S1: '0/10/20',
396 |     S2: '10/11/12',
397 |     S3: '0/0/0',
398 |   })
399 | })
400 | 
401 | test('mixed ploidy ending with haploid', () => {
402 |   const result = parseGenotypesOnly('GT', '0/1\t0/1/2\t0/1/2/3\t1', [
403 |     'S1',
404 |     'S2',
405 |     'S3',
406 |     'S4',
407 |   ])
408 |   expect(result).toEqual({
409 |     S1: '0/1',
410 |     S2: '0/1/2',
411 |     S3: '0/1/2/3',
412 |     S4: '1',
413 |   })
414 | })
415 | 
416 | test('mixed ploidy ending with polyploid', () => {
417 |   const result = parseGenotypesOnly('GT', '0\t0/1\t0/1/2/3/4', [
418 |     'S1',
419 |     'S2',
420 |     'S3',
421 |   ])
422 |   expect(result).toEqual({
423 |     S1: '0',
424 |     S2: '0/1',
425 |     S3: '0/1/2/3/4',
426 |   })
427 | })
428 | 
429 | test('alternating ploidy pattern', () => {
430 |   const result = parseGenotypesOnly('GT', '0\t0/1\t0\t0/1\t0\t0/1\t0\t0/1', [
431 |     'S1',
432 |     'S2',
433 |     'S3',
434 |     'S4',
435 |     'S5',
436 |     'S6',
437 |     'S7',
438 |     'S8',
439 |   ])
440 |   expect(result).toEqual({
441 |     S1: '0',
442 |     S2: '0/1',
443 |     S3: '0',
444 |     S4: '0/1',
445 |     S5: '0',
446 |     S6: '0/1',
447 |     S7: '0',
448 |     S8: '0/1',
449 |   })
450 | })
451 | 


--------------------------------------------------------------------------------
/src/parse.ts:
--------------------------------------------------------------------------------
  1 | import { parseGenotypesOnly } from './parseGenotypesOnly.ts'
  2 | import { parseMetaString } from './parseMetaString.ts'
  3 | import vcfReserved from './vcfReserved.ts'
  4 | 
  5 | function decodeURIComponentNoThrow(uri: string) {
  6 |   try {
  7 |     return decodeURIComponent(uri)
  8 |   } catch (_e) {
  9 |     // avoid throwing exception on a failure to decode URI component
 10 |     return uri
 11 |   }
 12 | }
 13 | 
 14 | /**
 15 |  * Class representing a VCF parser, instantiated with the VCF header.
 16 |  *
 17 |  * @param {object} args
 18 |  *
 19 |  * @param {string} args.header - The VCF header. Supports both LF and CRLF
 20 |  * newlines.
 21 |  *
 22 |  * @param {boolean} args.strict - Whether to parse in strict mode or not
 23 |  * (default true)
 24 |  */
 25 | export default class VCFParser {
 26 |   private metadata: Record<string, unknown>
 27 |   public strict: boolean
 28 |   public samples: string[]
 29 | 
 30 |   constructor({
 31 |     header = '',
 32 |     strict = true,
 33 |   }: {
 34 |     header: string
 35 |     strict?: boolean
 36 |   }) {
 37 |     if (!header.length) {
 38 |       throw new Error('empty header received')
 39 |     }
 40 |     const headerLines = header.split(/[\r\n]+/).filter(Boolean)
 41 |     if (!headerLines.length) {
 42 |       throw new Error('no non-empty header lines specified')
 43 |     }
 44 | 
 45 |     this.strict = strict
 46 |     this.metadata = {
 47 |       INFO: { ...vcfReserved.InfoFields },
 48 |       FORMAT: { ...vcfReserved.GenotypeFields },
 49 |       ALT: { ...vcfReserved.AltTypes },
 50 |       FILTER: { ...vcfReserved.FilterTypes },
 51 |     }
 52 | 
 53 |     let lastLine: string | undefined
 54 |     for (let i = 0; i < headerLines.length; i++) {
 55 |       const line = headerLines[i]!
 56 |       if (!line.startsWith('#')) {
 57 |         throw new Error(`Bad line in header:\n${line}`)
 58 |       } else if (line.startsWith('##')) {
 59 |         this.parseMetadata(line)
 60 |       } else {
 61 |         lastLine = line
 62 |       }
 63 |     }
 64 | 
 65 |     if (!lastLine) {
 66 |       throw new Error('No format line found in header')
 67 |     }
 68 |     const fields = lastLine.trim().split('\t')
 69 |     const thisHeader = fields.slice(0, 8)
 70 |     const correctHeader = [
 71 |       '#CHROM',
 72 |       'POS',
 73 |       'ID',
 74 |       'REF',
 75 |       'ALT',
 76 |       'QUAL',
 77 |       'FILTER',
 78 |       'INFO',
 79 |     ]
 80 |     if (fields.length < 8) {
 81 |       throw new Error(`VCF header missing columns:\n${lastLine}`)
 82 |     } else if (
 83 |       thisHeader.length !== correctHeader.length ||
 84 |       !thisHeader.every((value, index) => value === correctHeader[index])
 85 |     ) {
 86 |       throw new Error(`VCF column headers not correct:\n${lastLine}`)
 87 |     }
 88 |     this.samples = fields.slice(9)
 89 |   }
 90 | 
 91 |   private parseInfo(infoStr: string) {
 92 |     const result: Record<string, any> = {}
 93 |     const hasDecode = infoStr.includes('%')
 94 |     const infoPairs = infoStr.split(';')
 95 |     const infoMeta = this.metadata.INFO as Record<string, any>
 96 |     const pairsLen = infoPairs.length
 97 | 
 98 |     for (let i = 0; i < pairsLen; i++) {
 99 |       const pair = infoPairs[i]!
100 |       const eqIdx = pair.indexOf('=')
101 |       const key = eqIdx === -1 ? pair : pair.slice(0, eqIdx)
102 |       const val = eqIdx === -1 ? undefined : pair.slice(eqIdx + 1)
103 |       const itemType = infoMeta[key]?.Type
104 | 
105 |       if (itemType === 'Flag') {
106 |         result[key] = true
107 |       } else if (!val) {
108 |         result[key] = true
109 |       } else {
110 |         const isNumber = itemType === 'Integer' || itemType === 'Float'
111 |         const rawItems = val.split(',')
112 |         const itemsLen = rawItems.length
113 | 
114 |         if (hasDecode) {
115 |           const items: (string | number | undefined)[] = []
116 |           for (let j = 0; j < itemsLen; j++) {
117 |             const v = rawItems[j]!
118 |             if (v === '.') {
119 |               items.push(undefined)
120 |             } else {
121 |               const decoded = decodeURIComponentNoThrow(v)
122 |               items.push(isNumber ? Number(decoded) : decoded)
123 |             }
124 |           }
125 |           result[key] = items
126 |         } else {
127 |           const items: (string | number | undefined)[] = []
128 |           for (let j = 0; j < itemsLen; j++) {
129 |             const v = rawItems[j]!
130 |             if (v === '.') {
131 |               items.push(undefined)
132 |             } else {
133 |               items.push(isNumber ? Number(v) : v)
134 |             }
135 |           }
136 |           result[key] = items
137 |         }
138 |       }
139 |     }
140 |     return result
141 |   }
142 | 
143 |   private parseSamples(format: string, prerest: string) {
144 |     const genotypes = {} as Record<
145 |       string,
146 |       Record<string, (string | number | undefined)[] | undefined>
147 |     >
148 |     if (format) {
149 |       const rest = prerest.split('\t')
150 |       const formatKeys = format.split(':')
151 |       const formatMeta = this.metadata.FORMAT as Record<string, any>
152 |       const isNumberType: boolean[] = []
153 |       for (let i = 0; i < formatKeys.length; i++) {
154 |         const r = formatMeta[formatKeys[i]!]?.Type
155 |         isNumberType.push(r === 'Integer' || r === 'Float')
156 |       }
157 |       const numKeys = formatKeys.length
158 |       const samplesLen = this.samples.length
159 |       for (let i = 0; i < samplesLen; i++) {
160 |         const sample = this.samples[i]!
161 |         const sampleData: Record<
162 |           string,
163 |           (string | number | undefined)[] | undefined
164 |         > = {}
165 |         const sampleStr = rest[i]!
166 |         const sampleStrLen = sampleStr.length
167 |         let colStart = 0
168 |         let colIdx = 0
169 | 
170 |         for (let j = 0; j <= sampleStrLen; j++) {
171 |           if (j === sampleStrLen || sampleStr[j] === ':') {
172 |             const val = sampleStr.slice(colStart, j)
173 |             if (val === '' || val === '.') {
174 |               sampleData[formatKeys[colIdx]!] = undefined
175 |             } else {
176 |               const items = val.split(',')
177 |               const result: (string | number | undefined)[] = []
178 |               if (isNumberType[colIdx]) {
179 |                 for (let k = 0; k < items.length; k++) {
180 |                   const ent = items[k]!
181 |                   result.push(ent === '.' ? undefined : +ent)
182 |                 }
183 |               } else {
184 |                 for (let k = 0; k < items.length; k++) {
185 |                   const ent = items[k]!
186 |                   result.push(ent === '.' ? undefined : ent)
187 |                 }
188 |               }
189 |               sampleData[formatKeys[colIdx]!] = result
190 |             }
191 |             colStart = j + 1
192 |             colIdx += 1
193 |             if (colIdx >= numKeys) {
194 |               break
195 |             }
196 |           }
197 |         }
198 |         genotypes[sample] = sampleData
199 |       }
200 |     }
201 |     return genotypes
202 |   }
203 | 
204 |   /**
205 |    * Parse a VCF metadata line (i.e. a line that starts with "##") and add its
206 |    * properties to the object.
207 |    *
208 |    * @param {string} line - A line from the VCF. Supports both LF and CRLF
209 |    * newlines.
210 |    */
211 |   private parseMetadata(line: string) {
212 |     const match = /^##(.+?)=(.*)/.exec(line.trim())
213 |     if (!match) {
214 |       throw new Error(`Line is not a valid metadata line: ${line}`)
215 |     }
216 |     const [metaKey, metaVal] = match.slice(1, 3)
217 | 
218 |     const r = metaKey!
219 |     if (metaVal?.startsWith('<')) {
220 |       if (!(r in this.metadata)) {
221 |         this.metadata[r] = {}
222 |       }
223 |       const [id, keyVals] = this.parseStructuredMetaVal(metaVal)
224 |       if (id) {
225 |         // if there is an ID field in the <> metadata
226 |         // e.g. ##INFO=<ID=AF_ESP,...>
227 |         ;(this.metadata[r] as Record<string, unknown>)[id] = keyVals
228 |       } else {
229 |         // if there is not an ID field in the <> metadata
230 |         // e.g. ##ID=<Description="ClinVar Variation ID">
231 |         this.metadata[r] = keyVals
232 |       }
233 |     } else {
234 |       this.metadata[r] = metaVal
235 |     }
236 |   }
237 | 
238 |   /**
239 |    * Parse a VCF header structured meta string (i.e. a meta value that starts
240 |    * with "<ID=...")
241 |    *
242 |    * @param {string} metaVal - The VCF metadata value
243 |    *
244 |    * @returns {Array} - Array with two entries, 1) a string of the metadata ID
245 |    * and 2) an object with the other key-value pairs in the metadata
246 |    */
247 |   private parseStructuredMetaVal(metaVal: string) {
248 |     const keyVals = parseMetaString(metaVal)
249 |     const id = keyVals.ID!
250 |     delete keyVals.ID
251 |     if ('Number' in keyVals) {
252 |       if (!Number.isNaN(Number(keyVals.Number))) {
253 |         keyVals.Number = Number(keyVals.Number)
254 |       }
255 |     }
256 |     return [id, keyVals] as const
257 |   }
258 | 
259 |   /**
260 |    * Get metadata filtered by the elements in args. For example, can pass
261 |    * ('INFO', 'DP') to only get info on an metadata tag that was like
262 |    * "##INFO=<ID=DP,...>"
263 |    *
264 |    * @param  {...string} args - List of metadata filter strings.
265 |    *
266 |    * @returns {any} An object, string, or number, depending on the filtering
267 |    */
268 |   getMetadata(...args: string[]) {
269 |     let filteredMetadata: any = this.metadata
270 |     const argsLen = args.length
271 |     for (let i = 0; i < argsLen; i++) {
272 |       filteredMetadata = filteredMetadata[args[i]!]
273 |       if (!filteredMetadata) {
274 |         return filteredMetadata
275 |       }
276 |     }
277 |     return filteredMetadata
278 |   }
279 | 
280 |   /**
281 |    * Parse a VCF line into an object like
282 |    *
283 |    * ```typescript
284 |    * {
285 |    *   CHROM: 'contigA',
286 |    *   POS: 3000,
287 |    *   ID: ['rs17883296'],
288 |    *   REF: 'G',
289 |    *   ALT: ['T', 'A'],
290 |    *   QUAL: 100,
291 |    *   FILTER: 'PASS',
292 |    *   INFO: {
293 |    *     NS: [3],
294 |    *     DP: [14],
295 |    *     AF: [0.5],
296 |    *     DB: true,
297 |    *     XYZ: ['5'],
298 |    *   },
299 |    *   SAMPLES: () => ({
300 |    *     HG00096: {
301 |    *       GT: ['0|0'],
302 |    *       AP: ['0.000', '0.000'],
303 |    *     }
304 |    *   }),
305 |    *   GENOTYPES: () => ({
306 |    *     HG00096: '0|0'
307 |    *   })
308 |    * }
309 |    * ```
310 |    *
311 |    * SAMPLES and GENOTYPES methods are functions instead of static data fields
312 |    * because it avoids parsing the potentially long list of samples from e.g.
313 |    * 1000 genotypes data unless requested.
314 |    *
315 |    * The SAMPLES function gives all info about the samples
316 |    *
317 |    * The GENOTYPES function only extracts the raw GT string if it exists, for
318 |    * potentially optimized parsing by programs that need it
319 |    *
320 |    * @param {string} line - A string of a line from a VCF
321 |    */
322 |   parseLine(line: string) {
323 |     let currChar = 0
324 |     let tabCount = 0
325 |     while (currChar < line.length && tabCount < 9) {
326 |       if (line[currChar] === '\t') {
327 |         tabCount += 1
328 |       }
329 |       currChar += 1
330 |     }
331 |     const splitPos = tabCount === 9 ? currChar - 1 : currChar
332 |     const fields = line.slice(0, splitPos).split('\t')
333 |     const rest = line.slice(splitPos + 1)
334 |     const [CHROM, POS, ID, REF, ALT, QUAL, FILTER] = fields
335 |     const chrom = CHROM
336 |     const pos = +POS!
337 |     const id = ID === '.' ? undefined : ID!.split(';')
338 |     const ref = REF
339 |     const alt = ALT === '.' ? undefined : ALT!.split(',')
340 |     const qual = QUAL === '.' ? undefined : +QUAL!
341 |     const filter = FILTER === '.' ? undefined : FILTER!.split(';')
342 |     const format = fields[8]
343 | 
344 |     if (this.strict && !fields[7]) {
345 |       throw new Error(
346 |         "no INFO field specified, must contain at least a '.' (turn off strict mode to allow)",
347 |       )
348 |     }
349 |     const info =
350 |       fields[7] === undefined || fields[7] === '.'
351 |         ? {}
352 |         : this.parseInfo(fields[7])
353 | 
354 |     return {
355 |       CHROM: chrom,
356 |       POS: pos,
357 |       ALT: alt,
358 |       INFO: info,
359 |       REF: ref,
360 |       FILTER: filter?.length === 1 && filter[0] === 'PASS' ? 'PASS' : filter,
361 |       ID: id,
362 |       QUAL: qual,
363 |       FORMAT: format,
364 |       SAMPLES: () => this.parseSamples(fields[8] ?? '', rest),
365 |       GENOTYPES: () => parseGenotypesOnly(fields[8] ?? '', rest, this.samples),
366 |     }
367 |   }
368 | }
369 | 
370 | export type Variant = ReturnType<VCFParser['parseLine']>
371 | 


--------------------------------------------------------------------------------
/test/data/1000genomes.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.1
 2 | ##FILTER=<ID=PASS,Description="All filters passed">
 3 | ##fileDate=20150218
 4 | ##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz
 5 | ##contig=<ID=Y,length=59373566,assembly=b37>
 6 | ##source=freeBayes v0.9.9.2 | GT values over-written with maximum likelihood state (subject to threshold) OR phylogenetic imputation
 7 | ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total read depth at the locus">
 8 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 9 | ##source=GenomeSTRiP_v1.04
10 | ##ALT=<ID=CNV,Description="Copy number polymorphism">
11 | ##FILTER=<ID=ALIGNLENGTH,Description="GSELENGTH < 200">
12 | ##FILTER=<ID=CLUSTERSEP,Description="GSCLUSTERSEP == NA || GSCLUSTERSEP <= 2.0">
13 | ##FILTER=<ID=DUPLICATE,Description="GSDUPLICATESCORE != NA && GSDUPLICATEOVERLAP >= 0.5 && GSDUPLICATESCORE >= 0.0">
14 | ##FILTER=<ID=GTDEPTH,Description="GSM1 == NA || GSM1 <= 0.5 || GSM1 >= 2.0">
15 | ##FILTER=<ID=INBREEDINGCOEFF,Description="GLINBREEDINGCOEFF != NA && GLINBREEDINGCOEFF < -0.15">
16 | ##FILTER=<ID=NONVARIANT,Description="GSNONVARSCORE != NA && GSNONVARSCORE >= 13.0">
17 | ##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number genotype for imprecise events">
18 | ##FORMAT=<ID=CNL,Number=.,Type=Float,Description="Copy number likelihoods with no frequency prior">
19 | ##FORMAT=<ID=CNP,Number=.,Type=Float,Description="Copy number likelihoods">
20 | ##FORMAT=<ID=CNQ,Number=1,Type=Float,Description="Copy number genotype quality for imprecise events">
21 | ##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype likelihoods">
22 | ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
23 | ##FORMAT=<ID=FT,Number=1,Type=String,Description="Per-sample genotype filter">
24 | ##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
25 | ##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of this variant">
26 | ##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
27 | ##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral allele">
28 | ##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
29 | ##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
30 | ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
31 | ##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
32 | ##INFO=<ID=SAS_AF,Number=A,Type=Float,Description="Allele frequency in the SAS populations calculated from AC and AN, in the range (0,1)">
33 | ##INFO=<ID=EUR_AF,Number=A,Type=Float,Description="Allele frequency in the EUR populations calculated from AC and AN, in the range (0,1)">
34 | ##INFO=<ID=AFR_AF,Number=A,Type=Float,Description="Allele frequency in the AFR populations calculated from AC and AN, in the range (0,1)">
35 | ##INFO=<ID=AMR_AF,Number=A,Type=Float,Description="Allele frequency in the AMR populations calculated from AC and AN, in the range (0,1)">
36 | ##INFO=<ID=EAS_AF,Number=A,Type=Float,Description="Allele frequency in the EAS populations calculated from AC and AN, in the range (0,1)">
37 | ##INFO=<ID=VT,Number=.,Type=String,Description="indicates what type of variant the line represents">
38 | ##INFO=<ID=EX_TARGET,Number=0,Type=Flag,Description="indicates whether a variant is within the exon pull down target boundaries">
39 | ##INFO=<ID=MULTI_ALLELIC,Number=0,Type=Flag,Description="indicates whether a site is multi-allelic">
40 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HG00096	HG00101	HG00103	HG00105	HG00107	HG00108	HG00109	HG00112	HG00113	HG00114	HG00115	HG00116	HG00117	HG00119	HG00126	HG00129	HG00131	HG00136	HG00138	HG00139	HG00140	HG00141	HG00142	HG00143	HG00145	HG00148	HG00149	HG00151	HG00155	HG00157	HG00159	HG00160	HG00181	HG00182	HG00183	HG00185	HG00186	HG00187	HG00188	HG00189	HG00190	HG00234	HG00242	HG00243	HG00244	HG00246	HG00251	HG00252	HG00256	HG00260	HG00264	HG00265	HG00267	HG00271	HG00273	HG00277	HG00278	HG00280	HG00284	HG00290	HG00308	HG00310	HG00311	HG00321	HG00325	HG00329	HG00335	HG00336	HG00338	HG00341	HG00342	HG00345	HG00351	HG00358	HG00360	HG00366	HG00369	HG00371	HG00372	HG00375	HG00382	HG00403	HG00406	HG00409	HG00421	HG00436	HG00442	HG00445	HG00448	HG00451	HG00457	HG00463	HG00472	HG00475	HG00478	HG00500	HG00524	HG00530	HG00533	HG00536	HG00542	HG00553	HG00556	HG00559	HG00565	HG00580	HG00583	HG00589	HG00592	HG00595	HG00598	HG00607	HG00610	HG00613	HG00619	HG00622	HG00625	HG00628	HG00631	HG00634	HG00637	HG00640	HG00650	HG00653	HG00656	HG00662	HG00671	HG00674	HG00683	HG00689	HG00692	HG00698	HG00701	HG00704	HG00707	HG00728	HG00731	HG00736	HG00739	HG00742	HG00844	HG00881	HG00982	HG01028	HG01031	HG01047	HG01048	HG01051	HG01054	HG01060	HG01063	HG01066	HG01069	HG01072	HG01075	HG01079	HG01082	HG01085	HG01088	HG01094	HG01097	HG01101	HG01104	HG01107	HG01110	HG01112	HG01121	HG01124	HG01130	HG01133	HG01136	HG01139	HG01142	HG01148	HG01161	HG01164	HG01167	HG01170	HG01173	HG01176	HG01182	HG01187	HG01190	HG01197	HG01200	HG01204	HG01241	HG01247	HG01250	HG01253	HG01256	HG01259	HG01271	HG01277	HG01280	HG01286	HG01302	HG01305	HG01308	HG01311	HG01325	HG01334	HG01341	HG01344	HG01350	HG01353	HG01356	HG01359	HG01362	HG01365	HG01374	HG01377	HG01383	HG01389	HG01392	HG01395	HG01398	HG01402	HG01405	HG01412	HG01413	HG01431	HG01437	HG01440	HG01443	HG01455	HG01461	HG01464	HG01479	HG01485	HG01488	HG01491	HG01494	HG01497	HG01500	HG01503	HG01506	HG01509	HG01512	HG01515	HG01518	HG01521	HG01524	
HG01527	HG01530	HG01536	HG01550	HG01556	HG01565	HG01571	HG01577	HG01583	HG01586	HG01589	HG01596	HG01603	HG01606	HG01608	HG01610	HG01615	HG01617	HG01619	HG01624	HG01625	HG01630	HG01631	HG01669	HG01672	HG01675	HG01678	HG01680	HG01682	HG01686	HG01694	HG01699	HG01700	HG01705	HG01708	HG01709	HG01747	HG01756	HG01761	HG01765	HG01767	HG01771	HG01775	HG01777	HG01781	HG01783	HG01785	HG01789	HG01791	HG01810	HG01811	HG01816	HG01840	HG01842	HG01844	HG01846	HG01849	HG01852	HG01860	HG01861	HG01864	HG01865	HG01866	HG01867	HG01872	HG01873	HG01879	HG01882	HG01885	HG01890	HG01892	HG01912	HG01914	HG01917	HG01920	HG01923	HG01926	HG01932	HG01935	HG01938	HG01941	HG01944	HG01947	HG01950	HG01953	HG01961	HG01967	HG01970	HG01974	HG01977	HG01979	HG01982	HG01986	HG01988	HG01990	HG01991	HG02002	HG02008	HG02009	HG02013	HG02014	HG02017	HG02020	HG02023	HG02026	HG02029	HG02032	HG02035	HG02040	HG02047	HG02050	HG02051	HG02053	HG02058	HG02061	HG02064	HG02067	HG02070	HG02073	HG02076	HG02079	HG02082	HG02085	HG02088	HG02090	HG02104	HG02107	HG02116	HG02122	HG02128	HG02131	HG02134	HG02137	HG02138	HG02141	HG02143	HG02146	HG02150	HG02219	HG02221	HG02224	HG02231	HG02233	HG02236	HG02238	HG02250	HG02253	HG02255	HG02259	HG02262	HG02265	HG02271	HG02274	HG02277	HG02281	HG02283	HG02284	HG02285	HG02291	HG02299	HG02304	HG02307	HG02314	HG02317	HG02323	HG02330	HG02332	HG02334	HG02343	HG02351	HG02353	HG02355	HG02356	HG02360	HG02364	HG02367	HG02371	HG02373	HG02374	HG02375	HG02379	HG02380	HG02382	HG02383	HG02384	HG02385	HG02386	HG02389	HG02390	HG02391	HG02392	HG02394	HG02395	HG02396	HG02397	HG02398	HG02399	HG02401	HG02402	HG02406	HG02407	HG02408	HG02409	HG02410	HG02420	HG02429	HG02433	HG02439	HG02442	HG02445	HG02449	HG02455	HG02461	HG02464	HG02470	HG02481	HG02484	HG02489	HG02490	HG02493	HG02496	HG02501	HG02512	HG02521	HG02536	HG02541	HG02545	HG02554	HG02557	HG02561	HG02570	HG02573	HG02582	HG02585	HG02588	HG02594	HG02597	HG02600	HG02603	HG02610	HG02613	HG02620	HG02623	HG02628	HG02634	HG02642	HG02645	HG02648	HG02651	HG02654	
HG02657	HG02660	HG02666	HG02675	HG02678	HG02681	HG02684	HG02687	HG02690	HG02696	HG02699	HG02702	HG02715	HG02721	HG02724	HG02727	HG02733	HG02736	HG02756	HG02759	HG02768	HG02771	HG02774	HG02780	HG02783	HG02786	HG02789	HG02792	HG02798	HG02804	HG02807	HG02810	HG02813	HG02816	HG02819	HG02836	HG02839	HG02851	HG02854	HG02860	HG02878	HG02881	HG02884	HG02887	HG02890	HG02895	HG02923	HG02938	HG02941	HG02944	HG02947	HG02953	HG02968	HG02971	HG02973	HG02977	HG02981	HG02982	HG03006	HG03009	HG03012	HG03015	HG03018	HG03021	HG03024	HG03027	HG03039	HG03045	HG03048	HG03054	HG03057	HG03060	HG03063	HG03066	HG03069	HG03072	HG03074	HG03077	HG03078	HG03081	HG03084	HG03096	HG03100	HG03103	HG03109	HG03112	HG03115	HG03118	HG03120	HG03124	HG03127	HG03130	HG03133	HG03136	HG03139	HG03157	HG03160	HG03163	HG03166	HG03169	HG03172	HG03175	HG03190	HG03193	HG03196	HG03199	HG03202	HG03209	HG03224	HG03225	HG03228	HG03234	HG03237	HG03240	HG03246	HG03258	HG03265	HG03268	HG03271	HG03280	HG03295	HG03298	HG03301	HG03304	HG03311	HG03313	HG03343	HG03352	HG03367	HG03370	HG03376	HG03382	HG03385	HG03388	HG03391	HG03394	HG03397	HG03432	HG03433	HG03436	HG03439	HG03442	HG03445	HG03451	HG03457	HG03460	HG03469	HG03472	HG03478	HG03484	HG03490	HG03515	HG03518	HG03521	HG03538	HG03547	HG03556	HG03559	HG03565	HG03571	HG03577	HG03585	HG03593	HG03594	HG03600	HG03603	HG03615	HG03624	HG03629	HG03636	HG03644	HG03646	HG03649	HG03652	HG03660	HG03663	HG03667	HG03672	HG03679	HG03680	HG03681	HG03685	HG03686	HG03687	HG03691	HG03693	HG03694	HG03695	HG03696	HG03697	HG03702	HG03705	HG03708	HG03711	HG03713	HG03716	HG03718	HG03720	HG03727	HG03729	HG03738	HG03740	HG03742	HG03743	HG03744	HG03745	HG03746	HG03750	HG03753	HG03755	HG03767	HG03771	HG03773	HG03775	HG03777	HG03778	HG03779	HG03785	HG03786	HG03788	HG03790	HG03792	HG03800	HG03803	HG03809	HG03812	HG03815	HG03821	HG03824	HG03830	HG03833	HG03837	HG03844	HG03846	HG03848	HG03850	HG03851	HG03854	HG03856	HG03864	HG03866	HG03867	HG03869	HG03870	HG03871	HG03872	HG03875	HG03885	HG03887	HG03890	
HG03896	HG03899	HG03900	HG03902	HG03905	HG03908	HG03911	HG03914	HG03917	HG03920	HG03926	HG03941	HG03943	HG03950	HG03953	HG03960	HG03963	HG03965	HG03967	HG03969	HG03971	HG03974	HG03976	HG03978	HG03985	HG03990	HG03991	HG03998	HG03999	HG04002	HG04003	HG04006	HG04015	HG04017	HG04019	HG04020	HG04022	HG04023	HG04033	HG04039	HG04056	HG04060	HG04061	HG04080	HG04093	HG04094	HG04096	HG04098	HG04100	HG04107	HG04131	HG04134	HG04140	HG04146	HG04152	HG04155	HG04158	HG04161	HG04164	HG04173	HG04176	HG04182	HG04185	HG04188	HG04194	HG04198	HG04206	HG04210	HG04211	HG04219	HG04222	HG04225	HG04229	HG04235	HG04238	HG04239	NA06984	NA06986	NA06994	NA07048	NA07051	NA07347	NA07357	NA10851	NA11829	NA11831	NA11843	NA11881	NA11893	NA11919	NA11930	NA11932	NA11992	NA11994	NA12003	NA12005	NA12043	NA12045	NA12144	NA12154	NA12155	NA12272	NA12282	NA12286	NA12340	NA12342	NA12347	NA12399	NA12413	NA12546	NA12716	NA12748	NA12750	NA12760	NA12762	NA12775	NA12777	NA12812	NA12814	NA12827	NA12829	NA12842	NA12872	NA12874	NA12889	NA18486	NA18498	NA18501	NA18504	NA18507	NA18510	NA18516	NA18519	NA18522	NA18530	NA18534	NA18536	NA18543	NA18544	NA18546	NA18548	NA18549	NA18557	NA18558	NA18559	NA18561	NA18562	NA18563	NA18572	NA18603	NA18605	NA18606	NA18608	NA18609	NA18611	NA18612	NA18613	NA18620	NA18621	NA18622	NA18623	NA18624	NA18629	NA18632	NA18633	NA18635	NA18636	NA18637	NA18638	NA18639	NA18643	NA18645	NA18647	NA18648	NA18740	NA18745	NA18747	NA18748	NA18749	NA18757	NA18853	NA18856	NA18865	NA18868	NA18871	NA18874	NA18877	NA18879	NA18908	NA18910	NA18915	NA18917	NA18923	NA18934	NA18940	NA18943	NA18944	NA18945	NA18948	NA18952	NA18953	NA18959	NA18960	NA18961	NA18962	NA18965	NA18966	NA18967	NA18970	NA18971	NA18974	NA18977	NA18982	NA18983	NA18984	NA18985	NA18986	NA18988	NA18989	NA18990	NA18994	NA18995	NA19000	NA19004	NA19005	NA19006	NA19007	NA19009	NA19012	NA19020	NA19025	NA19026	NA19027	NA19028	NA19031	NA19035	NA19041	NA19043	NA19055	NA19056	NA19058	NA19060	NA19062	NA19063	NA19066	NA19067	NA19068	NA19070	NA19072	NA19075	
NA19076	NA19079	NA19082	NA19083	NA19085	NA19086	NA19088	NA19089	NA19091	NA19092	NA19096	NA19098	NA19107	NA19113	NA19117	NA19119	NA19121	NA19130	NA19138	NA19141	NA19144	NA19146	NA19153	NA19160	NA19171	NA19175	NA19184	NA19189	NA19198	NA19200	NA19207	NA19210	NA19213	NA19223	NA19236	NA19239	NA19248	NA19256	NA19307	NA19308	NA19309	NA19312	NA19317	NA19318	NA19319	NA19331	NA19334	NA19346	NA19347	NA19350	NA19360	NA19372	NA19374	NA19375	NA19376	NA19380	NA19383	NA19384	NA19385	NA19393	NA19394	NA19397	NA19428	NA19429	NA19430	NA19443	NA19448	NA19451	NA19452	NA19454	NA19455	NA19461	NA19466	NA19649	NA19652	NA19655	NA19658	NA19661	NA19664	NA19670	NA19676	NA19679	NA19682	NA19700	NA19703	NA19711	NA19717	NA19720	NA19723	NA19726	NA19729	NA19732	NA19735	NA19741	NA19747	NA19750	NA19756	NA19759	NA19762	NA19771	NA19774	NA19777	NA19780	NA19783	NA19786	NA19789	NA19792	NA19795	NA19818	NA19834	NA19900	NA19904	NA19908	NA19916	NA19920	NA19922	NA19982	NA19984	NA20126	NA20278	NA20281	NA20291	NA20298	NA20318	NA20340	NA20342	NA20346	NA20348	NA20351	NA20356	NA20362	NA20509	NA20510	NA20511	NA20512	NA20513	NA20515	NA20516	NA20518	NA20519	NA20520	NA20521	NA20524	NA20525	NA20527	NA20528	NA20532	NA20534	NA20536	NA20538	NA20539	NA20543	NA20544	NA20581	NA20586	NA20588	NA20752	NA20754	NA20755	NA20758	NA20759	NA20762	NA20763	NA20765	NA20767	NA20770	NA20778	NA20783	NA20785	NA20787	NA20792	NA20796	NA20798	NA20801	NA20803	NA20805	NA20806	NA20809	NA20810	NA20811	NA20812	NA20814	NA20815	NA20827	NA20845	NA20846	NA20850	NA20852	NA20858	NA20861	NA20863	NA20864	NA20866	NA20867	NA20870	NA20884	NA20885	NA20887	NA20889	NA20890	NA20891	NA20895	NA20897	NA20901	NA20903	NA20904	NA20905	NA20911	NA21087	NA21090	NA21091	NA21092	NA21093	NA21094	NA21095	NA21098	NA21099	NA21100	NA21104	NA21105	NA21107	NA21109	NA21111	NA21112	NA21113	NA21114	NA21115	NA21116	NA21117	NA21118	NA21119	NA21123	NA21124	NA21126	NA21127	NA21128	NA21129	NA21130	NA21133	NA21135
41 | Y	2655180	rs11575897	G	A	100	PASS	AA=G;AC=22;AF=0.0178427;AN=1233;DP=84761;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0902;VT=SNP;EX_TARGET	GT	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	
0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	0	0	0	0	1	0	0	0	1	1	0	0	0	0	0	1	0	0	0	1	1	0	0	1	1	0	1	0	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	0	0	1	1	0	1	0	1	0	0	0	0	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
42 | Y	2655471	.	A	C	100	PASS	AA=A;AC=5;AF=0.00405515;AN=1233;DP=72067;NS=1233;AMR_AF=0;AFR_AF=0;EUR_AF=0;SAS_AF=0;EAS_AF=0.0205;VT=SNP;EX_TARGET	GT	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	0	0	0	0	0	0	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	
0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
43 | 


--------------------------------------------------------------------------------