├── .Rprofile
├── .gitignore
├── ClinVar
│   ├── FreqFilterClinVarSubmission.ExpEvidence.csv
│   ├── FreqFilterClinVarSubmission.SubmissionInfo.csv
│   └── FreqFilterClinVarSubmission.Variant.csv
├── R
│   ├── .Rprofile
│   ├── Figures.R
│   ├── floorEffects.R
│   ├── setup.R
│   └── sumariseCaseSeriesVariants.R
├── README.md
├── data-raw
│   ├── .keep
│   ├── ExACVariants_0.001_sarcomeric.txt
│   ├── HCM_BRU_Variants_and_freqs.txt
│   ├── LMM_OXF_path_likelyPath_VUS_variants_allExACpopulations_withLowestClass.txt
│   ├── SupplementaryTable1.txt
│   ├── Table1.txt
│   ├── Table2.txt
│   └── authors.txt
├── data
│   ├── .keep
│   └── filtering_stats.tsv
├── figures
│   ├── .keep
│   └── Figure1.png
├── frequencyFilter.Rproj
├── manuscript
│   ├── .Rprofile
│   ├── SupplementaryMaterial.Rmd
│   ├── manuscript.Rmd
│   ├── nature.csl
│   ├── newTemplate.docx
│   └── template2.docx
├── packrat
│   ├── init.R
│   ├── packrat.lock
│   ├── packrat.opts
│   └── src
│       └── packrat
│           └── packrat_0.4.7-1.tar.gz
└── src
    ├── hcm_curation.R
    ├── precompute_exac_af_filter.R
    └── vcf_to_var_indiv_table.py
/.Rprofile:
--------------------------------------------------------------------------------
1 | #### -- Packrat Autoloader (version 0.4.7-1) -- ####
2 | source("packrat/init.R")
3 | #### -- End Packrat Autoloader -- ####
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 |
6 | *cache
7 | *unnamed-chunk*
8 | figures/Figure2.png
9 | figures/Figure3.png
10 | manuscript/manuscript.docx
11 | manuscript/SupplementaryMaterial.docx
12 | manuscript/*.bib
13 | packrat/lib*/
14 | packrat/src/
15 | *.html
16 | *.DS_Store
17 |
--------------------------------------------------------------------------------
/ClinVar/FreqFilterClinVarSubmission.ExpEvidence.csv:
--------------------------------------------------------------------------------
1 | Variant name,Condition ID type,Condition ID value,Collection method,Allele origin,Affected status
2 | NM_001103.2:c.1484C>T,HPO,HP:0001639,research,germline,unknown
3 | NM_014391.2:c.368C>T,HPO,HP:0001639,research,germline,unknown
4 | NM_001885.1:c.460G>A,HPO,HP:0001639,research,germline,unknown
5 | NM_172201.1:c.79C>T,HPO,HP:0001639,research,germline,unknown
6 | NM_007078.2:c.1823C>T,HPO,HP:0001639,research,germline,unknown
7 | NM_170707.3:c.976T>A,HPO,HP:0001639,research,germline,unknown
8 | NM_000256.3:c.2441_2443delAGA,HPO,HP:0001639,research,germline,unknown
9 | NM_000256.3:c.961G>A,HPO,HP:0001639,research,germline,unknown
10 | NM_000256.3:c.1000G>A,HPO,HP:0001639,research,germline,unknown
11 | NM_000256.3:c.13G>C,HPO,HP:0001639,research,germline,unknown
12 | NM_000256.3:c.1468G>A,HPO,HP:0001639,research,germline,unknown
13 | NM_000256.3:c.1321G>A,HPO,HP:0001639,research,germline,unknown
14 | NM_000256.3:c.624G>C,HPO,HP:0001639,research,germline,unknown
15 | NM_000256.3:c.2618C>A,HPO,HP:0001639,research,germline,unknown
16 | NM_000256.3:c.3049G>A,HPO,HP:0001639,research,germline,unknown
17 | NM_000256.3:c.3763G>A,HPO,HP:0001639,research,germline,unknown
18 | NM_000256.3:c.2269G>A,HPO,HP:0001639,research,germline,unknown
19 | NM_000256.3:c.2873C>T,HPO,HP:0001639,research,germline,unknown
20 | NM_000256.3:c.1786G>A,HPO,HP:0001639,research,germline,unknown
21 | NM_000256.3:c.529C>T,HPO,HP:0001639,research,germline,unknown
22 | NM_000256.3:c.2381C>T,HPO,HP:0001639,research,germline,unknown
23 | NM_000256.3:c.461T>C,HPO,HP:0001639,research,germline,unknown
24 | NM_000256.3:c.640G>A,HPO,HP:0001639,research,germline,unknown
25 | NM_000256.3:c.223G>A,HPO,HP:0001639,research,germline,unknown
26 | NM_000256.3:c.3330+5G>C,HPO,HP:0001639,research,germline,unknown
27 | NM_002471.3:c.3195G>C,HPO,HP:0001639,research,germline,unknown
28 | NM_002471.3:c.2384G>A,HPO,HP:0001639,research,germline,unknown
29 | NM_000257.2:c.2360G>A,HPO,HP:0001639,research,germline,unknown
30 | NM_000257.2:c.2183C>T,HPO,HP:0001639,research,germline,unknown
31 | NM_000257.2:c.1322C>T,HPO,HP:0001639,research,germline,unknown
32 | NM_000257.2:c.5326A>G,HPO,HP:0001639,research,germline,unknown
33 | NM_000432.3:c.37G>A,HPO,HP:0001639,research,germline,unknown
34 | NM_000432.3:c.141C>A,HPO,HP:0001639,research,germline,unknown
35 | NM_000432.3:c.401A>C,HPO,HP:0001639,research,germline,unknown
36 | NM_000258.2:c.170C>A,HPO,HP:0001639,research,germline,unknown
37 | NM_000258.2:c.170C>G,HPO,HP:0001639,research,germline,unknown
38 | NM_033118.3:c.260C>T,HPO,HP:0001639,research,germline,unknown
39 | NM_003803.3:c.1900+3A>C,HPO,HP:0001639,research,germline,unknown
40 | NM_198056.2:c.4534C>T,HPO,HP:0001639,research,germline,unknown
41 | NM_003673.3:c.458G>A,HPO,HP:0001639,research,germline,unknown
42 | NM_003280.2:c.435C>A,HPO,HP:0001639,research,germline,unknown
43 | NM_001001430.1:c.853C>T,HPO,HP:0001639,research,germline,unknown
44 | NM_014000.2:c.2923C>T,HPO,HP:0001639,research,germline,unknown
45 |
--------------------------------------------------------------------------------
/ClinVar/FreqFilterClinVarSubmission.SubmissionInfo.csv:
--------------------------------------------------------------------------------
1 | SubmitterID,SubmitterIDType,Submitter type,Submitter first name,Submitter last name,Submitter phone,Submitter email,Organization type,Organization,OrganizationID,Organization abbreviation,Institution,Street,City,Country,Postal code,Assembly name
2 | RBH_CV_BRU,Handle,Contact,Nicola,Whiffin,0044 20 7352 8121,n.whiffin@rbht.nhs.uk,other,Cardiovascular Biomedical Research Unit,500163,RBH CV BRU,Royal Brompton and Harefield NHS Foundation Trust,Sydney Street,London,United Kingdom,SW3 6NP,GRCh37
3 |
--------------------------------------------------------------------------------
/ClinVar/FreqFilterClinVarSubmission.Variant.csv:
--------------------------------------------------------------------------------
1 | HGVS name,Condition ID type,Condition ID value,Clinical significance,Date last evaluated,Assertion method,Assertion method citation,Comment on clinical significance,Gene symbol
2 | NM_001103.2:c.1484C>T,HPO,HP:0001639,Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 4 individuals in the literature (Chiu 2010, Theis 2006) and 2 reported on ClinVar (SCV000060535.4). Non-segregation with disease also reported (2 affecteds; ClinVar SCV000060535.4). No supporting segregation data. [BS1 + BS4]",ACTN2
3 | NM_014391.2:c.368C>T,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 8/8 missense algorithms predict benign. Mutant protein correctly incorporates into the sarcomere (Crocini 2013). Amino acid change is not conserved. [BS1 + BP4]",ANKRD1
4 | NM_001885.1:c.460G>A,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Identified in a HCM case with an alternative variant sufficient to cause disease (ClinVar SCV000060874.4). Previous pathogenic assertion in ClinVar is from 'literature only' for DCM (no segregation, only 200 controls used for reference). All others say 'VUS' [BS1 + BP5]",CRYAB
5 | NM_172201.1:c.79C>T,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 7/8 missense algorithms predict damaging. Identified in 2 Chinese families for Atrial Fibrillation (Yang 2004). Only parent and child tested for mutation (50% chance of segregation). An in vitro study on rat cardiomyocytes saw suppression of L-type Calcium current (Liu 2014). [BS1 + PP3]",KCNE2
6 | NM_007078.2:c.1823C>T,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Pathogenic in ClinVar from 'literature only'. No segregation or functional data. [BS1]",LDB3
7 | NM_170707.3:c.976T>A,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Position not conserved, 7/8 missense algorithms predict benign, simulation studies suggest no effect on coil B dimerisation. [BS1 + BP4]",LMNA
8 | NM_000256.3:c.2441_2443delAGA,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. In-frame deletion. Experimental studies have shown that this deletion does not lead to decreased MYBPC3 stability in vitro (Bahrudin 2008). Has been identified in 4 adult unaffected relatives (ClinVar SCV000203960.3). [BS1 + PM4]",MYBPC3
9 | NM_000256.3:c.961G>A,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Evidence based on presence in HCM individuals (1 with a MYH7 variant (Maron 2012)) and absence in ESP and nearby residues reported as pathogenic (Ser311Leu and Arg326Gln). No segregation or functional data. Reported multiple times in ClinVar with other variants that could explain phenotype (SCV000208258.4, SCV000261724.2). [BS1 + BP5]",MYBPC3
10 | NM_000256.3:c.1000G>A,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Very high Asian frequency (0.45% in ExAC). Reduced stability of MYBPC3, higher Ca2+ transients and action potential durations in HL-1 cells and rat cardiac myocytes (Bahrudin 2011). [BS1]",MYBPC3
11 | NM_000256.3:c.13G>C,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Found in 2 families; one individual but not unaffected mother and in another but not unaffected sibling. 3 individuals with variant also carry a pathogenic mutation in the same gene (ClinVar SCV000198995.3). Amino acid not conserved in mammals. [BS1 + BP5]",MYBPC3
12 | NM_000256.3:c.1468G>A,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 7/7 missense algorithms predict deleterious. Identified in individuals with HCM, DCM and LVNC. Conserved residue. Multiple reports of co-occurrence with an alternative variant that is sufficient to cause disease (ClinVar SCV000059050.4). [BS1 + BP5 + PP3]",MYBPC3
13 | NM_000256.3:c.1321G>A,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Multiple reports of co-occurrence with alternative pathogenic variants (ClinVar SCV000059030.4 and SCV000208011.4). Some computational modelling data suggest a conformational effect (Gajendrarao 2015). [BS1 + BP5]",MYBPC3
14 | NM_000256.3:c.624G>C,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Reports of possible non-segregation (ClinVar SCV000284248.1). [BS1]",MYBPC3
15 | NM_000256.3:c.2618C>A,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Identified as a compound het and in homozygous state (consanguineous family; Nanni 2003, Ingles 2005). No segregation data in HCM. 1 homozygote seen in ExAC. [BS1]",MYBPC3
16 | NM_000256.3:c.3049G>A,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Not conserved amino acid - 7/7 missense algorithms predict benign. No functional or segregation data. [BS1 + BP4]",MYBPC3
17 | NM_000256.3:c.3763G>A,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 1/5 individuals with variant also had a likely pathogenic variant in another gene (ClinVar SCV000198796.2). No other data available. [BS1]",MYBPC3
18 | NM_000256.3:c.2269G>A,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Conservative amino acid change. No functional or segregation data found. [BS1]",MYBPC3
19 | NM_000256.3:c.2873C>T,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 2 documented individuals also carry a MYBPC3 variant sufficient to explain disease (ClinVar SCV000059185.4). Amino acid is not well conserved, 7/7 missense algorithms predict benign. [BS1 + BP4 + BP2]",MYBPC3
20 | NM_000256.3:c.1786G>A,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 6/7 missense algorithms predict damaging. No clear segregation data (one suggestion the variant doesn't segregate (ClinVar SCV000259413.1)). [BS1 + PP3]",MYBPC3
21 | NM_000256.3:c.529C>T,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Different missense at the same position is common. Identified with another pathogenic MYBPC3 variant (ClinVar SCV000198969.3). Otherwise found in DCM. Position not well conserved. [BS1 + BP5]",MYBPC3
22 | NM_000256.3:c.2381C>T,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. No evidence towards a pathogenic classification found. [BS1]",MYBPC3
23 | NM_000256.3:c.461T>C,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Position not conserved, 6/7 missense algorithms predict benign. Identified in a child with a further homozygous MYBPC3 variant (ClinVar SCV000059282.4). [BS1 + BP4]",MYBPC3
24 | NM_000256.3:c.640G>A,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. No evidence towards a pathogenic classification found. [BS1]",MYBPC3
25 | NM_000256.3:c.223G>A,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. No evidence towards a pathogenic classification found. [BS1]",MYBPC3
26 | NM_000256.3:c.3330+5G>C,HPO,HP:0001639,Pathogenic,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Prevalence in affecteds statistically increased over controls (http://biorxiv.org/content/early/2016/02/24/041111). Shown to produce a truncated protein product and to segregate with disease (Watkins 1995). [BS1 + PVS1 + PS4 + PP1]",MYBPC3
27 | NM_002471.3:c.3195G>C,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 6/7 missense algorithms predict deleterious. Originally identified in an affected (died of congestive heart failure at 45 yrs) and not his 2 unaffected offspring or 150 controls (Carniel 2005). Amino acid change seen in birds. [BS1 + PP3]",MYH6
28 | NM_002471.3:c.2384G>A,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Identified in a 75 year old patient and not found in 170 controls (Niimura 2002). No other data. [BS1]",MYH6
29 | NM_000257.2:c.2360G>A,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 11 reports in total (7 literature, 4 LMM in ClinVar (SCV000059440.4)), majority South Asian descent - very high South Asian frequency (0.1%). 3 probands had another variant sufficient to cause disease. Only moderately conserved amino acid. Possible non-segregation (not in the father of an individual who died of sudden cardiac death; ClinVar SCV000059440.4). [BS1 + BP5]",MYH7
30 | NM_000257.2:c.2183C>T,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Pathogenic in ClinVar from 'literature only' - VUS by 3 other submitters. Identified originally in cis with another MYH7 mutation in 2 siblings with HCM (Blair 2001) - other variant (c.1816G>A) is absent in ExAC and has been shown to segregate in multiple families (24 literature reports) [BS1 + BP2]",MYH7
31 | NM_000257.2:c.1322C>T,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Pathogenic in ClinVar from 'literature only', in Laing distal myopathy. No change in mRNA levels. Amino acid is not conserved in mammals. [BS1]",MYH7
32 | NM_000257.2:c.5326A>G,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Large majority of identified cases are of Asian ancestry. 2 individuals at GeneDx have an alternative pathogenic variant (ClinVar SCV000208629.3). Only segregation data is in 2 affected siblings and not in the unaffected sibling (Blair 2002). [BS1 + BP5]",MYH7
33 | NM_000432.3:c.37G>A,HPO,HP:0001639,Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. ClinVar 'Pathogenic' assertion from literature only (ClinVar SCV000035365.1). Not found in an affected family member (Ball 2012). Other weak non-segregation data. Found in a 56 year old healthy individual (Ball 2012). Found in Ashkenazi Jewish individuals. Reported to lead to increased Ca2+ affinity (Szczesna 2001). [BS1 + BS4]",MYL2
34 | NM_000432.3:c.141C>A,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 7/8 missense algorithms predict benign. Co-occurrence with likely pathogenic (tested) MYH7:c.5134C>T mutation (segregation data) in literature (Andersen 2001, Hougs 2005). Other reports of a second variant (ClinVar SCV000060036.4). In vitro studies show reduced force development and power output (Szczesna-Cordary 2004, Abraham 2009, Greenberg 2009, Greenberg 2010). [BS1 + BP5 + BP4]",MYL2
35 | NM_000432.3:c.401A>C,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 8/8 missense prediction algorithms predict deleterious. Identified in 2 individuals with a second 'likely pathogenic' MYL2 variant (ClinVar SCV000060059.4). An in vitro functional study suggests that this variant may reduce cardiac muscle contraction efficiency (Burghardt 2013). Second hit causing more severe phenotype suggested. [BS1 + PP3]",MYL2
36 | NM_000258.2:c.170C>A,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 8/8 missense algorithms predict deleterious. Mutations at the same residue have been reported for HCM (Lee 2001). Identified along with pathogenic and likely pathogenic variants (ClinVar SCV000059671.4, SCV000207109.1). 2 affected siblings reported both with variant as homozygote (no support for recessive MYL3 inheritance). [BS1 + BP5 + PP3]",MYL3
37 | NM_000258.2:c.170C>G,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 7/8 missense algorithms predict damaging. Functional study showing lower complex affinity not significant for this variant after multiple testing (Lossie 2012). Segregation in 5 affected members of 2 families (ClinVar SCV000199362.3). All reports in East Asians (highest ExAC frequency at 0.05%). [BS1 + PP3 + PP1]",MYL3
38 | NM_033118.3:c.260C>T,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Pathogenic in ClinVar from 'literature only', no other submissions. Found as a compound het in a 13yr old and with an MYH7 variant (also in mildly affected parents; Davis 2001). Other compound het variant is also common. [BS1]",MYLK2
39 | NM_003803.3:c.1900+3A>C,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Originally identified as a homozygote in 3 yr old (ClinVar SCV000195854.1). No other data to support. 7 homozygotes in ExAC. Splicing tools predict no effect. [BS1 + BP4]",MYOM1
40 | NM_198056.2:c.4534C>T,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 8/8 missense algorithms predict damaging. Reported in association with Brugada syndrome (Rook 1999, Deschnes 2000, Meregalli 2009, Crotti 2012). In vitro functional studies show a potential impact (Rook 1999, Deschnes 2000), however these may not be representative of in vivo protein function. [BS1 + PP3]",SCN5A
41 | NM_003673.3:c.458G>A,HPO,HP:0001639,Likely Benign,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. 7/8 missense algorithms predict benign. Only pathogenic in ClinVar by 'literature only'. Identified in siblings but no solid segregation data (Hayashi 2004). Functional studies in yeast show the variant leads to an increase in the ability of TCAP to bind with titin and CS-1 (Hayashi 2004) - in vitro assay may not accurately represent biological function. Amino acid change is not conserved - 7 mammals carry the variant amino acid. [BS1 + BP4]",TCAP
42 | NM_003280.2:c.435C>A,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Functional studies using mutant porcine cardiac skinned fibers showed significantly increased force development and force recovery compared to wildtype (Landstrom 2008) with reduced Ca2+ binding [only in vitro studies]. No segregation data. Has also been reported in a patient with idiopathic DCM who also harbored a missense variant in MYBPC3 gene (Pinto 2011) [BS1]",TNNC1
43 | NM_001001430.1:c.853C>T,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Reported in >10 individuals with HCM and segregated in 5 affected individuals from multiple families (Watkins 1995, Elliott 1999, Garcia-Castro 2003, Torricelli 2003, Van Driest 2003, Theopistou 2004, Ingles 2005, Zeller 2006, Garcia-Castro 2009, Kaski 2009, Gimeno 2009, Millat 2010). Up to half of individuals carried a second variant (ClinVar SCV000060277.4). Residue not evolutionarily conserved. A milder effect with late age of onset has been suggested. In vitro studies suggest may have an effect on muscle contraction (Morimoto 1999, Yanaga 1999, Szczesna 2000, Hernandez 2005) however these might not accurately represent protein function. [BS1]",TNNT2
44 | NM_014000.2:c.2923C>T,HPO,HP:0001639,Uncertain significance,27/07/16,"ACMG Guidelines, 2015",PMID:25741868,"Frequency in ExAC too high to be causative for HCM. Pathogenic in ClinVar from 'literature only'. Reduced levels of vinculin in the patient with this variant, but other studies found that this was a general feature of HCM and not related to specific genotypes (Vasile 2006). [BS1]",VCL
--------------------------------------------------------------------------------
/R/.Rprofile:
--------------------------------------------------------------------------------
1 | source("../.Rprofile", chdir = TRUE)
2 |
--------------------------------------------------------------------------------
/R/Figures.R:
--------------------------------------------------------------------------------
1 | #setwd("./R")
2 | #source("../R/setup.R")
3 | my.CI<-0.95
4 |
5 | ### Figure 2
6 | # Scatter of case counts against ExAC allele counts for HCM variants: a zoomed main panel plus a full-range inset.
7 | variants<-read.table("../data-raw/LMM_OXF_path_likelyPath_VUS_variants_allExACpopulations_withLowestClass.txt", header=TRUE, sep="\t")
8 | maxHCMPenTerm<-9 # allele-count threshold drawn in dodgerblue4 (annotated "AC = 9" on the zoomed plot)
9 | maxHCM<-5 # allele-count threshold drawn in lightskyblue3 (annotated "AC = 5" on the zoomed plot)
10 | variants$Class <- factor(variants$Class, c("Pathogenic", "Likely Pathogenic", "VUS")) # level order fixes the colour/shape mapping below
11 | # Full-range plot (drawn later as the inset): rows with Disease "HCM", excluding the listed genes; axis titles are blanked.
12 | fullplot <- ggplot(subset(variants, (Disease=="HCM" & !(Gene %in% c('GLA','DMD','TAZ','EMD','LAMP2','VBP1','FHL1','KCNE1L','ZIC3','COL4A5','FLNA','MED12')))), aes(x=CaseCount, y=ExACCountALL, colour=Class, shape=Class)) +
13 | geom_point(size=2, alpha=0.6, position=position_jitter(width=0.5,height=0.5)) +
14 | geom_hline(yintercept=maxHCMPenTerm, linetype=2, colour="dodgerblue4") +
15 | geom_hline(yintercept=maxHCM, linetype=2, colour="lightskyblue3") +
16 | scale_color_manual(values=c("#ED1E24", "#FF9912", "gray38")) +
17 | scale_shape_manual(values=c(19,19,1)) +
18 | scale_y_continuous(limits = c(-2,178), expand = c(0, 0)) +
19 | scale_x_continuous(limits = c(-2,108), expand = c(0, 0)) +
20 | theme(axis.text=element_text(size=12),axis.title=element_blank(),legend.position = c(.7, .8),
21 | legend.text=element_text(size=14), legend.title = element_blank(), legend.key = element_blank(),
22 | axis.line.x = element_line(color="grey"), axis.line.y = element_line(color="grey"), panel.background = element_blank(),
23 | axis.ticks = element_line(color="grey"), plot.background = element_rect(linetype="solid", color="grey"))
24 | # Zoomed plot (main panel): same data and styling, narrower limits; threshold lines are offset by +0.2 and labelled.
25 | zoomplot <- ggplot(subset(variants, (Disease=="HCM" & !(Gene %in% c('GLA','DMD','TAZ','EMD','LAMP2','VBP1','FHL1','KCNE1L','ZIC3','COL4A5','FLNA','MED12')))), aes(x=CaseCount, y=ExACCountALL, colour=Class, shape=Class)) +
26 | geom_point(size=2, alpha=0.6, position=position_jitter(width=0.5,height=0.5)) +
27 | geom_hline(yintercept=(maxHCMPenTerm+0.2), linetype=2, colour="dodgerblue4") +
28 | geom_hline(yintercept=(maxHCM+0.2), linetype=2, colour="lightskyblue3") +
29 | scale_color_manual(values=c("#ED1E24", "#FF9912", "gray38")) +
30 | scale_shape_manual(values=c(19,19,1)) +
31 | labs(x="Count cases",y="Count ExAC") +
32 | #ylim(0,30) +
33 | #xlim(0,70) +
34 | scale_y_continuous(limits = c(-0.5,30), expand = c(0, 0)) +
35 | scale_x_continuous(limits = c(0,70), expand = c(0, 0)) +
36 | annotate("text",x=67,y=(maxHCMPenTerm+0.6), label="AC = 9",colour="dodgerblue4") +
37 | annotate("text",x=67,y=(maxHCM+0.6), label="AC = 5",colour="lightskyblue3") +
38 | theme(axis.text=element_text(size=12), axis.title=element_text(size=14),
39 | legend.position="none", axis.line.x = element_line(color="grey"), axis.line.y = element_line(color="grey"), panel.background = element_blank(),
40 | axis.ticks = element_line(color="grey"))
41 | # Render to PNG: the zoomed plot fills the device and the full-range plot goes into a half-size viewport centred at (.745, .745).
42 | png("../figures/Figure2.png",width=1200,height=1000,res=150)
43 | subvp<-viewport(width=.5,height=.5,x=.745,y=.745)
44 | # Draw each plot exactly once via print(). A bare `zoomplot` expression here would auto-print, and so render
45 | # the main panel a second time, whenever the script is run interactively or with Rscript rather than source()d.
46 | print(zoomplot)
47 | print(fullplot,vp=subvp)
48 | dev.off()
49 |
50 | ### Figure 3
51 |
52 | # Details for Exome plot (figure 3a): per-ancestry filtering statistics and the colour palette
53 | stats = read.table('../data/filtering_stats.tsv',sep='\t',quote='',header=TRUE)
54 | pops <- c('afr', 'amr', 'eas', 'fin', 'nfe', 'sas')
55 | color_amr = k_amr = '#ED1E24'
56 | color_eur = k_eur = '#6AA5CD'
57 | color_afr = k_afr = '#941494'
58 | color_sas = k_sas = '#FF9912'
59 | color_eas = k_eas = '#108C44'
60 | color_oth = k_oth = '#ABB9B9'
61 | color_mde = k_mde = '#000080'
62 | color_nfe = k_nfe = color_eur
63 | color_fin = k_fin = '#002F6C'
64 | # Colour and display name for each plotted ancestry ('fin' has a colour above but no entry in these two vectors).
65 | pop_colors = c('afr' = color_afr,'amr' = color_amr,'eas' = color_eas,'nfe' = color_nfe,'sas' = color_sas)
66 | pop_names = c('afr' = 'African','amr' = 'Latino','eas' = 'East Asian','nfe' = 'European','sas' = 'South Asian')
67 | # Look colours up by ancestry NAME: indexing with a factor would use its integer codes instead of its labels.
68 | ancestries = c(names(pop_names)[c(1,2,3,4,5)])
69 | stats$acol = pop_colors[as.character(stats$ancestry)]
70 |
71 | # Compute Odds Ratios and Confidence Intervals from ExAC and Internal HCM data
72 | #install.packages("tidyr")
73 | options(stringsAsFactors = FALSE) # applies to the read.table() calls that follow
74 | exac_data<-read.table("../data-raw/ExACVariants_0.001_sarcomeric.txt", header=TRUE, sep="\t") %>%
75 | tbl_df %>%
76 | transmute(Gene,CodingVar,AC=ExACAlleleCount,exacAF=ExACFreq,group="control") # ExAC rows are tagged as the control group; AC is the ExAC allele count
77 | hcm_data<-read.table("../data-raw/HCM_BRU_Variants_and_freqs.txt", header=TRUE, sep="\t") %>%
78 | tbl_df %>%
79 | transmute(Gene,CodingVar,AC=CaseCount,exacAF=ExACFreq,group="case") # case-series rows are tagged as the case group; AC is the case count
80 | # Stack both groups and bin each variant by ExAC frequency; cut() sorts the breaks, so each label is the upper bound of its (lower, upper] interval.
81 | data <- rbind(exac_data,hcm_data)
82 | data$bin <- cut(data$exacAF,breaks=c(1,0.001,0.0005,0.0001,0.00005,0.00001,-1),labels=c("0.00001","0.00005","0.0001","0.0005","0.001","1"))
83 |
84 | # Collapse one gene symbol to "MYH7", "MYBPC3" or the pooled label "OTHER".
85 | assignGeneGroup <- function(x){
86 |   if(x=="MYH7" || x=="MYBPC3") x else "OTHER"
87 | }
88 | # Element-wise assignGeneGroup() over a vector of gene symbols.
89 | applyGeneGroup <- function(x){
90 |   sapply(X=x, FUN=assignGeneGroup)
91 | }
92 |
93 | # Population size for one gene/group pair: 322 when the group is "case", otherwise a size chosen by gene label.
94 | assignPopSize <- function(x,y){
95 |   if(y=="case") return(322)
96 |   if(x=="MYH7") 60469 else if(x=="MYBPC3") 45793.5 else 57855
97 | }
98 | # Element-wise assignPopSize() over parallel vectors of gene labels (x) and groups (y).
99 | applyPopSize<-function(x,y){
100 |   mapply(FUN=assignPopSize, x, y)
101 | }
102 | # Per gene group and ExAC-frequency bin: case-vs-control odds ratio with a confidence interval computed on the log scale.
103 | ef_data<-data %>%
104 | mutate(Gene=applyGeneGroup(Gene)) %>% # collapse genes to MYH7 / MYBPC3 / OTHER
105 | group_by(Gene,bin,group) %>%
106 | summarise(totAC=sum(AC)) %>% # summed allele count per gene group, frequency bin and case/control group
107 | mutate(size=applyPopSize(Gene,group)) %>%
108 | transmute(group,odds=totAC/(size-totAC),seprecurse=((1/totAC)+(1/(size-totAC)))) %>% # odds, plus this group's term of the squared SE (summed and square-rooted below)
109 | unite(oddsSEpre,odds,seprecurse,sep="_") %>% # pack both numbers into one "odds_se" string so a single spread() can pivot them
110 | spread(key=group,value=oddsSEpre,fill="0_0") %>% # one column per group; gene/bin combinations missing for a group become "0_0"
111 | separate(case,c("CaseOdds","CaseSEPre"),sep="_") %>%
112 | separate(control,c("ControlOdds","ControlSEPre"),sep="_") %>%
113 | transmute(OR=(as.numeric(CaseOdds)/as.numeric(ControlOdds)),SE=(sqrt(as.numeric(CaseSEPre)+as.numeric(ControlSEPre)))) %>% # odds ratio = case odds / control odds
114 | transmute(OR,LowerCI=(exp(log(OR)-(1.96*SE))),UpperCI=(exp(log(OR)+(1.96*SE)))) %>% # interval uses the fixed multiplier 1.96
115 | mutate(ExACfreq=as.numeric(bin)) # integer code of the bin factor (its level position), not the frequency value itself
116 | # Display names and colours for the three gene groups; acol gives each ef_data row its group's colour.
117 | gene_list <- c('MYBPC3','MYH7','OTHER')
118 | gene_names = c('MYBPC3' = 'MYBPC3','MYH7' = 'MYH7','OTHER' = 'Other Sarcomeric')
119 | gene_colors = c('MYH7' = color_afr,'MYBPC3' = color_nfe,'OTHER' = color_sas)
120 | genes = c(names(gene_names)[c(1,2,3)])
121 | ef_data$acol = gene_colors[ef_data$Gene]
122 |
123 | # Plot multipanel plot: Figure 3 as two side-by-side base-graphics panels written to a single PNG
124 | png('../figures/Figure3.png',width=3800,height=1500,res=150)
125 | m<-rbind(c(1,2)) # one row, two columns: panel 1 on the left, panel 2 on the right
126 | layout(m)
127 | # Panel (a): filtered_vars against log10(af_limit), one line per ancestry; xlim runs from -2 to -6, so the x axis is reversed.
128 | par(mar=c(5,6,3,2))
129 | plot(NA, NA, xlim=c(-2,-6), ylim=c(0,550), ann=FALSE, axes=FALSE, yaxs='i')
130 | #abline(h=(0:5)*100, lwd=.5)
131 | abline(h=0) # hand-drawn baseline (axes=FALSE above)
132 | abline(v=-1.84) # -2 plus 4% of the x range; presumably the left edge of the plot region under default axis padding
133 | for (ancestry in ancestries) {
134 | rows = stats$ancestry==ancestry & stats$af_limit < .05
135 | points(x=log10(stats$af_limit[rows]), y=stats$filtered_vars[rows], type='l', lwd=5, col=stats$acol[rows])
136 | }
137 | axis(side=1, at=-2:-6, labels=c("1","0.1","0.01","0.001","0.0001"), lwd=0, lwd.ticks=1, cex.axis=2) # ticks at log10(AF), labelled with the same AF as a percentage
138 | axis(side=2, at=(0:5)*100, labels=(0:5)*100, lwd=0, lwd.ticks=0, las=2, cex.axis=2)
139 | mtext(side=1, line=3.5, text='Maximum credible population AF (%)', cex=2.5)
140 | mtext(side=2, line=4.5, text='Mean protein-altering variants per exome', cex=2.5)
141 | legend('topright',pop_names[ancestries],col=pop_colors[ancestries],text.font=2,text.col=pop_colors[ancestries],lwd=5, cex=2)
142 | mtext("(a)", side=3, adj=-0.115, line=0.5, cex=2.8)
143 | # Panel (b): odds ratio per ExAC-frequency bin for each gene group, with error bars from LowerCI to UpperCI.
144 | par(mar=c(5,6,3,2))
145 | plot(NA, NA, xlim=c(5,1), ylim=c(0,62), ann=FALSE, axes=FALSE, yaxs='i', log='x') # x is ExACfreq on a reversed log axis; values above 5 fall outside xlim
146 | #abline(h=(0:6)*10, lwd=.5)
147 | abline(h=0)
148 | abline(v=5.33) # 5 extended by 4% of the log10 x range is about 5.33; presumably the left edge of the plot region again
149 | for (gene in genes) {
150 | rows = ef_data$Gene==gene
151 | points(ef_data$ExACfreq[rows], ef_data$OR[rows], type='b', lwd=5, pch=19, col=ef_data$acol[rows])
152 | arrows(ef_data$ExACfreq[rows], ef_data$LowerCI[rows], ef_data$ExACfreq[rows], ef_data$UpperCI[rows], length=0.02, angle=90, code=3, col=ef_data$acol[rows])
153 | }
154 | axis(side=1, at=c(5,4,3,2,1), labels=c("0.1","0.05","0.01","0.005","0.001"), lwd=0, lwd.ticks=1, cex.axis=2) # ticks at ExACfreq values 5..1 with percentage labels
155 | axis(side=2, at=(0:6)*10, labels=(0:6)*10, lwd=0, lwd.ticks=0, las=2, cex.axis=2)
156 | mtext(side=1, line=3.5, text='Maximum ExAC frequency (%)', cex=2.5)
157 | mtext(side=2, line=4.0, text='Odds ratio', cex=2.5)
158 | legend('topleft',gene_names[genes],col=gene_colors[genes],text.font=2,text.col=gene_colors[genes],lwd=5, cex=2)
159 | mtext("(b)", side=3, adj=-0.13, line=0.5, cex=2.8)
160 |
161 | dev.off()
162 |
--------------------------------------------------------------------------------
/R/floorEffects.R:
--------------------------------------------------------------------------------
1 | # Root-finding objective: the target confidence `y` minus the Poisson probability
2 | # of observing at most one event when the expected count is `x`.
3 | floorEffects_f1 <- function(x,y=0.95){
4 |   return(y - ppois(q=1, lambda=x))
5 | }
6 | # For each confidence threshold, find the Poisson mean at which the probability of observing at most one
7 | # allele equals that threshold, then divide by popSize to express it as an allele frequency.
8 | floorEffects <- function(confThresholds=c(0.95,0.99,0.999),popSize=121412){
9 |   output <- data.frame(matrix(nrow=length(confThresholds),ncol=2))
10 |   names(output) <- c("confidence","alleleFrequency")
11 |   output$confidence <- confThresholds
12 |   for(k in seq_along(output$confidence)){
13 |     # uniroot() forwards its third argument to floorEffects_f1 as `y`; the root is searched for in [0,1]
14 |     poissonMean <- uniroot(floorEffects_f1,c(0,1),output$confidence[k])$root
15 |     output$alleleFrequency[k] <- poissonMean/popSize
16 |   }
17 |   return(output)
18 | }
19 | # floorEffects() is looking for the AF at which a single allele is outside the specified poisson confidence, across a range of confidences
--------------------------------------------------------------------------------
/R/setup.R:
--------------------------------------------------------------------------------
1 | ## Libraries: attach the packages used by the analysis and manuscript code
2 | ## (the commented-out blocks are "install if missing" fallbacks; presumably superseded by packrat - confirm)
3 | # if(!"dplyr" %in% installed.packages()){
4 | # install.packages("dplyr")
5 | # }
6 | # if(!"tidyr" %in% installed.packages()){
7 | # install.packages("tidyr")
8 | # }
9 | # if(!"ggplot2" %in% installed.packages()){
10 | # install.packages("ggplot2")
11 | # }
12 | # if(!"binom" %in% installed.packages()){
13 | # install.packages("binom")
14 | # }
15 | # if(!"pander" %in% installed.packages()){
16 | # install.packages("pander")
17 | # }
18 | # if(!"grid" %in% installed.packages()){
19 | # install.packages("grid")
20 | # }
21 | # if(!"withr" %in% installed.packages()){
22 | # install.packages("withr")
23 | # }
24 |
25 | library(dplyr) # provides filter() and the %>% pipe used by the scripts in R/
26 | library(tidyr)
27 | library(ggplot2)
28 | #library(binom)
29 | library(pander)
30 | library(grid)
31 | #library(withr)
32 |
33 | ## (Disabled) install knitcitations into a repo-local library if not already installed
34 | # if(!"knitcitations" %in% installed.packages(lib.loc="../repoPackageLibrary")){
35 | # install.packages("knitcitations",lib="../repoPackageLibrary")
36 | # }
37 |
38 | # ## (Disabled) install knitauthors from GitHub into the repo-local library if not already installed (requires devtools)
39 | # if(!"knitauthors" %in% installed.packages(lib.loc="../repoPackageLibrary")){
40 | # if(!"devtools" %in% installed.packages()){
41 | # install.packages("devtools")
42 | # }
43 | # withr::with_libpaths(new = "../repoPackageLibrary/",
44 | # devtools::install_github("jamesware/knitauthors",force=T),
45 | # action= "prefix")
46 | # }
47 | ## Set up citations: attach knitcitations from the default library path
48 | # library("knitcitations",lib.loc="../repoPackageLibrary")
49 | library(knitcitations)
50 | cleanbib() # presumably clears any bibliography state left by a previous knit - confirm
51 | options("citation_format" = "pandoc") # sets the global option citation_format to "pandoc"
52 |
53 | ## Set knitr chunk defaults: figures are written under ../figures/, code is hidden (echo=FALSE), warnings and messages are shown
54 | # if(!"knitr" %in% installed.packages()){
55 | # install.packages("knitr")
56 | # }
57 | library(knitr)
58 | opts_chunk$set(fig.path="../figures/", collapse = TRUE, echo=FALSE, warning=TRUE, message=TRUE)#, cache=TRUE)#, results="asis")
59 | library(knitauthors) # attached from the default library path; the repo-local variant below is disabled
60 | # library("knitauthors",lib.loc="../repoPackageLibrary")
61 |
--------------------------------------------------------------------------------
/R/sumariseCaseSeriesVariants.R:
--------------------------------------------------------------------------------
1 | ### Exploring ExAC AC of variants from 6179 HCM cases (Walsh et al)
2 |
3 | # summariseCaseSeriesVariants(): load the reference variant table and publish summary
4 | # counts as global variables (assigned with <<-) for use by the calling document.
5 | #
6 | # Reads the tab-delimited file under ../data-raw/, keeps rows with Disease == "HCM" and
7 | # drops rows whose Gene is in the exclusion list below, then assigns:
8 | #   referenceVariants  the filtered data frame
9 | #   myVariantsN        number of rows in referenceVariants
10 | #   myAllPath          rows with Class "Pathogenic" or "Likely Pathogenic"
11 | #   myFailPath / myPassPath / myAbsentPath
12 | #                      of those, rows with ExACCountALL > 9 / <= 9 / == 0
13 | #   myAllVus           rows with Class "VUS"
14 | #   myFailVus / myPassVus
15 | #                      of those, rows with ExACCountALL > 9 / <= 9
16 | # Takes no arguments; the value of the last assignment (myAllVus) is returned invisibly.
17 | summariseCaseSeriesVariants <- function(){
18 |
19 |   # NOTE(review): these look like X-linked genes - confirm the reason for excluding them
20 |   excludedGenes <- c('GLA','DMD','TAZ','EMD','LAMP2','VBP1','FHL1','KCNE1L','ZIC3','COL4A5','FLNA','MED12')
21 |
22 |   hcmVariants <- read.delim("../data-raw/LMM_OXF_path_likelyPath_VUS_variants_allExACpopulations_withLowestClass.txt") %>%
23 |     filter(Disease=="HCM") %>%
24 |     filter(!(Gene %in% excludedGenes))
25 |
26 |   referenceVariants <<- hcmVariants
27 |   myVariantsN <<- nrow(hcmVariants)
28 |
29 |   # Split once by classification; every count below is a row count of one of these subsets.
30 |   pathVariants <- filter(hcmVariants, Class %in% c("Pathogenic","Likely Pathogenic"))
31 |   vusVariants <- filter(hcmVariants, Class=="VUS")
32 |
33 |   myFailPath <<- nrow(filter(pathVariants, ExACCountALL>9))
34 |   myPassPath <<- nrow(filter(pathVariants, ExACCountALL<=9))
35 |   myAbsentPath <<- nrow(filter(pathVariants, ExACCountALL==0))
36 |   myAllPath <<- nrow(pathVariants)
37 |
38 |   myFailVus <<- nrow(filter(vusVariants, ExACCountALL>9))
39 |   myPassVus <<- nrow(filter(vusVariants, ExACCountALL<=9))
40 |   myAllVus <<- nrow(vusVariants)
41 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### Using high-resolution variant frequencies to empower clinical genome interpretation
2 |
3 | **Abstract**
4 |
5 | Whole exome and genome sequencing have transformed the discovery of genetic variants that cause human Mendelian disease, but discriminating pathogenic from benign variants remains a daunting challenge. Rarity is recognized as a necessary, although not sufficient, criterion for pathogenicity, but frequency cutoffs used in Mendelian analysis are often arbitrary and overly lenient. Recent very large reference datasets, such as the Exome Aggregation Consortium (ExAC), provide an unprecedented opportunity to obtain robust frequency estimates even for very rare variants. Here we present a statistical framework for the frequency-based filtering of candidate disease-causing variants, accounting for disease prevalence, genetic and allelic heterogeneity, inheritance mode, penetrance, and sampling variance in reference datasets. Using the example of cardiomyopathy, we show that our approach reduces by two-thirds the number of candidate variants under consideration in the average exome, and identifies 43 variants previously reported as pathogenic that can now be reclassified. We present precomputed allele frequency cutoffs for all variants in the ExAC dataset.
6 |
7 | This repository contains code to reproduce analyses, generate figures, and compile the manuscript.
8 |
9 | **Content**
10 |
11 | - manuscript -> files to compile the main manuscript and the supplementary material
12 | - src -> source code to compute af_filter values
13 | - data -> output from the src code
14 | - R -> R code to do the analysis and create the figures
15 | - data-raw -> raw data files to recreate the analysis
16 | - figures -> the figures
17 | - ClinVar -> ClinVar submission from variant curation
18 | - packrat -> libraries of packages required by the repository
19 |
20 | **Instructions for use with RStudio**
21 |
22 | - Download RStudio (https://www.rstudio.com/)
23 | - Install knitr and devtools packages
24 | `install.packages(c("knitr","devtools"))`
25 | - Download and unpack repository
26 | - Select `File->New project->Existing Directory`
27 | - Browse the 'Project working directory' to match the location of the unpacked repository
28 | - Click `Create Project`
29 | - Wait for the files to be imported and for packrat to install all the required packages (you should get this message: Packrat bootstrap successfully completed. Restarting R and entering packrat mode...)
30 | - Close RStudio and restart it
31 | - You should now be able to re-create the manuscript and figures by knitting the file: `./manuscript/manuscript.Rmd`
32 |
--------------------------------------------------------------------------------
/data-raw/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/data-raw/.keep
--------------------------------------------------------------------------------
/data-raw/ExACVariants_0.001_sarcomeric.txt:
--------------------------------------------------------------------------------
1 | Gene CodingVar ExACAlleleCount ExACTotal ExACFreq
2 | MYH7 c.11C>T 2 120794 0.00001656
3 | MYH7 c.77C>T 69 121330 0.00056870
4 | MYH7 c.115G>A 7 121364 0.00005768
5 | MYH7 c.175G>A 1 121302 0.00000824
6 | MYH7 c.343T>C 1 121348 0.00000824
7 | MYH7 c.428G>A 1 121400 0.00000824
8 | MYH7 c.427C>T 6 121402 0.00004942
9 | MYH7 c.443G>T 1 121410 0.00000824
10 | MYH7 c.530C>T 2 121412 0.00001647
11 | MYH7 c.632C>T 3 121406 0.00002471
12 | MYH7 c.709C>T 1 121412 0.00000824
13 | MYH7 c.728G>A 1 121412 0.00000824
14 | MYH7 c.958G>A 1 119812 0.00000835
15 | MYH7 c.976G>C 8 117940 0.00006783
16 | MYH7 c.1128C>A 1 121388 0.00000824
17 | MYH7 c.1129G>A 3 121390 0.00002471
18 | MYH7 c.1231G>A 1 121408 0.00000824
19 | MYH7 c.1322C>T 16 121408 0.00013179
20 | MYH7 c.1324C>T 1 121410 0.00000824
21 | MYH7 c.1325G>A 2 121406 0.00001647
22 | MYH7 c.1447G>A 1 121380 0.00000824
23 | MYH7 c.1681G>A 5 121412 0.00004118
24 | MYH7 c.1727A>G 1 121408 0.00000824
25 | MYH7 c.1954A>G 1 120602 0.00000829
26 | MYH7 c.1988G>A 2 121338 0.00001648
27 | MYH7 c.2080C>T 2 120584 0.00001659
28 | MYH7 c.2167C>T 3 121400 0.00002471
29 | MYH7 c.2206A>G 1 121406 0.00000824
30 | MYH7 c.2348G>A 2 121376 0.00001648
31 | MYH7 c.2359C>T 7 121382 0.00005767
32 | MYH7 c.2360G>A 27 121382 0.00022244
33 | MYH7 c.2389G>A 4 121372 0.00003296
34 | MYH7 c.2573G>A 1 121396 0.00000824
35 | MYH7 c.2594A>G 1 121404 0.00000824
36 | MYH7 c.2606G>A 4 121390 0.00003295
37 | MYH7 c.2608C>T 1 121386 0.00000824
38 | MYH7 c.2890G>C 51 121412 0.00042006
39 | MYH7 c.2945T>C 110 121412 0.00090601
40 | MYH7 c.3056C>A 1 121412 0.00000824
41 | MYH7 c.3133C>T 2 121396 0.00001648
42 | MYH7 c.3134G>A 4 121396 0.00003295
43 | MYH7 c.3158G>A 9 121396 0.00007414
44 | MYH7 c.3169G>A 1 121398 0.00000824
45 | MYH7 c.3236G>A 1 121352 0.00000824
46 | MYH7 c.3286G>T 19 121334 0.00015659
47 | MYH7 c.3301G>A 8 121344 0.00006593
48 | MYH7 c.3788C>A 1 121408 0.00000824
49 | MYH7 c.3981C>A 12 115980 0.00010347
50 | MYH7 c.4052C>T 4 121060 0.00003304
51 | MYH7 c.4075C>T 1 121236 0.00000825
52 | MYH7 c.4258C>T 1 121404 0.00000824
53 | MYH7 c.4377G>T 37 121128 0.00030546
54 | MYH7 c.4423C>T 10 121374 0.00008239
55 | MYH7 c.4487A>C 3 121412 0.00002471
56 | MYH7 c.4717G>A 10 121342 0.00008241
57 | MYH7 c.4909G>A 7 121402 0.00005766
58 | MYH7 c.4954G>T 3 121192 0.00002475
59 | MYH7 c.5071G>A 10 121384 0.00008238
60 | MYH7 c.5135G>A 1 121366 0.00000824
61 | MYH7 c.5257G>A 1 121410 0.00000824
62 | MYH7 c.5279C>T 2 121410 0.00001647
63 | MYH7 c.5305C>A 1 121410 0.00000824
64 | MYH7 c.5326A>G 4 121410 0.00003295
65 | MYH7 c.5329G>A 5 121410 0.00004118
66 | MYH7 c.5494C>T 6 121320 0.00004946
67 | MYH7 c.5507C>T 6 121248 0.00004949
68 | MYH7 c.5536C>T 2 121000 0.00001653
69 | MYH7 c.5561C>T 4 121130 0.00003302
70 | MYH7 c.5588G>A 1 121352 0.00000824
71 | MYH7 c.5779A>T 4 121328 0.00003297
72 | MYH7 c.5786C>T 3 121260 0.00002474
73 | MYH7 c.1370T>C 1 121396 0.00000824
74 | MYH7 c.1772T>C 1 121406 0.00000824
75 | MYH7 c.2710C>T 1 121392 0.00000824
76 | MYH7 c.3994G>A 2 117736 0.00001699
77 | MYH7 c.4606G>A 1 121236 0.00000825
78 | MYH7 c.4985G>A 7 121340 0.00005769
79 | MYH7 c.5030G>A 6 121384 0.00004943
80 | MYH7 c.345+1G>A 3 121322 0.00002473
81 | MYH7 c.5500G>A 6 121280 0.00004947
82 | MYH7 c.5485G>A 3 121340 0.00002472
83 | MYH7 c.5458C>G 3 121382 0.00002472
84 | MYH7 c.5229G>T 3 121408 0.00002471
85 | MYH7 c.5203T>A 4 121408 0.00003295
86 | MYH7 c.4904T>C 3 121406 0.00002471
87 | MYH7 c.4660G>C 2 119834 0.00001669
88 | MYH7 c.4656G>T 1 119582 0.00000836
89 | MYH7 c.4294G>A 6 121400 0.00004942
90 | MYH7 c.3349G>T 1 102626 0.00000974
91 | MYH7 c.3208G>A 1 121390 0.00000824
92 | MYH7 c.2974C>A 2 121410 0.00001647
93 | MYH7 c.2636C>A 2 121378 0.00001648
94 | MYH7 c.2585C>T 4 121392 0.00003295
95 | MYH7 c.2420G>A 1 121350 0.00000824
96 | MYH7 c.5704G>C 9 121412 0.00007413
97 | MYH7 c.4372C>G 1 121072 0.00000826
98 | MYH7 c.3235C>T 6 121354 0.00004944
99 | MYH7 c.5243G>A 6 121408 0.00004942
100 | MYH7 c.3152C>T 2 121398 0.00001647
101 | MYH7 c.3856G>A 2 121408 0.00001647
102 | MYH7 c.4030C>T 2 120556 0.00001659
103 | MYH7 c.4348G>A 1 121330 0.00000824
104 | MYH7 c.5790+1G>A 1 121198 0.00000825
105 | MYH7 c.5494C>G 1 121320 0.00000824
106 | MYH7 c.5458C>T 3 121382 0.00002472
107 | MYH7 c.4672A>T 1 120520 0.00000830
108 | MYH7 c.3853+1G>A 2 121400 0.00001647
109 | MYH7 c.3801G>C 4 121410 0.00003295
110 | MYH7 c.3730A>C 1 121362 0.00000824
111 | MYH7 c.3138G>A 3 121400 0.00002471
112 | MYH7 c.3116A>G 14 121378 0.00011534
113 | MYH7 c.2680-2A>G 1 121202 0.00000825
114 | MYH7 c.1859T>A 1 121062 0.00000826
115 | MYH7 c.916A>G 1 120662 0.00000829
116 | MYH7 c.397G>A 1 121252 0.00000825
117 | MYH7 c.350A>T 5 121152 0.00004127
118 | MYH7 c.28G>C 9 121058 0.00007434
119 | MYH7 c.5283+1G>A 1 121410 0.00000824
120 | MYH7 c.706G>A 1 121412 0.00000824
121 | MYH7 c.5704G>A 2 121412 0.00001647
122 | MYH7 c.298G>A 2 121406 0.00001647
123 | MYH7 c.4399C>G 19 121266 0.00015668
124 | MYH7 c.5660A>G 1 121412 0.00000824
125 | MYH7 c.5587C>T 1 121350 0.00000824
126 | MYH7 c.5459G>A 1 121378 0.00000824
127 | MYH7 c.5422G>A 4 121400 0.00003295
128 | MYH7 c.5088G>C 3 121390 0.00002471
129 | MYH7 c.5029C>T 2 121380 0.00001648
130 | MYH7 c.4817G>A 6 121408 0.00004942
131 | MYH7 c.4687C>G 2 120928 0.00001654
132 | MYH7 c.4210G>A 2 121328 0.00001648
133 | MYH7 c.4187G>A 1 120966 0.00000827
134 | MYH7 c.4078G>A 7 121250 0.00005773
135 | MYH7 c.4010G>A 18 119490 0.00015064
136 | MYH7 c.3982G>A 5 115954 0.00004312
137 | MYH7 c.2561A>C 1 121406 0.00000824
138 | MYH7 c.2177A>G 1 121404 0.00000824
139 | MYH7 c.1913C>T 1 120976 0.00000827
140 | MYH7 c.1700G>A 2 121412 0.00001647
141 | MYH7 c.1315A>T 2 121410 0.00001647
142 | MYH7 c.3200T>C 2 121394 0.00001648
143 | MYH7 c.3168G>C 1 121398 0.00000824
144 | MYH7 c.4004C>T 4 118910 0.00003364
145 | MYH7 c.2456G>A 1 121360 0.00000824
146 | MYH7 c.3268C>G 2 121270 0.00001649
147 | MYH7 c.3830G>A 5 121404 0.00004118
148 | MYH7 c.4941G>C 1 121396 0.00000824
149 | MYH7 c.925G>A 3 120668 0.00002486
150 | MYH7 c.5504_5505delAG 1 121258 0.00000825
151 | MYH7 c.4975G>A 1 121318 0.00000824
152 | MYH7 c.5602G>C 1 121380 0.00000824
153 | MYH7 c.5222T>C 1 121408 0.00000824
154 | MYH7 c.5014G>A 4 121378 0.00003295
155 | MYH7 c.3749G>A 1 121394 0.00000824
156 | MYH7 c.3318G>T 1 121320 0.00000824
157 | MYH7 c.3294G>C 1 121346 0.00000824
158 | MYH7 c.3005C>T 1 121412 0.00000824
159 | MYH7 c.2972A>G 1 121412 0.00000824
160 | MYH7 c.2776C>G 2 121412 0.00001647
161 | MYH7 c.2599G>A 2 121396 0.00001648
162 | MYH7 c.2371C>A 1 121386 0.00000824
163 | MYH7 c.2044G>A 1 121278 0.00000825
164 | MYH7 c.1246A>C 2 121412 0.00001647
165 | MYH7 c.875A>C 1 121386 0.00000824
166 | MYH7 c.481G>A 3 121412 0.00002471
167 | MYH7 c.5800G>A 3 120988 0.00002480
168 | MYH7 c.5783G>A 1 121290 0.00000824
169 | MYH7 c.5774G>A 1 121350 0.00000824
170 | MYH7 c.5761C>T 1 121378 0.00000824
171 | MYH7 c.5737G>A 1 121402 0.00000824
172 | MYH7 c.5726G>A 1 121412 0.00000824
173 | MYH7 c.5725C>T 4 121412 0.00003295
174 | MYH7 c.5699A>T 1 121412 0.00000824
175 | MYH7 c.5696T>C 1 121412 0.00000824
176 | MYH7 c.5689C>T 2 121412 0.00001647
177 | MYH7 c.5687T>C 1 121412 0.00000824
178 | MYH7 c.5606A>G 1 121386 0.00000824
179 | MYH7 c.5537G>A 1 120998 0.00000826
180 | MYH7 c.5530G>A 1 121096 0.00000826
181 | MYH7 c.5507C>G 2 121248 0.00001650
182 | MYH7 c.5503G>C 1 121268 0.00000825
183 | MYH7 c.5500G>T 2 121280 0.00001649
184 | MYH7 c.5498A>G 1 121300 0.00000824
185 | MYH7 c.5495G>A 4 121310 0.00003297
186 | MYH7 c.5467G>C 1 121366 0.00000824
187 | MYH7 c.5452C>T 2 121392 0.00001648
188 | MYH7 c.5448A>C 1 121390 0.00000824
189 | MYH7 c.5419G>A 1 121404 0.00000824
190 | MYH7 c.5386C>T 1 121412 0.00000824
191 | MYH7 c.5374G>A 1 121412 0.00000824
192 | MYH7 c.5369T>C 1 121412 0.00000824
193 | MYH7 c.5363A>G 1 121412 0.00000824
194 | MYH7 c.5348A>G 1 121410 0.00000824
195 | MYH7 c.5342G>A 1 121410 0.00000824
196 | MYH7 c.5303A>T 1 121410 0.00000824
197 | MYH7 c.5293A>G 1 121410 0.00000824
198 | MYH7 c.5288C>T 1 121412 0.00000824
199 | MYH7 c.5287G>A 5 121412 0.00004118
200 | MYH7 c.5234T>C 1 121408 0.00000824
201 | MYH7 c.5231C>A 1 121408 0.00000824
202 | MYH7 c.5231C>T 2 121408 0.00001647
203 | MYH7 c.5230G>T 1 121410 0.00000824
204 | MYH7 c.5201T>G 2 121408 0.00001647
205 | MYH7 c.5187G>T 1 121406 0.00000824
206 | MYH7 c.5130T>G 1 121380 0.00000824
207 | MYH7 c.5119A>T 1 121384 0.00000824
208 | MYH7 c.5105C>A 1 121372 0.00000824
209 | MYH7 c.5105C>T 1 121372 0.00000824
210 | MYH7 c.5090G>A 3 121388 0.00002471
211 | MYH7 c.5074G>T 1 121392 0.00000824
212 | MYH7 c.5066G>A 2 121394 0.00001648
213 | MYH7 c.5065C>T 3 121392 0.00002471
214 | MYH7 c.5065C>G 2 121392 0.00001648
215 | MYH7 c.5054T>A 1 121396 0.00000824
216 | MYH7 c.5039T>C 1 121392 0.00000824
217 | MYH7 c.5032A>C 1 121390 0.00000824
218 | MYH7 c.5026C>T 1 121382 0.00000824
219 | MYH7 c.5020G>A 3 121384 0.00002471
220 | MYH7 c.5017A>G 1 121384 0.00000824
221 | MYH7 c.5013C>G 2 121382 0.00001648
222 | MYH7 c.4996G>A 1 121376 0.00000824
223 | MYH7 c.4993G>A 1 121366 0.00000824
224 | MYH7 c.4992C>A 3 121366 0.00002472
225 | MYH7 c.4984C>T 1 121334 0.00000824
226 | MYH7 c.4954G>A 1 121192 0.00000825
227 | MYH7 c.4949T>C 4 121390 0.00003295
228 | MYH7 c.4943G>A 1 121396 0.00000824
229 | MYH7 c.4919A>T 4 121408 0.00003295
230 | MYH7 c.4903A>G 2 121408 0.00001647
231 | MYH7 c.4894G>A 5 121408 0.00004118
232 | MYH7 c.4861G>A 1 121410 0.00000824
233 | MYH7 c.4831G>T 1 121410 0.00000824
234 | MYH7 c.4823G>A 3 121412 0.00002471
235 | MYH7 c.4823G>T 1 121412 0.00000824
236 | MYH7 c.4821C>A 1 121412 0.00000824
237 | MYH7 c.4787C>T 5 121412 0.00004118
238 | MYH7 c.4786T>C 1 121412 0.00000824
239 | MYH7 c.4780G>C 1 121412 0.00000824
240 | MYH7 c.4751A>G 1 121404 0.00000824
241 | MYH7 c.4721G>A 3 121362 0.00002472
242 | MYH7 c.4709C>A 2 121292 0.00001649
243 | MYH7 c.4707G>C 1 121274 0.00000825
244 | MYH7 c.4679G>A 1 120744 0.00000828
245 | MYH7 c.4678C>T 1 120680 0.00000829
246 | MYH7 c.4603G>A 1 121260 0.00000825
247 | MYH7 c.4589G>A 1 121318 0.00000824
248 | MYH7 c.4588C>G 1 121318 0.00000824
249 | MYH7 c.4588C>T 1 121318 0.00000824
250 | MYH7 c.4571A>C 2 121376 0.00001648
251 | MYH7 c.4558G>A 1 121394 0.00000824
252 | MYH7 c.4553C>A 1 121398 0.00000824
253 | MYH7 c.4545G>C 2 121404 0.00001647
254 | MYH7 c.4536G>C 1 121402 0.00000824
255 | MYH7 c.4525A>C 2 121402 0.00001647
256 | MYH7 c.4499G>A 3 121412 0.00002471
257 | MYH7 c.4481A>T 2 121412 0.00001647
258 | MYH7 c.4471T>A 1 121412 0.00000824
259 | MYH7 c.4459G>A 2 121408 0.00001647
260 | MYH7 c.4441C>T 1 121406 0.00000824
261 | MYH7 c.4435A>G 3 121396 0.00002471
262 | MYH7 c.4390C>T 1 121212 0.00000825
263 | MYH7 c.4343A>T 14 121344 0.00011537
264 | MYH7 c.4321G>T 1 121370 0.00000824
265 | MYH7 c.4301G>A 2 121394 0.00001648
266 | MYH7 c.4292A>G 1 121402 0.00000824
267 | MYH7 c.4286T>C 1 121404 0.00000824
268 | MYH7 c.4283T>C 4 121404 0.00003295
269 | MYH7 c.4238C>T 2 121394 0.00001648
270 | MYH7 c.4219G>A 2 121362 0.00001648
271 | MYH7 c.4216G>A 1 121344 0.00000824
272 | MYH7 c.4196A>G 1 121148 0.00000825
273 | MYH7 c.4180G>A 1 120664 0.00000829
274 | MYH7 c.4159G>C 1 121348 0.00000824
275 | MYH7 c.4141C>G 1 121372 0.00000824
276 | MYH7 c.4097C>T 3 121330 0.00002473
277 | MYH7 c.4090G>A 5 121312 0.00004122
278 | MYH7 c.4076G>A 4 121242 0.00003299
279 | MYH7 c.4063G>A 1 121168 0.00000825
280 | MYH7 c.4061A>G 1 121162 0.00000825
281 | MYH7 c.4040A>G 1 120856 0.00000827
282 | MYH7 c.4023C>A 1 120476 0.00000830
283 | MYH7 c.4021G>A 1 120314 0.00000831
284 | MYH7 c.4021G>T 1 120314 0.00000831
285 | MYH7 c.4018T>C 1 120154 0.00000832
286 | MYH7 c.4009C>T 1 119366 0.00000838
287 | MYH7 c.4003T>G 1 118786 0.00000842
288 | MYH7 c.3977A>G 2 115354 0.00001734
289 | MYH7 c.3974C>T 3 114572 0.00002618
290 | MYH7 c.3974C>G 1 114572 0.00000873
291 | MYH7 c.3931C>T 1 121404 0.00000824
292 | MYH7 c.3908G>A 1 121398 0.00000824
293 | MYH7 c.3883G>A 1 121402 0.00000824
294 | MYH7 c.3829C>T 2 121402 0.00001647
295 | MYH7 c.3814G>A 3 121408 0.00002471
296 | MYH7 c.3809T>C 1 121408 0.00000824
297 | MYH7 c.3803G>A 10 121410 0.00008237
298 | MYH7 c.3797C>A 1 121408 0.00000824
299 | MYH7 c.3782G>A 1 121404 0.00000824
300 | MYH7 c.3779G>A 4 121404 0.00003295
301 | MYH7 c.3778C>T 1 121404 0.00000824
302 | MYH7 c.3777C>A 3 121404 0.00002471
303 | MYH7 c.3770A>G 37 121404 0.00030477
304 | MYH7 c.3749G>C 1 121394 0.00000824
305 | MYH7 c.3493A>G 1 78954 0.00001267
306 | MYH7 c.3376C>T 1 97864 0.00001022
307 | MYH7 c.3350A>T 1 102504 0.00000976
308 | MYH7 c.3283G>A 1 121336 0.00000824
309 | MYH7 c.3274G>A 2 121308 0.00001649
310 | MYH7 c.3229G>T 1 121378 0.00000824
311 | MYH7 c.3196A>G 1 121396 0.00000824
312 | MYH7 c.3149G>C 1 121394 0.00000824
313 | MYH7 c.3137T>G 1 121400 0.00000824
314 | MYH7 c.3134G>T 2 121396 0.00001648
315 | MYH7 c.3094G>A 2 121412 0.00001647
316 | MYH7 c.3089A>G 1 121412 0.00000824
317 | MYH7 c.3068C>T 1 121412 0.00000824
318 | MYH7 c.3058C>A 2 121412 0.00001647
319 | MYH7 c.3050T>C 1 121412 0.00000824
320 | MYH7 c.3046A>G 1 121412 0.00000824
321 | MYH7 c.3040G>A 1 121412 0.00000824
322 | MYH7 c.3037G>A 2 121412 0.00001647
323 | MYH7 c.3035C>A 5 121412 0.00004118
324 | MYH7 c.3028C>T 1 121410 0.00000824
325 | MYH7 c.3010C>G 1 121412 0.00000824
326 | MYH7 c.2982G>C 1 121410 0.00000824
327 | MYH7 c.2968G>A 1 121412 0.00000824
328 | MYH7 c.2891T>C 1 121412 0.00000824
329 | MYH7 c.2846A>T 1 121412 0.00000824
330 | MYH7 c.2822G>A 2 121412 0.00001647
331 | MYH7 c.2821C>A 1 121412 0.00000824
332 | MYH7 c.2812G>T 1 121412 0.00000824
333 | MYH7 c.2804A>T 2 121412 0.00001647
334 | MYH7 c.2787G>C 2 121412 0.00001647
335 | MYH7 c.2766G>T 1 121412 0.00000824
336 | MYH7 c.2765T>C 2 121412 0.00001647
337 | MYH7 c.2762A>C 1 121412 0.00000824
338 | MYH7 c.2761G>T 1 121412 0.00000824
339 | MYH7 c.2749G>A 1 121410 0.00000824
340 | MYH7 c.2689A>C 1 121326 0.00000824
341 | MYH7 c.2681A>T 1 121254 0.00000825
342 | MYH7 c.2677G>A 1 121320 0.00000824
343 | MYH7 c.2665C>T 1 121348 0.00000824
344 | MYH7 c.2573G>T 1 121396 0.00000824
345 | MYH7 c.2452A>G 1 121358 0.00000824
346 | MYH7 c.2425G>T 6 121200 0.00004950
347 | MYH7 c.2400G>T 1 121376 0.00000824
348 | MYH7 c.2357C>T 2 121382 0.00001648
349 | MYH7 c.2347C>T 1 121372 0.00000824
350 | MYH7 c.2312G>C 1 121270 0.00000825
351 | MYH7 c.2263C>T 1 121410 0.00000824
352 | MYH7 c.2134C>T 1 121320 0.00000824
353 | MYH7 c.2060C>A 4 119670 0.00003343
354 | MYH7 c.2052G>A 1 118986 0.00000840
355 | MYH7 c.1997A>G 1 121356 0.00000824
356 | MYH7 c.1975A>G 1 121236 0.00000825
357 | MYH7 c.1971G>T 1 121220 0.00000825
358 | MYH7 c.1896G>T 1 120942 0.00000827
359 | MYH7 c.1894G>T 1 120916 0.00000827
360 | MYH7 c.1892T>C 2 120924 0.00001654
361 | MYH7 c.1888C>T 2 118290 0.00001691
362 | MYH7 c.1856C>T 4 121136 0.00003302
363 | MYH7 c.1788G>C 1 121404 0.00000824
364 | MYH7 c.1766A>G 1 121406 0.00000824
365 | MYH7 c.1756G>A 1 121406 0.00000824
366 | MYH7 c.1754T>C 1 121406 0.00000824
367 | MYH7 c.1730T>C 1 121408 0.00000824
368 | MYH7 c.1729T>C 1 121408 0.00000824
369 | MYH7 c.1709A>G 2 121410 0.00001647
370 | MYH7 c.1702A>G 1 121412 0.00000824
371 | MYH7 c.1699C>T 1 121410 0.00000824
372 | MYH7 c.1681G>T 1 121412 0.00000824
373 | MYH7 c.1662C>A 1 121408 0.00000824
374 | MYH7 c.1630A>G 2 121404 0.00001647
375 | MYH7 c.1611G>C 2 121400 0.00001647
376 | MYH7 c.1601T>C 1 121396 0.00000824
377 | MYH7 c.1588A>G 1 121380 0.00000824
378 | MYH7 c.1519G>A 1 121412 0.00000824
379 | MYH7 c.1405G>A 1 121334 0.00000824
380 | MYH7 c.1334C>T 2 121410 0.00001647
381 | MYH7 c.1262T>C 1 121412 0.00000824
382 | MYH7 c.1261A>G 13 121412 0.00010707
383 | MYH7 c.1230C>A 1 121412 0.00000824
384 | MYH7 c.1126G>C 1 121396 0.00000824
385 | MYH7 c.1095G>T 1 121404 0.00000824
386 | MYH7 c.1086G>T 1 121408 0.00000824
387 | MYH7 c.1045A>G 3 121400 0.00002471
388 | MYH7 c.1033G>A 1 121382 0.00000824
389 | MYH7 c.1018G>A 1 121358 0.00000824
390 | MYH7 c.968T>C 9 119224 0.00007549
391 | MYH7 c.968T>G 1 119224 0.00000839
392 | MYH7 c.949G>C 1 120254 0.00000832
393 | MYH7 c.936C>A 1 120582 0.00000829
394 | MYH7 c.934T>C 1 120618 0.00000829
395 | MYH7 c.913A>C 1 120616 0.00000829
396 | MYH7 c.910A>T 1 120588 0.00000829
397 | MYH7 c.898A>T 1 120304 0.00000831
398 | MYH7 c.853A>G 1 121378 0.00000824
399 | MYH7 c.821T>C 1 121174 0.00000825
400 | MYH7 c.751C>T 1 121410 0.00000824
401 | MYH7 c.658A>G 2 121412 0.00001647
402 | MYH7 c.640G>A 1 121406 0.00000824
403 | MYH7 c.610C>T 3 121412 0.00002471
404 | MYH7 c.601A>T 2 121412 0.00001647
405 | MYH7 c.589G>A 2 121412 0.00001647
406 | MYH7 c.565A>C 1 121412 0.00000824
407 | MYH7 c.541G>A 1 121402 0.00000824
408 | MYH7 c.500C>G 1 121412 0.00000824
409 | MYH7 c.465C>A 1 121412 0.00000824
410 | MYH7 c.445G>A 1 121412 0.00000824
411 | MYH7 c.442A>G 2 121412 0.00001647
412 | MYH7 c.341T>C 1 121352 0.00000824
413 | MYH7 c.340A>T 1 121356 0.00000824
414 | MYH7 c.335G>A 1 121362 0.00000824
415 | MYH7 c.328G>A 3 121370 0.00002472
416 | MYH7 c.298G>T 1 121406 0.00000824
417 | MYH7 c.268A>G 2 121410 0.00001647
418 | MYH7 c.253G>C 2 121410 0.00001647
419 | MYH7 c.211G>A 5 121374 0.00004119
420 | MYH7 c.208A>T 1 121366 0.00000824
421 | MYH7 c.184G>A 1 121290 0.00000824
422 | MYH7 c.175G>T 1 121302 0.00000824
423 | MYH7 c.166G>A 1 121318 0.00000824
424 | MYH7 c.161G>T 1 121318 0.00000824
425 | MYH7 c.161G>A 2 121318 0.00001649
426 | MYH7 c.160C>G 1 121326 0.00000824
427 | MYH7 c.138T>A 1 121354 0.00000824
428 | MYH7 c.124G>A 1 121366 0.00000824
429 | MYH7 c.91T>C 1 121354 0.00000824
430 | MYH7 c.76G>A 1 121332 0.00000824
431 | MYH7 c.67C>T 3 121320 0.00002473
432 | MYH7 c.54G>C 1 121282 0.00000825
433 | MYH7 c.41C>T 1 121166 0.00000825
434 | MYH7 c.22G>A 1 120950 0.00000827
435 | MYH7 c.22G>C 1 120950 0.00000827
436 | MYH7 c.17T>C 3 120852 0.00002482
437 | MYH7 c.3947_3948insA 1 121406 0.00000824
438 | MYH7 c.3100-2A>C 1 121310 0.00000824
439 | MYH7 c.2922+1G>A 1 121412 0.00000824
440 | MYH7 c.2470delG 1 121394 0.00000824
441 | MYH7 c.2163-1G>T 1 121398 0.00000824
442 | MYH7 c.2162+1G>A 1 121392 0.00000824
443 | MYH7 c.1888+1G>A 1 117866 0.00000848
444 | MYH7 c.733-1G>A 1 121412 0.00000824
445 | MYH7 c.503-2A>T 1 121412 0.00000824
446 | MYH7 c.259delA 1 121412 0.00000824
447 | MYH7 c.54delG 1 121288 0.00000824
448 | TNNT2 c.83C>T 59 121208 0.00048677
449 | TNNT2 c.230C>T 4 117802 0.00003396
450 | TNNT2 c.257A>C 1 117696 0.00000850
451 | TNNT2 c.274C>T 1 121396 0.00000824
452 | TNNT2 c.311C>T 1 121400 0.00000824
453 | TNNT2 c.391C>T 1 115728 0.00000864
454 | TNNT2 c.406G>A 1 116090 0.00000861
455 | TNNT2 c.430C>T 4 114926 0.00003481
456 | TNNT2 c.662T>C 15 121380 0.00012358
457 | TNNT2 c.732G>T 40 121412 0.00032946
458 | TNNT2 c.832C>T 40 93228 0.00042906
459 | TNNT2 c.856C>T 1 89396 0.00001119
460 | TNNT2 c.857G>A 7 88810 0.00007882
461 | TNNT2 c.860G>A 1 87972 0.00001137
462 | TNNT2 c.853G>A 6 90344 0.00006641
463 | TNNT2 c.745G>A 2 121412 0.00001647
464 | TNNT2 c.583G>A 2 121350 0.00001648
465 | TNNT2 c.408G>T 1 116076 0.00000862
466 | TNNT2 c.178A>G 6 121308 0.00004946
467 | TNNT2 c.10A>C 2 121406 0.00001647
468 | TNNT2 c.833G>A 2 93298 0.00002144
469 | TNNT2 c.421delC 4 115786 0.00003455
470 | TNNT2 c.780+1G>A 2 121412 0.00001647
471 | TNNT2 c.629A>T 1 121386 0.00000824
472 | TNNT2 c.218A>G 2 117138 0.00001707
473 | TNNT2 c.857G>T 1 88810 0.00001126
474 | TNNT2 c.452G>A 1 110290 0.00000907
475 | TNNT2 c.83C>A 1 121208 0.00000825
476 | TNNT2 c.782T>C 1 52256 0.00001914
477 | TNNT2 c.749T>C 1 121412 0.00000824
478 | TNNT2 c.734C>T 1 121412 0.00000824
479 | TNNT2 c.587G>A 2 121364 0.00001648
480 | TNNT2 c.431G>A 2 114762 0.00001743
481 | TNNT2 c.251dupG 2 117824 0.00001697
482 | TNNT2 c.523A>C 1 121412 0.00000824
483 | TNNT2 c.361G>A 21 121412 0.00017296
484 | TNNT2 c.145G>C 1 121400 0.00000824
485 | TNNT2 c.59C>T 1 120768 0.00000828
486 | TNNT2 c.571-1G>A 2 117154 0.00001707
487 | TNNT2 c.863A>G 1 87244 0.00001146
488 | TNNT2 c.835G>A 1 93274 0.00001072
489 | TNNT2 c.808G>T 1 58244 0.00001717
490 | TNNT2 c.805A>G 1 57730 0.00001732
491 | TNNT2 c.794G>A 1 55152 0.00001813
492 | TNNT2 c.749T>A 2 121412 0.00001647
493 | TNNT2 c.700A>G 1 121408 0.00000824
494 | TNNT2 c.652G>T 4 121382 0.00003295
495 | TNNT2 c.640G>A 1 121388 0.00000824
496 | TNNT2 c.553G>T 1 121412 0.00000824
497 | TNNT2 c.547C>T 1 121412 0.00000824
498 | TNNT2 c.546G>A 1 121412 0.00000824
499 | TNNT2 c.528G>C 1 121412 0.00000824
500 | TNNT2 c.488A>G 1 121412 0.00000824
501 | TNNT2 c.475C>T 2 121402 0.00001647
502 | TNNT2 c.472C>T 1 121402 0.00000824
503 | TNNT2 c.467G>A 1 121408 0.00000824
504 | TNNT2 c.449A>G 1 111530 0.00000897
505 | TNNT2 c.425A>G 1 115582 0.00000865
506 | TNNT2 c.418A>G 1 116056 0.00000862
507 | TNNT2 c.406G>C 1 116090 0.00000861
508 | TNNT2 c.401G>A 1 116042 0.00000862
509 | TNNT2 c.374A>G 1 121412 0.00000824
510 | TNNT2 c.373G>A 1 121412 0.00000824
511 | TNNT2 c.358C>G 1 121412 0.00000824
512 | TNNT2 c.284T>C 1 121396 0.00000824
513 | TNNT2 c.283A>G 2 121394 0.00001648
514 | TNNT2 c.235A>T 2 117982 0.00001695
515 | TNNT2 c.224T>G 2 117540 0.00001702
516 | TNNT2 c.206C>T 1 115146 0.00000868
517 | TNNT2 c.200C>T 2 121364 0.00001648
518 | TNNT2 c.199C>A 1 121360 0.00000824
519 | TNNT2 c.164C>A 1 121400 0.00000824
520 | TNNT2 c.150G>T 1 121400 0.00000824
521 | TNNT2 c.115G>A 2 121346 0.00001648
522 | TNNT2 c.106G>C 6 121340 0.00004945
523 | TNNT2 c.101C>T 1 121334 0.00000824
524 | TNNT2 c.91G>A 1 121280 0.00000825
525 | TNNT2 c.82G>A 4 121176 0.00003301
526 | TNNT2 c.80C>G 2 121168 0.00001651
527 | TNNT2 c.80C>A 1 121168 0.00000825
528 | TNNT2 c.67G>A 1 121378 0.00000824
529 | TNNT2 c.40G>A 2 121408 0.00001647
530 | TNNT2 c.34G>A 1 121408 0.00000824
531 | TNNT2 c.4T>C 1 121406 0.00000824
532 | TNNT2 c.584_585delAG 1 121354 0.00000824
533 | TNNT2 c.381+1G>A 1 121412 0.00000824
534 | TNNT2 c.203+1G>A 1 121378 0.00000824
535 | TNNT2 c.147_149delAGA 1 121400 0.00000824
536 | TNNI3 c.5C>T 4 120314 0.00003325
537 | TNNI3 c.220C>A 2 101688 0.00001967
538 | TNNI3 c.235C>T 46 106538 0.00043177
539 | TNNI3 c.407G>A 1 120380 0.00000831
540 | TNNI3 c.434G>A 3 120502 0.00002490
541 | TNNI3 c.433C>T 1 120492 0.00000830
542 | TNNI3 c.485G>A 3 120102 0.00002498
543 | TNNI3 c.484C>T 4 120202 0.00003328
544 | TNNI3 c.497C>T 1 119968 0.00000834
545 | TNNI3 c.586G>A 1 120728 0.00000828
546 | TNNI3 c.258delC 1 111848 0.00000894
547 | TNNI3 c.347C>G 2 119928 0.00001668
548 | TNNI3 c.356C>A 4 119620 0.00003344
549 | TNNI3 c.293G>A 1 119734 0.00000835
550 | TNNI3 c.92C>T 1 58032 0.00001723
551 | TNNI3 c.304G>A 3 120058 0.00002499
552 | TNNI3 c.114dupA 3 24306 0.00012343
553 | TNNI3 c.428C>A 3 120482 0.00002490
554 | TNNI3 c.167T>C 2 81522 0.00002453
555 | TNNI3 c.34C>A 1 70152 0.00001425
556 | TNNI3 c.611G>T 1 120734 0.00000828
557 | TNNI3 c.603G>C 1 120736 0.00000828
558 | TNNI3 c.581A>C 1 120720 0.00000828
559 | TNNI3 c.561G>T 1 120606 0.00000829
560 | TNNI3 c.556C>T 1 120478 0.00000830
561 | TNNI3 c.541A>G 1 118904 0.00000841
562 | TNNI3 c.482C>T 2 120216 0.00001664
563 | TNNI3 c.463A>G 1 120480 0.00000830
564 | TNNI3 c.439G>C 1 120522 0.00000830
565 | TNNI3 c.421C>T 1 120444 0.00000830
566 | TNNI3 c.370G>C 1 118454 0.00000844
567 | TNNI3 c.368C>T 3 118756 0.00002526
568 | TNNI3 c.341T>C 1 120064 0.00000833
569 | TNNI3 c.340A>G 1 120090 0.00000833
570 | TNNI3 c.331A>G 4 120174 0.00003329
571 | TNNI3 c.322G>A 1 120166 0.00000832
572 | TNNI3 c.318G>C 5 120182 0.00004160
573 | TNNI3 c.308G>A 2 120106 0.00001665
574 | TNNI3 c.298C>T 1 119904 0.00000834
575 | TNNI3 c.292C>G 1 119730 0.00000835
576 | TNNI3 c.292C>T 11 119730 0.00009187
577 | TNNI3 c.278T>C 1 113262 0.00000883
578 | TNNI3 c.272C>T 1 112992 0.00000885
579 | TNNI3 c.263T>G 1 112340 0.00000890
580 | TNNI3 c.259G>A 1 111848 0.00000894
581 | TNNI3 c.250G>C 1 110322 0.00000906
582 | TNNI3 c.245C>G 1 108616 0.00000921
583 | TNNI3 c.239G>A 1 107584 0.00000930
584 | TNNI3 c.236G>T 5 106742 0.00004684
585 | TNNI3 c.220C>G 1 101688 0.00000983
586 | TNNI3 c.200A>T 3 92314 0.00003250
587 | TNNI3 c.178G>C 4 84752 0.00004720
588 | TNNI3 c.166A>G 1 81204 0.00001231
589 | TNNI3 c.158T>G 1 78466 0.00001274
590 | TNNI3 c.157C>G 1 76272 0.00001311
591 | TNNI3 c.143A>C 3 23950 0.00012526
592 | TNNI3 c.57C>G 1 69126 0.00001447
593 | TNNI3 c.55A>G 3 69320 0.00004328
594 | TNNI3 c.52C>A 2 69530 0.00002876
595 | TNNI3 c.34C>T 3 70152 0.00004276
596 | TNNI3 c.23C>T 1 107406 0.00000931
597 | TNNI3 c.632G>T 1 120718 0.00000828
598 | TNNI3 c.373-1G>T 1 120172 0.00000832
599 | TNNI3 c.204delG 6 94672 0.00006338
600 | TNNI3 c.95_99dupAGCCG 1 51804 0.00001930
601 | TNNI3 c.24+2T>A 1 107624 0.00000929
602 | MYBPC3 c.13G>C 36 119710 0.00030073
603 | MYBPC3 c.103C>T 5 88878 0.00005626
604 | MYBPC3 c.184A>C 14 53782 0.00026031
605 | MYBPC3 c.223G>A 3 32862 0.00009129
606 | MYBPC3 c.440C>T 9 23770 0.00037863
607 | MYBPC3 c.461T>C 5 24758 0.00020195
608 | MYBPC3 c.495G>C 8 23792 0.00033625
609 | MYBPC3 c.529C>T 6 95690 0.00006270
610 | MYBPC3 c.532G>A 2 97810 0.00002045
611 | MYBPC3 c.557C>T 5 104952 0.00004764
612 | MYBPC3 c.604A>C 5 111214 0.00004496
613 | MYBPC3 c.624G>C 18 110910 0.00016229
614 | MYBPC3 c.646G>A 40 109458 0.00036544
615 | MYBPC3 c.655G>T 1 94000 0.00001064
616 | MYBPC3 c.682G>A 6 105368 0.00005694
617 | MYBPC3 c.772G>A 3 76868 0.00003903
618 | MYBPC3 c.787G>A 6 25596 0.00023441
619 | MYBPC3 c.814C>T 2 23870 0.00008379
620 | MYBPC3 c.818G>A 1 23428 0.00004268
621 | MYBPC3 c.842G>A 2 28624 0.00006987
622 | MYBPC3 c.859C>T 2 48056 0.00004162
623 | MYBPC3 c.1000G>A 36 106554 0.00033786
624 | MYBPC3 c.1147C>G 9 101614 0.00008857
625 | MYBPC3 c.1246G>A 3 103534 0.00002898
626 | MYBPC3 c.1321G>A 18 110970 0.00016221
627 | MYBPC3 c.1370C>T 22 112792 0.00019505
628 | MYBPC3 c.1373G>A 5 113016 0.00004424
629 | MYBPC3 c.1409G>A 1 116034 0.00000862
630 | MYBPC3 c.1468G>A 25 120382 0.00020767
631 | MYBPC3 c.1484G>A 1 120574 0.00000829
632 | MYBPC3 c.1504C>T 3 120674 0.00002486
633 | MYBPC3 c.1519G>A 82 120688 0.00067944
634 | MYBPC3 c.1564G>A 47 120296 0.00039070
635 | MYBPC3 c.1591G>C 2 115446 0.00001732
636 | MYBPC3 c.1624G>C 2 80474 0.00002485
637 | MYBPC3 c.1633C>A 4 118074 0.00003388
638 | MYBPC3 c.1664T>C 3 118610 0.00002529
639 | MYBPC3 c.1814A>G 7 47806 0.00014643
640 | MYBPC3 c.1813G>A 9 47656 0.00018885
641 | MYBPC3 c.1822C>T 7 49798 0.00014057
642 | MYBPC3 c.1828G>C 3 51486 0.00005827
643 | MYBPC3 c.1855G>A 43 54808 0.00078456
644 | MYBPC3 c.2002C>T 2 119048 0.00001680
645 | MYBPC3 c.2003G>A 32 118698 0.00026959
646 | MYBPC3 c.2063C>A 1 53416 0.00001872
647 | MYBPC3 c.2077G>T 4 120730 0.00003313
648 | MYBPC3 c.2176C>T 4 110088 0.00003633
649 | MYBPC3 c.2197C>T 10 116866 0.00008557
650 | MYBPC3 c.2198G>A 4 117500 0.00003404
651 | MYBPC3 c.2269G>A 8 120616 0.00006633
652 | MYBPC3 c.2308G>A 1 120498 0.00000830
653 | MYBPC3 c.2311G>A 1 17666 0.00005661
654 | MYBPC3 c.2429G>A 4 120518 0.00003319
655 | MYBPC3 c.2459G>A 2 120694 0.00001657
656 | MYBPC3 c.2458C>T 1 120696 0.00000829
657 | MYBPC3 c.2500C>T 2 120634 0.00001658
658 | MYBPC3 c.2504G>T 9 120604 0.00007462
659 | MYBPC3 c.2552C>T 8 118158 0.00006771
660 | MYBPC3 c.2618C>A 13 64128 0.00020272
661 | MYBPC3 c.2654C>T 1 45096 0.00002217
662 | MYBPC3 c.2728C>A 6 15362 0.00039057
663 | MYBPC3 c.2783C>T 1 119088 0.00000840
664 | MYBPC3 c.2807C>T 7 118830 0.00005891
665 | MYBPC3 c.2827C>T 2 117632 0.00001700
666 | MYBPC3 c.2873C>T 7 107600 0.00006506
667 | MYBPC3 c.2882C>T 5 103876 0.00004813
668 | MYBPC3 c.3005G>A 5 107436 0.00004654
669 | MYBPC3 c.3004C>T 72 107228 0.00067147
670 | MYBPC3 c.3049G>A 10 117428 0.00008516
671 | MYBPC3 c.3065G>C 3 119054 0.00002520
672 | MYBPC3 c.3106C>T 62 120392 0.00051498
673 | MYBPC3 c.3137C>T 7 120394 0.00005814
674 | MYBPC3 c.3142C>T 2 120360 0.00001662
675 | MYBPC3 c.3181C>T 2 119398 0.00001675
676 | MYBPC3 c.3372C>A 2 87900 0.00002275
677 | MYBPC3 c.3373G>A 2 87880 0.00002276
678 | MYBPC3 c.3569G>A 7 119246 0.00005870
679 | MYBPC3 c.3580G>A 1 119816 0.00000835
680 | MYBPC3 c.3697C>T 1 120590 0.00000829
681 | MYBPC3 c.3742G>A 4 120334 0.00003324
682 | MYBPC3 c.3763G>A 9 119936 0.00007504
683 | MYBPC3 c.3811C>T 3 116692 0.00002571
684 | MYBPC3 c.1038_1042dupCGGCA 1 118814 0.00000842
685 | MYBPC3 c.2441_2443delAGA 39 120542 0.00032354
686 | MYBPC3 c.82G>A 18 88604 0.00020315
687 | MYBPC3 c.251G>A 1 26098 0.00003832
688 | MYBPC3 c.713G>A 8 107664 0.00007431
689 | MYBPC3 c.961G>A 37 80000 0.00046250
690 | MYBPC3 c.1153G>A 1 98880 0.00001011
691 | MYBPC3 c.1286C>T 22 106526 0.00020652
692 | MYBPC3 c.1309G>A 1 109754 0.00000911
693 | MYBPC3 c.1591G>A 2 115446 0.00001732
694 | MYBPC3 c.1960C>T 1 120584 0.00000829
695 | MYBPC3 c.1985T>C 1 120402 0.00000831
696 | MYBPC3 c.2618C>T 2 64128 0.00003119
697 | MYBPC3 c.2815C>T 4 118364 0.00003379
698 | MYBPC3 c.1543_1545delAAC 1 120616 0.00000829
699 | MYBPC3 c.3286G>A 2 52352 0.00003820
700 | MYBPC3 c.1319G>A 1 110836 0.00000902
701 | MYBPC3 c.2539T>C 1 119904 0.00000834
702 | MYBPC3 c.2761C>G 21 118152 0.00017774
703 | MYBPC3 c.3326C>T 11 66078 0.00016647
704 | MYBPC3 c.2614G>A 38 64766 0.00058673
705 | MYBPC3 c.1544A>G 17 120630 0.00014093
706 | MYBPC3 c.407-1G>A 1 21306 0.00004694
707 | MYBPC3 c.2210C>T 6 119120 0.00005037
708 | MYBPC3 c.3682C>T 26 120584 0.00021562
709 | MYBPC3 c.2914C>T 13 30098 0.00043192
710 | MYBPC3 c.3535G>A 49 106716 0.00045916
711 | MYBPC3 c.26-2A>G 4 78210 0.00005114
712 | MYBPC3 c.2909G>A 1 30760 0.00003251
713 | MYBPC3 c.2113dupA 1 120770 0.00000828
714 | MYBPC3 c.2771C>T 11 118760 0.00009262
715 | MYBPC3 c.2170C>T 2 105286 0.00001900
716 | MYBPC3 c.1789C>T 1 26174 0.00003821
717 | MYBPC3 c.1786G>A 7 26698 0.00026219
718 | MYBPC3 c.1070G>A 2 119700 0.00001671
719 | MYBPC3 c.530G>A 92 96910 0.00094933
720 | MYBPC3 c.2414-2A>G 1 120266 0.00000831
721 | MYBPC3 c.821+1G>A 1 23204 0.00004310
722 | MYBPC3 c.906-1G>C 1 113088 0.00000884
723 | MYBPC3 c.3787C>T 4 118976 0.00003362
724 | MYBPC3 c.3668A>G 1 120564 0.00000829
725 | MYBPC3 c.3642G>A 1 120420 0.00000830
726 | MYBPC3 c.3470C>T 7 74646 0.00009378
727 | MYBPC3 c.3463A>G 1 75580 0.00001323
728 | MYBPC3 c.3442A>G 1 77750 0.00001286
729 | MYBPC3 c.3415G>A 7 79966 0.00008754
730 | MYBPC3 c.3412C>T 3 80666 0.00003719
731 | MYBPC3 c.3384G>C 11 86598 0.00012702
732 | MYBPC3 c.3358C>T 3 88590 0.00003386
733 | MYBPC3 c.3346T>G 1 88020 0.00001136
734 | MYBPC3 c.3217C>T 2 40766 0.00004906
735 | MYBPC3 c.3112G>A 2 120448 0.00001660
736 | MYBPC3 c.3107G>A 7 120414 0.00005813
737 | MYBPC3 c.3052G>C 2 117696 0.00001699
738 | MYBPC3 c.2980C>T 6 28908 0.00020756
739 | MYBPC3 c.2684G>A 4 24258 0.00016489
740 | MYBPC3 c.2683C>T 1 24788 0.00004034
741 | MYBPC3 c.2518G>A 2 120484 0.00001660
742 | MYBPC3 c.2056G>C 1 61086 0.00001637
743 | MYBPC3 c.1820C>T 1 49580 0.00002017
744 | MYBPC3 c.1816G>A 3 48650 0.00006166
745 | MYBPC3 c.1601C>T 5 109380 0.00004571
746 | MYBPC3 c.1565C>T 3 120274 0.00002494
747 | MYBPC3 c.1554G>A 2 120554 0.00001659
748 | MYBPC3 c.1445C>T 5 118108 0.00004233
749 | MYBPC3 c.1372C>T 6 112904 0.00005314
750 | MYBPC3 c.1294G>A 4 106504 0.00003756
751 | MYBPC3 c.917G>A 4 108298 0.00003694
752 | MYBPC3 c.852-1G>T 1 49084 0.00002037
753 | MYBPC3 c.799C>G 2 24914 0.00008028
754 | MYBPC3 c.752A>T 5 96128 0.00005201
755 | MYBPC3 c.598A>G 4 111112 0.00003600
756 | MYBPC3 c.553A>C 2 105016 0.00001904
757 | MYBPC3 c.512G>A 1 77614 0.00001288
758 | MYBPC3 c.187C>T 4 51686 0.00007739
759 | MYBPC3 c.148A>G 3 77184 0.00003887
760 | MYBPC3 c.131G>A 3 82584 0.00003633
761 | MYBPC3 c.107C>A 3 88396 0.00003394
762 | MYBPC3 c.74G>A 5 87496 0.00005715
763 | MYBPC3 c.50G>A 12 83926 0.00014298
764 | MYBPC3 c.206G>A 1 40094 0.00002494
765 | MYBPC3 c.3569G>T 1 119246 0.00000839
766 | MYBPC3 c.3362G>A 11 87916 0.00012512
767 | MYBPC3 c.2833C>T 1 116746 0.00000857
768 | MYBPC3 c.1334C>T 1 112372 0.00000890
769 | MYBPC3 c.2828G>A 3 117440 0.00002554
770 | MYBPC3 c.305C>T 2 19596 0.00010206
771 | MYBPC3 c.3097C>T 1 120276 0.00000831
772 | MYBPC3 c.643C>T 17 109848 0.00015476
773 | MYBPC3 c.956A>C 6 76038 0.00007891
774 | MYBPC3 c.2381C>T 6 27740 0.00021629
775 | MYBPC3 c.1813_1814insCG 1 47806 0.00002092
776 | MYBPC3 c.926+1G>A 2 108696 0.00001840
777 | MYBPC3 c.2543C>T 1 119588 0.00000836
778 | MYBPC3 c.3190+2T>G 2 118438 0.00001689
779 | MYBPC3 c.442G>A 1 23806 0.00004201
780 | MYBPC3 c.994G>A 1 103078 0.00000970
781 | MYBPC3 c.121C>T 3 85890 0.00003493
782 | MYBPC3 c.3715G>A 2 120538 0.00001659
783 | MYBPC3 c.3316G>A 4 65418 0.00006115
784 | MYBPC3 c.481C>A 1 24106 0.00004148
785 | MYBPC3 c.3683G>A 2 120580 0.00001659
786 | MYBPC3 c.3472G>A 11 73936 0.00014878
787 | MYBPC3 c.3442A>C 1 77750 0.00001286
788 | MYBPC3 c.3103G>A 8 120378 0.00006646
789 | MYBPC3 c.2822T>C 1 117988 0.00000848
790 | MYBPC3 c.2512G>C 3 120548 0.00002489
791 | MYBPC3 c.2501G>A 3 120618 0.00002487
792 | MYBPC3 c.1721G>A 11 90118 0.00012206
793 | MYBPC3 c.1112C>T 3 106048 0.00002829
794 | MYBPC3 c.502G>A 11 23574 0.00046662
795 | MYBPC3 c.362C>T 4 27086 0.00014768
796 | MYBPC3 c.188G>A 2 50846 0.00003933
797 | MYBPC3 c.151G>A 1 75080 0.00001332
798 | MYBPC3 c.130C>T 2 82878 0.00002413
799 | MYBPC3 c.104G>A 7 88510 0.00007909
800 | MYBPC3 c.91G>A 1 89950 0.00001112
801 | MYBPC3 c.3812G>A 1 116500 0.00000858
802 | MYBPC3 c.3800G>A 5 117940 0.00004239
803 | MYBPC3 c.3799C>T 1 118184 0.00000846
804 | MYBPC3 c.3791G>A 1 118738 0.00000842
805 | MYBPC3 c.3788G>A 1 118892 0.00000841
806 | MYBPC3 c.3781G>A 2 119302 0.00001676
807 | MYBPC3 c.3777G>C 1 119600 0.00000836
808 | MYBPC3 c.3767C>T 1 119838 0.00000834
809 | MYBPC3 c.3750C>G 2 120234 0.00001663
810 | MYBPC3 c.3676C>T 7 120582 0.00005805
811 | MYBPC3 c.3670G>A 1 120576 0.00000829
812 | MYBPC3 c.3667G>A 1 120562 0.00000829
813 | MYBPC3 c.3620G>T 1 119002 0.00000840
814 | MYBPC3 c.3614G>A 2 119460 0.00001674
815 | MYBPC3 c.3613C>T 2 119530 0.00001673
816 | MYBPC3 c.3581C>T 10 119870 0.00008342
817 | MYBPC3 c.3572C>T 2 119422 0.00001675
818 | MYBPC3 c.3568C>T 1 118976 0.00000841
819 | MYBPC3 c.3523C>G 1 99064 0.00001009
820 | MYBPC3 c.3512A>G 2 93394 0.00002141
821 | MYBPC3 c.3461C>T 1 75698 0.00001321
822 | MYBPC3 c.3457A>T 1 76058 0.00001315
823 | MYBPC3 c.3454G>T 1 75892 0.00001318
824 | MYBPC3 c.3452C>T 6 76054 0.00007889
825 | MYBPC3 c.3449G>A 1 77038 0.00001298
826 | MYBPC3 c.3383A>C 1 86916 0.00001151
827 | MYBPC3 c.3373G>T 1 87880 0.00001138
828 | MYBPC3 c.3371G>A 1 88272 0.00001133
829 | MYBPC3 c.3361C>T 2 88358 0.00002264
830 | MYBPC3 c.3324G>C 1 66120 0.00001512
831 | MYBPC3 c.3323A>C 3 66208 0.00004531
832 | MYBPC3 c.3299A>T 1 63872 0.00001566
833 | MYBPC3 c.3284C>T 3 51566 0.00005818
834 | MYBPC3 c.3277G>T 1 48700 0.00002053
835 | MYBPC3 c.3274G>A 4 48032 0.00008328
836 | MYBPC3 c.3257G>A 1 45884 0.00002179
837 | MYBPC3 c.3242A>C 1 45040 0.00002220
838 | MYBPC3 c.3233G>A 1 44048 0.00002270
839 | MYBPC3 c.3232T>C 1 43934 0.00002276
840 | MYBPC3 c.3229G>T 1 43344 0.00002307
841 | MYBPC3 c.3223A>G 2 42634 0.00004691
842 | MYBPC3 c.3175G>C 1 119696 0.00000835
843 | MYBPC3 c.3170C>T 1 119884 0.00000834
844 | MYBPC3 c.3154A>G 4 120236 0.00003327
845 | MYBPC3 c.3152A>G 1 120254 0.00000832
846 | MYBPC3 c.3148G>A 10 120306 0.00008312
847 | MYBPC3 c.3143G>A 2 120356 0.00001662
848 | MYBPC3 c.3127T>A 1 120458 0.00000830
849 | MYBPC3 c.3127T>C 1 120458 0.00000830
850 | MYBPC3 c.3122G>A 1 120476 0.00000830
851 | MYBPC3 c.3113T>A 2 120464 0.00001660
852 | MYBPC3 c.3110G>A 1 120436 0.00000830
853 | MYBPC3 c.3109C>T 2 120444 0.00001661
854 | MYBPC3 c.3109C>G 1 120444 0.00000830
855 | MYBPC3 c.3079G>C 1 119878 0.00000834
856 | MYBPC3 c.3072C>A 3 119572 0.00002509
857 | MYBPC3 c.3067A>G 1 119268 0.00000838
858 | MYBPC3 c.3064C>T 1 119072 0.00000840
859 | MYBPC3 c.3043G>A 1 116418 0.00000859
860 | MYBPC3 c.3031G>A 1 114586 0.00000873
861 | MYBPC3 c.3017C>A 1 112246 0.00000891
862 | MYBPC3 c.2984T>A 1 28512 0.00003507
863 | MYBPC3 c.2962G>A 1 30636 0.00003264
864 | MYBPC3 c.2938C>T 2 32028 0.00006245
865 | MYBPC3 c.2915G>A 2 30462 0.00006566
866 | MYBPC3 c.2908C>T 2 30154 0.00006633
867 | MYBPC3 c.2894A>G 1 98472 0.00001016
868 | MYBPC3 c.2891T>C 1 99928 0.00001001
869 | MYBPC3 c.2884G>C 2 102414 0.00001953
870 | MYBPC3 c.2876C>T 1 106520 0.00000939
871 | MYBPC3 c.2841C>A 1 115562 0.00000865
872 | MYBPC3 c.2840A>G 1 115776 0.00000864
873 | MYBPC3 c.2812G>T 1 118556 0.00000843
874 | MYBPC3 c.2800C>G 1 119058 0.00000840
875 | MYBPC3 c.2765G>A 3 118412 0.00002534
876 | MYBPC3 c.2723A>G 1 15932 0.00006277
877 | MYBPC3 c.2716G>A 2 16780 0.00011919
878 | MYBPC3 c.2702T>C 1 19020 0.00005258
879 | MYBPC3 c.2693C>T 1 20304 0.00004925
880 | MYBPC3 c.2672G>A 3 31078 0.00009653
881 | MYBPC3 c.2671C>T 1 31864 0.00003138
882 | MYBPC3 c.2668T>G 1 35190 0.00002842
883 | MYBPC3 c.2641G>A 1 53676 0.00001863
884 | MYBPC3 c.2624A>T 1 63064 0.00001586
885 | MYBPC3 c.2621C>T 2 64110 0.00003120
886 | MYBPC3 c.2602G>A 4 108806 0.00003676
887 | MYBPC3 c.2599A>G 1 109526 0.00000913
888 | MYBPC3 c.2576C>T 1 113726 0.00000879
889 | MYBPC3 c.2570C>T 1 114954 0.00000870
890 | MYBPC3 c.2568G>T 1 115162 0.00000868
891 | MYBPC3 c.2562G>A 4 116352 0.00003438
892 | MYBPC3 c.2560A>T 1 116762 0.00000856
893 | MYBPC3 c.2557G>A 1 117276 0.00000853
894 | MYBPC3 c.2557G>T 1 117276 0.00000853
895 | MYBPC3 c.2542G>A 1 119648 0.00000836
896 | MYBPC3 c.2536G>A 1 120026 0.00000833
897 | MYBPC3 c.2504G>A 1 120604 0.00000829
898 | MYBPC3 c.2503C>T 3 120608 0.00002487
899 | MYBPC3 c.2503C>A 1 120608 0.00000829
900 | MYBPC3 c.2491C>T 1 120680 0.00000829
901 | MYBPC3 c.2489G>A 1 120684 0.00000829
902 | MYBPC3 c.2482G>C 1 120692 0.00000829
903 | MYBPC3 c.2470G>A 1 120692 0.00000829
904 | MYBPC3 c.2450G>C 1 120688 0.00000829
905 | MYBPC3 c.2450G>A 2 120688 0.00001657
906 | MYBPC3 c.2439G>C 2 120620 0.00001658
907 | MYBPC3 c.2426A>G 1 120492 0.00000830
908 | MYBPC3 c.2398G>A 2 37364 0.00005353
909 | MYBPC3 c.2345A>G 1 21092 0.00004741
910 | MYBPC3 c.2339T>G 1 20576 0.00004860
911 | MYBPC3 c.2300A>G 2 120570 0.00001659
912 | MYBPC3 c.2278G>C 4 120618 0.00003316
913 | MYBPC3 c.2275G>A 3 120620 0.00002487
914 | MYBPC3 c.2249C>T 3 120524 0.00002489
915 | MYBPC3 c.2242G>A 3 120472 0.00002490
916 | MYBPC3 c.2200A>G 3 117750 0.00002548
917 | MYBPC3 c.2198G>T 3 117500 0.00002553
918 | MYBPC3 c.2189C>T 1 115458 0.00000866
919 | MYBPC3 c.2188A>C 1 114698 0.00000872
920 | MYBPC3 c.2179G>A 4 110952 0.00003605
921 | MYBPC3 c.2177G>A 4 110276 0.00003627
922 | MYBPC3 c.2164G>A 3 102842 0.00002917
923 | MYBPC3 c.2164G>C 1 102842 0.00000972
924 | MYBPC3 c.2161A>T 1 101754 0.00000983
925 | MYBPC3 c.2135T>C 1 120776 0.00000828
926 | MYBPC3 c.2134G>T 2 120776 0.00001656
927 | MYBPC3 c.2119G>A 2 120772 0.00001656
928 | MYBPC3 c.2099A>T 2 120758 0.00001656
929 | MYBPC3 c.2056G>A 1 61086 0.00001637
930 | MYBPC3 c.2041G>A 1 80126 0.00001248
931 | MYBPC3 c.2033C>A 2 90338 0.00002214
932 | MYBPC3 c.2018T>C 1 110022 0.00000909
933 | MYBPC3 c.1999C>A 1 119594 0.00000836
934 | MYBPC3 c.1978G>T 1 120528 0.00000830
935 | MYBPC3 c.1973C>T 2 120584 0.00001659
936 | MYBPC3 c.1955C>G 1 120578 0.00000829
937 | MYBPC3 c.1942C>T 1 120524 0.00000830
938 | MYBPC3 c.1938G>C 2 120490 0.00001660
939 | MYBPC3 c.1919C>T 2 36860 0.00005426
940 | MYBPC3 c.1915G>A 4 37290 0.00010727
941 | MYBPC3 c.1889A>T 2 50468 0.00003963
942 | MYBPC3 c.1871A>G 2 53822 0.00003716
943 | MYBPC3 c.1867T>A 1 54482 0.00001835
944 | MYBPC3 c.1826C>T 2 50972 0.00003924
945 | MYBPC3 c.1823C>G 3 50520 0.00005938
946 | MYBPC3 c.1809T>G 2 46874 0.00004267
947 | MYBPC3 c.1778C>T 1 28932 0.00003456
948 | MYBPC3 c.1773G>T 1 30306 0.00003300
949 | MYBPC3 c.1726G>A 1 81820 0.00001222
950 | MYBPC3 c.1720C>T 5 92398 0.00005411
951 | MYBPC3 c.1685C>T 1 116300 0.00000860
952 | MYBPC3 c.1678G>C 1 117362 0.00000852
953 | MYBPC3 c.1672G>A 1 117892 0.00000848
954 | MYBPC3 c.1639G>A 1 118306 0.00000845
955 | MYBPC3 c.1623G>C 1 81928 0.00001221
956 | MYBPC3 c.1615A>G 6 92042 0.00006519
957 | MYBPC3 c.1599G>C 1 111074 0.00000900
958 | MYBPC3 c.1595G>A 1 113374 0.00000882
959 | MYBPC3 c.1589G>A 1 117054 0.00000854
960 | MYBPC3 c.1580T>C 1 119166 0.00000839
961 | MYBPC3 c.1546G>A 5 120616 0.00004145
962 | MYBPC3 c.1540A>G 1 120642 0.00000829
963 | MYBPC3 c.1503C>G 1 120676 0.00000829
964 | MYBPC3 c.1499A>C 1 120674 0.00000829
965 | MYBPC3 c.1493C>A 1 120644 0.00000829
966 | MYBPC3 c.1487A>G 1 120616 0.00000829
967 | MYBPC3 c.1472T>G 2 120462 0.00001660
968 | MYBPC3 c.1471G>A 7 120424 0.00005813
969 | MYBPC3 c.1469G>A 1 120410 0.00000830
970 | MYBPC3 c.1459C>A 1 120300 0.00000831
971 | MYBPC3 c.1442G>A 1 117996 0.00000847
972 | MYBPC3 c.1433C>T 2 117534 0.00001702
973 | MYBPC3 c.1423T>C 1 117150 0.00000854
974 | MYBPC3 c.1405C>G 1 115820 0.00000863
975 | MYBPC3 c.1397T>A 1 115342 0.00000867
976 | MYBPC3 c.1375C>T 1 113410 0.00000882
977 | MYBPC3 c.1363C>T 4 112128 0.00003567
978 | MYBPC3 c.1361T>C 2 112346 0.00001780
979 | MYBPC3 c.1358C>T 1 111738 0.00000895
980 | MYBPC3 c.1357C>G 1 111762 0.00000895
981 | MYBPC3 c.1316G>A 2 110650 0.00001808
982 | MYBPC3 c.1309G>T 2 109754 0.00001822
983 | MYBPC3 c.1291G>A 3 106710 0.00002811
984 | MYBPC3 c.1256G>A 2 104254 0.00001918
985 | MYBPC3 c.1256G>C 1 104254 0.00000959
986 | MYBPC3 c.1255C>T 1 104266 0.00000959
987 | MYBPC3 c.1211A>C 1 49204 0.00002032
988 | MYBPC3 c.1180G>A 1 77034 0.00001298
989 | MYBPC3 c.1171G>T 1 86828 0.00001152
990 | MYBPC3 c.1145G>A 1 102512 0.00000975
991 | MYBPC3 c.1133G>C 1 105316 0.00000950
992 | MYBPC3 c.1123G>A 1 106450 0.00000939
993 | MYBPC3 c.1120C>A 1 106582 0.00000938
994 | MYBPC3 c.1101G>T 1 104750 0.00000955
995 | MYBPC3 c.1100A>G 1 104622 0.00000956
996 | MYBPC3 c.1091C>T 7 103158 0.00006786
997 | MYBPC3 c.1084A>G 1 119592 0.00000836
998 | MYBPC3 c.1074T>A 2 119670 0.00001671
999 | MYBPC3 c.1072G>A 1 119694 0.00000835
1000 | MYBPC3 c.1039G>A 2 118514 0.00001688
1001 | MYBPC3 c.1021G>A 3 116106 0.00002584
1002 | MYBPC3 c.1009G>A 1 111296 0.00000899
1003 | MYBPC3 c.1006A>T 1 110474 0.00000905
1004 | MYBPC3 c.1003C>T 2 108234 0.00001848
1005 | MYBPC3 c.976C>T 2 91656 0.00002182
1006 | MYBPC3 c.970A>T 1 86712 0.00001153
1007 | MYBPC3 c.961G>C 1 80000 0.00001250
1008 | MYBPC3 c.960C>G 1 78254 0.00001278
1009 | MYBPC3 c.949G>A 1 70368 0.00001421
1010 | MYBPC3 c.916C>T 1 108134 0.00000925
1011 | MYBPC3 c.886A>G 1 42726 0.00002340
1012 | MYBPC3 c.845G>A 1 28712 0.00003483
1013 | MYBPC3 c.821C>T 2 23316 0.00008578
1014 | MYBPC3 c.815G>A 1 23752 0.00004210
1015 | MYBPC3 c.805T>C 1 24534 0.00004076
1016 | MYBPC3 c.746G>A 1 99080 0.00001009
1017 | MYBPC3 c.745T>C 1 99748 0.00001003
1018 | MYBPC3 c.712C>T 1 108096 0.00000925
1019 | MYBPC3 c.700A>G 2 108824 0.00001838
1020 | MYBPC3 c.684T>G 8 106064 0.00007543
1021 | MYBPC3 c.667G>A 12 99722 0.00012033
1022 | MYBPC3 c.659A>G 4 95742 0.00004178
1023 | MYBPC3 c.644G>A 2 109776 0.00001822
1024 | MYBPC3 c.640G>A 4 110080 0.00003634
1025 | MYBPC3 c.634A>G 1 110768 0.00000903
1026 | MYBPC3 c.631G>A 1 110802 0.00000903
1027 | MYBPC3 c.573G>T 1 108312 0.00000923
1028 | MYBPC3 c.568A>G 1 107756 0.00000928
1029 | MYBPC3 c.554A>G 2 104998 0.00001905
1030 | MYBPC3 c.541G>A 2 100940 0.00001981
1031 | MYBPC3 c.538G>A 1 100372 0.00000996
1032 | MYBPC3 c.527C>G 1 95772 0.00001044
1033 | MYBPC3 c.527C>T 1 95772 0.00001044
1034 | MYBPC3 c.503T>C 2 23754 0.00008420
1035 | MYBPC3 c.451G>A 1 24354 0.00004106
1036 | MYBPC3 c.446C>A 1 24026 0.00004162
1037 | MYBPC3 c.437C>T 2 23688 0.00008443
1038 | MYBPC3 c.434C>A 1 23386 0.00004276
1039 | MYBPC3 c.364G>A 1 27028 0.00003700
1040 | MYBPC3 c.323C>T 3 23248 0.00012904
1041 | MYBPC3 c.287A>G 1 17726 0.00005641
1042 | MYBPC3 c.238G>A 1 29510 0.00003389
1043 | MYBPC3 c.194C>T 2 46730 0.00004280
1044 | MYBPC3 c.166G>A 1 66652 0.00001500
1045 | MYBPC3 c.152C>T 1 74804 0.00001337
1046 | MYBPC3 c.151G>T 2 75080 0.00002664
1047 | MYBPC3 c.145A>T 2 78388 0.00002551
1048 | MYBPC3 c.133G>A 2 81912 0.00002442
1049 | MYBPC3 c.122G>A 3 85626 0.00003504
1050 | MYBPC3 c.118G>A 1 86926 0.00001150
1051 | MYBPC3 c.117G>T 1 87142 0.00001148
1052 | MYBPC3 c.94G>A 7 89538 0.00007818
1053 | MYBPC3 c.94G>C 1 89538 0.00001117
1054 | MYBPC3 c.88G>A 1 89854 0.00001113
1055 | MYBPC3 c.73A>T 2 87770 0.00002279
1056 | MYBPC3 c.67G>A 2 86564 0.00002310
1057 | MYBPC3 c.50G>T 2 83926 0.00002383
1058 | MYBPC3 c.49C>T 2 83364 0.00002399
1059 | MYBPC3 c.46C>T 2 83050 0.00002408
1060 | MYBPC3 c.41A>G 1 82480 0.00001212
1061 | MYBPC3 c.11C>T 4 119646 0.00003343
1062 | MYBPC3 c.3G>A 1 119362 0.00000838
1063 | MYBPC3 c.3518_3520delAGG 1 98522 0.00001015
1064 | MYBPC3 c.3335_3337dupGGT 2 87032 0.00002298
1065 | MYBPC3 c.3323_3325delAGA 1 66078 0.00001513
1066 | MYBPC3 c.3168delC 1 119940 0.00000834
1067 | MYBPC3 c.2555_2556insT 1 117708 0.00000850
1068 | MYBPC3 c.2490_2491insT 3 120680 0.00002486
1069 | MYBPC3 c.2443_2443insCGA 1 120542 0.00000830
1070 | MYBPC3 c.2373_2374insG 1 26444 0.00003782
1071 | MYBPC3 c.1639delG 1 118374 0.00000845
1072 | MYBPC3 c.1227-1_1228dupGGT 1 102352 0.00000977
1073 | MYBPC3 c.1144delC 1 102512 0.00000975
1074 | MYBPC3 c.1143_1144insTC 1 103118 0.00000970
1075 | MYBPC3 c.1082_1084delAGA 1 119552 0.00000836
1076 | MYBPC3 c.146_148delTCA 3 76496 0.00003922
1077 | MYBPC3 c.98_99delCA 1 89464 0.00001118
1078 | TPM1 c.515T>C 1 121332 0.00000824
1079 | TPM1 c.644C>T 1 121278 0.00000825
1080 | TPM1 c.829G>A 4 115578 0.00003461
1081 | TPM1 c.1A>G 1 112046 0.00000892
1082 | TPM1 c.532C>T 1 121286 0.00000824
1083 | TPM1 c.797A>G 5 119058 0.00004200
1084 | TPM1 c.845C>G 38 111126 0.00034195
1085 | TPM1 c.117delG 1 112530 0.00000889
1086 | TPM1 c.841A>G 3 111910 0.00002681
1087 | TPM1 c.62G>T 1 99636 0.00001004
1088 | TPM1 c.92C>A 2 69332 0.00002885
1089 | TPM1 c.155G>T 1 120032 0.00000833
1090 | TPM1 c.252C>G 1 121410 0.00000824
1091 | TPM1 c.253G>A 1 121410 0.00000824
1092 | TPM1 c.257C>G 1 121410 0.00000824
1093 | TPM1 c.301C>G 1 121408 0.00000824
1094 | TPM1 c.301C>T 1 121408 0.00000824
1095 | TPM1 c.302G>A 2 121408 0.00001647
1096 | TPM1 c.314G>A 2 121408 0.00001647
1097 | TPM1 c.323C>T 1 121408 0.00000824
1098 | TPM1 c.354G>T 1 121394 0.00000824
1099 | TPM1 c.355G>A 1 121394 0.00000824
1100 | TPM1 c.410A>G 1 121400 0.00000824
1101 | TPM1 c.459C>A 1 121250 0.00000825
1102 | TPM1 c.511A>G 1 121344 0.00000824
1103 | TPM1 c.514A>C 1 121328 0.00000824
1104 | TPM1 c.572C>T 1 121352 0.00000824
1105 | TPM1 c.629A>C 1 120894 0.00000827
1106 | TPM1 c.635A>T 1 120670 0.00000829
1107 | TPM1 c.659G>A 1 121332 0.00000824
1108 | TPM1 c.740C>A 1 121380 0.00000824
1109 | TPM1 c.775G>A 1 119116 0.00000840
1110 | TPM1 c.784G>A 2 119196 0.00001678
1111 | TPM1 c.814G>A 2 117954 0.00001696
1112 | TPM1 c.838G>A 2 113428 0.00001763
1113 | TPM1 c.851T>C 1 108270 0.00000924
1114 | TPM1 c.493-2A>G 1 121366 0.00000824
1115 | ACTC1 c.301G>A 1 121402 0.00000824
1116 | ACTC1 c.268C>T 1 121402 0.00000824
1117 | ACTC1 c.289C>T 1 121402 0.00000824
1118 | ACTC1 c.623G>A 5 121388 0.00004119
1119 | ACTC1 c.536G>T 1 121410 0.00000824
1120 | ACTC1 c.28C>A 3 119184 0.00002517
1121 | ACTC1 c.1085A>C 1 121412 0.00000824
1122 | ACTC1 c.1010G>A 1 121296 0.00000824
1123 | ACTC1 c.994A>G 2 120936 0.00001654
1124 | ACTC1 c.967G>A 2 116632 0.00001715
1125 | ACTC1 c.903A>C 1 121374 0.00000824
1126 | ACTC1 c.847A>G 1 121412 0.00000824
1127 | ACTC1 c.713T>C 1 121394 0.00000824
1128 | ACTC1 c.688G>T 1 121396 0.00000824
1129 | ACTC1 c.678G>T 1 121396 0.00000824
1130 | ACTC1 c.614C>G 1 121402 0.00000824
1131 | ACTC1 c.281A>G 1 121402 0.00000824
1132 | ACTC1 c.260T>G 1 121402 0.00000824
1133 | ACTC1 c.229A>G 3 121398 0.00002471
1134 | ACTC1 c.217A>G 2 121396 0.00001648
1135 | ACTC1 c.106A>G 1 114756 0.00000871
1136 | ACTC1 c.23C>G 2 119300 0.00001676
1137 | ACTC1 c.19A>T 2 119326 0.00001676
1138 | ACTC1 c.2T>C 1 119198 0.00000839
1139 | ACTC1 c.56_57insCA 1 118556 0.00000843
1140 | ACTC1 c.-22-1G>A 1 118744 0.00000842
1141 | MYL2 c.37G>A 37 120592 0.00030682
1142 | MYL2 c.64G>A 1 120944 0.00000827
1143 | MYL2 c.141C>A 22 121392 0.00018123
1144 | MYL2 c.173G>A 1 120720 0.00000828
1145 | MYL2 c.310A>G 1 121410 0.00000824
1146 | MYL2 c.401A>C 19 121396 0.00015651
1147 | MYL2 c.403-1G>C 3 120852 0.00002482
1148 | MYL2 c.275-2A>G 1 121368 0.00000824
1149 | MYL2 c.459G>C 3 121354 0.00002472
1150 | MYL2 c.380C>T 3 121410 0.00002471
1151 | MYL2 c.359G>A 7 121404 0.00005766
1152 | MYL2 c.433G>A 1 121258 0.00000825
1153 | MYL2 c.431delC 2 121238 0.00001650
1154 | MYL2 c.436G>A 5 121280 0.00004123
1155 | MYL2 c.374C>T 5 121412 0.00004118
1156 | MYL2 c.366G>T 1 121410 0.00000824
1157 | MYL2 c.304G>A 2 121406 0.00001647
1158 | MYL2 c.229A>G 1 121404 0.00000824
1159 | MYL2 c.11_15delAGAAA 2 119144 0.00001679
1160 | MYL2 c.298C>G 5 121406 0.00004118
1161 | MYL2 c.475A>G 1 121368 0.00000824
1162 | MYL2 c.469C>T 2 121368 0.00001648
1163 | MYL2 c.466G>T 2 121366 0.00001648
1164 | MYL2 c.454T>C 2 121346 0.00001648
1165 | MYL2 c.437T>C 1 121280 0.00000825
1166 | MYL2 c.431C>G 2 121224 0.00001650
1167 | MYL2 c.430C>G 8 121202 0.00006601
1168 | MYL2 c.430C>A 1 121202 0.00000825
1169 | MYL2 c.428C>T 3 121200 0.00002475
1170 | MYL2 c.424T>G 1 121162 0.00000825
1171 | MYL2 c.413T>A 1 121024 0.00000826
1172 | MYL2 c.403G>T 2 120876 0.00001655
1173 | MYL2 c.358C>G 1 121410 0.00000824
1174 | MYL2 c.355G>A 5 121410 0.00004118
1175 | MYL2 c.337G>T 1 121408 0.00000824
1176 | MYL2 c.321C>A 1 121412 0.00000824
1177 | MYL2 c.308T>G 15 121408 0.00012355
1178 | MYL2 c.301A>G 1 121406 0.00000824
1179 | MYL2 c.278C>T 1 121384 0.00000824
1180 | MYL2 c.257T>C 1 121400 0.00000824
1181 | MYL2 c.256T>C 4 121400 0.00003295
1182 | MYL2 c.241G>A 2 121402 0.00001647
1183 | MYL2 c.203A>G 1 121370 0.00000824
1184 | MYL2 c.172C>T 1 120642 0.00000829
1185 | MYL2 c.124G>T 1 121388 0.00000824
1186 | MYL2 c.97T>C 1 121360 0.00000824
1187 | MYL2 c.64G>C 1 120944 0.00000827
1188 | MYL2 c.64G>T 1 120944 0.00000827
1189 | MYL2 c.59T>A 1 120956 0.00000827
1190 | MYL2 c.58A>G 1 120954 0.00000827
1191 | MYL2 c.49G>A 2 120878 0.00001655
1192 | MYL2 c.34G>T 1 120482 0.00000830
1193 | MYL2 c.31G>A 2 120352 0.00001662
1194 | MYL2 c.31G>T 1 120352 0.00000831
1195 | MYL2 c.28G>A 1 120234 0.00000832
1196 | MYL2 c.7C>A 1 117246 0.00000853
1197 | MYL2 c.353_354insG 1 121410 0.00000824
1198 | MYL2 c.170-6_170-2dupCCCTA 1 120350 0.00000831
1199 | MYL2 c.92_93+1delAGG 1 120402 0.00000831
1200 | MYL2 c.47delA 1 120856 0.00000827
1201 | MYL3 c.170C>G 11 121338 0.00009066
1202 | MYL3 c.461G>A 3 121398 0.00002471
1203 | MYL3 c.466G>A 2 121398 0.00001647
1204 | MYL3 c.532G>A 32 121336 0.00026373
1205 | MYL3 c.460C>T 2 121400 0.00001647
1206 | MYL3 c.235G>A 4 121318 0.00003297
1207 | MYL3 c.188G>A 3 121342 0.00002472
1208 | MYL3 c.55G>T 3 121328 0.00002473
1209 | MYL3 c.4G>C 5 120230 0.00004159
1210 | MYL3 c.170C>A 14 121338 0.00011538
1211 | MYL3 c.530A>G 6 121330 0.00004945
1212 | MYL3 c.446T>C 1 121408 0.00000824
1213 | MYL3 c.92G>A 13 121392 0.00010709
1214 | MYL3 c.557A>C 1 121368 0.00000824
1215 | MYL3 c.484G>A 1 120994 0.00000826
1216 | MYL3 c.374A>G 1 121410 0.00000824
1217 | MYL3 c.227G>A 1 121320 0.00000824
1218 | MYL3 c.91C>T 2 121386 0.00001648
1219 | MYL3 c.73C>T 2 121374 0.00001648
1220 | MYL3 c.8C>A 1 120248 0.00000832
1221 | MYL3 c.521C>T 1 121320 0.00000824
1222 | MYL3 c.493A>G 1 121142 0.00000825
1223 | MYL3 c.488G>C 1 121088 0.00000826
1224 | MYL3 c.485A>G 1 121028 0.00000826
1225 | MYL3 c.476C>T 3 121396 0.00002471
1226 | MYL3 c.461G>T 1 121398 0.00000824
1227 | MYL3 c.456G>T 1 121404 0.00000824
1228 | MYL3 c.439A>G 1 121408 0.00000824
1229 | MYL3 c.400G>T 1 121412 0.00000824
1230 | MYL3 c.389A>T 1 121412 0.00000824
1231 | MYL3 c.347C>T 2 121408 0.00001647
1232 | MYL3 c.317C>A 1 121410 0.00000824
1233 | MYL3 c.311T>C 1 121406 0.00000824
1234 | MYL3 c.301C>G 1 120910 0.00000827
1235 | MYL3 c.294G>T 1 120974 0.00000827
1236 | MYL3 c.280C>T 1 121120 0.00000826
1237 | MYL3 c.274G>A 1 121166 0.00000825
1238 | MYL3 c.245C>T 4 121284 0.00003298
1239 | MYL3 c.241C>T 1 121296 0.00000824
1240 | MYL3 c.220G>A 7 121328 0.00005769
1241 | MYL3 c.202G>C 1 121336 0.00000824
1242 | MYL3 c.194C>G 1 121340 0.00000824
1243 | MYL3 c.187C>T 1 121336 0.00000824
1244 | MYL3 c.184G>A 1 121330 0.00000824
1245 | MYL3 c.176T>C 1 121334 0.00000824
1246 | MYL3 c.160T>G 1 121340 0.00000824
1247 | MYL3 c.152T>C 2 121100 0.00001652
1248 | MYL3 c.152T>G 1 121100 0.00000826
1249 | MYL3 c.140C>T 3 120954 0.00002480
1250 | MYL3 c.136T>C 1 120856 0.00000827
1251 | MYL3 c.106G>A 1 121398 0.00000824
1252 | MYL3 c.94C>T 2 121392 0.00001648
1253 | MYL3 c.91C>G 1 121386 0.00000824
1254 | MYL3 c.85C>G 1 121386 0.00000824
1255 | MYL3 c.82G>A 1 121382 0.00000824
1256 | MYL3 c.61C>T 1 121340 0.00000824
1257 | MYL3 c.36T>G 1 121202 0.00000825
1258 | MYL3 c.30G>C 1 121080 0.00000826
1259 | MYL3 c.17C>G 1 120734 0.00000828
1260 | MYL3 c.1A>G 2 120050 0.00001666
1261 | MYL3 c.410_411insT 1 121410 0.00000824
1262 | MYL3 c.209_211delAGA 1 121332 0.00000824
1263 | MYL3 c.28_30delAAG 1 121094 0.00000826
1264 | MYL3 c.2delT 1 120220 0.00000832
1265 |
--------------------------------------------------------------------------------
/data-raw/HCM_BRU_Variants_and_freqs.txt:
--------------------------------------------------------------------------------
1 | Gene CodingVar ProteinVar VarType CaseCount ExACFreq
2 | MYH7 c.1063G>A p.A355T missense 4 0.00000000
3 | MYH7 c.1075T>G p.F359V missense 1 0.00000000
4 | MYH7 c.1357C>T p.R453C missense 1 0.00000000
5 | MYH7 c.1358G>A p.R453H missense 1 0.00000000
6 | MYH7 c.1405G>A p.D469N missense 1 0.00000824
7 | MYH7 c.1447G>A p.E483K missense 1 0.00000824
8 | MYH7 c.1464C>A p.F488L missense 1 0.00000000
9 | MYH7 c.1753A>T p.I585F missense 1 0.00000000
10 | MYH7 c.1988G>A p.R663H missense 1 0.00001648
11 | MYH7 c.2011C>T p.R671C missense 1 0.00000000
12 | MYH7 c.2221G>T p.G741W missense 1 0.00000000
13 | MYH7 c.2242T>A p.S748T missense 1 0.00000000
14 | MYH7 c.2243C>T p.S748F missense 1 0.00000000
15 | MYH7 c.2302G>A p.G768R missense 2 0.00000000
16 | MYH7 c.2360G>A p.R787H missense 1 0.00022244
17 | MYH7 c.2389G>A p.A797T missense 2 0.00003296
18 | MYH7 c.2539A>G p.K847E missense 2 0.00000000
19 | MYH7 c.2555T>C p.M852T missense 1 0.00000000
20 | MYH7 c.2606G>A p.R869H missense 1 0.00003295
21 | MYH7 c.2606G>C p.R869P missense 1 0.00000000
22 | MYH7 c.2717A>G p.D906G missense 3 0.00000000
23 | MYH7 c.2770G>A p.E924K missense 1 0.00000000
24 | MYH7 c.2782G>A p.D928N missense 1 0.00000000
25 | MYH7 c.2890G>C p.V964L missense 1 0.00042006
26 | MYH7 c.2945T>C p.M982T missense 3 0.00090601
27 | MYH7 c.3134G>T p.R1045L missense 1 0.00001648
28 | MYH7 c.3981C>A p.N1327K missense 1 0.00010347
29 | MYH7 c.4066G>A p.E1356K missense 1 0.00000000
30 | MYH7 c.4124A>G p.Y1375C missense 1 0.00000000
31 | MYH7 c.428G>A p.R143Q missense 1 0.00000824
32 | MYH7 c.4317_4319dupTGC inframe 1 0.00000000
33 | MYH7 c.4377G>T p.K1459N missense 1 0.00030546
34 | MYH7 c.4645-2A>C essential splice site 1 0.00000000
35 | MYH7 c.4954G>T p.D1652Y missense 1 0.00002475
36 | MYH7 c.5005G>T p.E1669X nonsense 1 0.00000000
37 | MYH7 c.5075T>A p.V1692E missense 1 0.00000000
38 | MYH7 c.508G>A p.E170K missense 2 0.00000000
39 | MYH7 c.5134C>T p.R1712W missense 1 0.00000000
40 | MYH7 c.5326A>G p.S1776G missense 1 0.00003295
41 | MYH7 c.5334C>A p.H1778Q missense 1 0.00000000
42 | MYH7 c.5341C>T p.R1781C missense 1 0.00000000
43 | MYH7 c.5380C>A p.Q1794K missense 1 0.00000000
44 | MYH7 c.5470A>G p.N1824D missense 1 0.00000000
45 | MYH7 c.5471A>G p.N1824S missense 1 0.00000000
46 | MYH7 c.5507C>T p.S1836L missense 1 0.00004949
47 | MYH7 c.5749G>T p.V1917F missense 1 0.00000000
48 | MYH7 c.5779A>T p.I1927F missense 1 0.00003297
49 | MYBPC3 c.1000G>A p.E334K missense 1 0.00033786
50 | MYBPC3 c.1084A>C p.S362R missense 1 0.00000000
51 | MYBPC3 c.1201C>T p.Q401X nonsense 1 0.00000000
52 | MYBPC3 c.1217G>A p.S406N missense 1 0.00000000
53 | MYBPC3 c.1218C>A p.S406R missense 1 0.00000000
54 | MYBPC3 c.1321G>A p.E441K missense 1 0.00016221
55 | MYBPC3 c.1351+1G>A essential splice site 1 0.00000000
56 | MYBPC3 c.1359delT frameshift 1 0.00000000
57 | MYBPC3 c.1405C>T p.Q469X nonsense 1 0.00000000
58 | MYBPC3 c.1504C>T p.R502W missense 4 0.00002486
59 | MYBPC3 c.1505G>A p.R502Q missense 1 0.00000000
60 | MYBPC3 c.1518C>A p.D506E missense 1 0.00000000
61 | MYBPC3 c.1519G>A p.G507R missense 3 0.00067944
62 | MYBPC3 c.1544A>G p.N515S missense 1 0.00014093
63 | MYBPC3 c.1624G>C p.E542Q missense 1 0.00002485
64 | MYBPC3 c.1765C>T p.R589C missense 1 0.00000000
65 | MYBPC3 c.1822C>T p.P608S missense 1 0.00014057
66 | MYBPC3 c.1828G>A p.D610N missense 1 0.00000000
67 | MYBPC3 c.188G>A p.R63Q missense 1 0.00003933
68 | MYBPC3 c.2011G>A p.V671I missense 1 0.00000000
69 | MYBPC3 c.2030C>T p.P677L missense 1 0.00000000
70 | MYBPC3 c.223G>A p.D75N missense 1 0.00009129
71 | MYBPC3 c.2267delC frameshift 1 0.00000000
72 | MYBPC3 c.2308G>A p.D770N missense 1 0.00000830
73 | MYBPC3 c.2373dupG frameshift 1 0.00000000
74 | MYBPC3 c.2376G>A p.W792X nonsense 1 0.00000000
75 | MYBPC3 c.2429G>A p.R810H missense 2 0.00003319
76 | MYBPC3 c.2441_2443delAGA inframe 1 0.00032354
77 | MYBPC3 c.2459G>A p.R820Q missense 1 0.00001657
78 | MYBPC3 c.2558delG frameshift 1 0.00000000
79 | MYBPC3 c.2737+2_2737+3delTG essential splice site 1 0.00000000
80 | MYBPC3 c.2827C>T p.R943X nonsense 1 0.00001700
81 | MYBPC3 c.2864_2865delCT frameshift 1 0.00000000
82 | MYBPC3 c.2905+1G>A essential splice site 1 0.00000000
83 | MYBPC3 c.2905C>T p.Q969X nonsense 1 0.00000000
84 | MYBPC3 c.3004C>T p.R1002W missense 2 0.00067147
85 | MYBPC3 c.3137C>T p.T1046M missense 1 0.00005814
86 | MYBPC3 c.3163A>T p.K1055X nonsense 1 0.00000000
87 | MYBPC3 c.3190+1G>A essential splice site 1 0.00000000
88 | MYBPC3 c.3226_3227insT frameshift 1 0.00000000
89 | MYBPC3 c.3257G>A p.W1086X nonsense 1 0.00002179
90 | MYBPC3 c.327_332delCCCTGC inframe 1 0.00000000
91 | MYBPC3 c.3286G>T p.E1096X nonsense 1 0.00000000
92 | MYBPC3 c.3326C>T p.T1109I missense 1 0.00016647
93 | MYBPC3 c.3408C>A p.Y1136X nonsense 1 0.00000000
94 | MYBPC3 c.3535G>A p.E1179K missense 1 0.00045916
95 | MYBPC3 c.3613C>T p.R1205W missense 1 0.00001673
96 | MYBPC3 c.3742G>A p.G1248R missense 1 0.00003324
97 | MYBPC3 c.3763G>A p.A1255T missense 1 0.00007504
98 | MYBPC3 c.3763G>C p.A1255P missense 1 0.00000000
99 | MYBPC3 c.3771C>A p.N1257K missense 1 0.00000000
100 | MYBPC3 c.3787C>T p.R1263W missense 1 0.00003362
101 | MYBPC3 c.442G>A p.G148R missense 1 0.00004201
102 | MYBPC3 c.451G>A p.D151N missense 1 0.00004106
103 | MYBPC3 c.640G>A p.D214N missense 1 0.00003634
104 | MYBPC3 c.655G>C p.V219L missense 1 0.00000000
105 | MYBPC3 c.710A>C p.Y237S missense 1 0.00000000
106 | MYBPC3 c.772G>A p.E258K missense 1 0.00003903
107 | MYBPC3 c.806C>A p.S269X nonsense 1 0.00000000
108 | MYBPC3 c.821+2T>C essential splice site 1 0.00000000
109 | TNNT2 c.236T>A p.I79N missense 1 0.00000000
110 | TNNT2 c.662T>C p.I221T missense 1 0.00012358
111 | TNNT2 c.785A>G p.N262S missense 1 0.00000000
112 | TNNT2 c.832C>T p.R278C missense 2 0.00042906
113 | TNNT2 c.857G>A p.R286H missense 1 0.00007882
114 | TNNI3 c.407G>A p.R136Q missense 1 0.00000831
115 | TNNI3 c.433C>T p.R145W missense 1 0.00000830
116 | TNNI3 c.485G>A p.R162Q missense 1 0.00002498
117 | TNNI3 c.557G>A p.R186Q missense 1 0.00000000
118 | TNNI3 c.561G>T p.E187D missense 1 0.00000829
119 | TNNI3 c.587A>G p.D196G missense 1 0.00000000
120 | TNNI3 c.608G>T p.G203V missense 1 0.00000000
121 | TPM1 c.842T>C p.M281T missense 1 0.00000000
122 | MYL2 c.221C>T p.P74L missense 1 0.00000000
123 | MYL2 c.401A>C p.E134A missense 1 0.00015651
124 | MYL2 c.428C>T p.P143L missense 1 0.00002475
125 | MYL3 c.170C>G p.A57G missense 1 0.00009066
126 | MYL3 c.460C>T p.R154C missense 1 0.00001647
127 | MYL3 c.532G>A p.D178N missense 1 0.00026373
128 | ACTC1 c.76G>A p.D26N missense 1 0.00000000
129 | ACTC1 c.787C>G p.L263V missense 1 0.00000000
130 | ACTC1 c.850A>T p.I284F missense 1 0.00000000
131 |
--------------------------------------------------------------------------------
/data-raw/SupplementaryTable1.txt:
--------------------------------------------------------------------------------
1 | measureset_id symbol hgvs_c ExACCount Class Notes
2 | 43475* MYL2 c.401A>C 19 VUS 8/8 missense prediction algorithms predict deleterious. Identified in 2 individuals with a second 'likely pathogenic' MYL2 variant (ClinVar SCV000060059.4). An in vitro functional study suggests that this variant may reduce cardiac muscle contraction efficiency (Burghardt 2013). Second hit causing more severe phenotype suggested. [BS1 + PP3]
3 | 43121* MYL3 c.170C>A 14 Likely Benign 8/8 missense algorithms predict deleterious. Mutations at the same residue have been reported for HCM (Lee 2001). Indentified along with with pathogenic and likely pathogenic variants (ClinVar SCV000059671.4, SCV000207109.1). 2 affected siblings reported both with variant as homozygote (no support for recessive MYL3 inheritance). [BS1 + BP5 + PP3]
4 | 180694 MYOM1 c.1900+3A>C 466 Likely Benign Originally identified as a homozygote in 3 yr old (ClinVar SCV000195854.1). No other data to support. 7 homozygotes in ExAC. Splicing tools predict no effect. [BS1 + BP4]
5 | 41926 CRYAB c.460G>A 93 Likely Benign Identified in a HCM case with an alternative variant sufficient to cause disease (ClinVar SCV000060874.4). Previous pathogenic assertion in ClinVar is from 'literature only' for DCM (no segregation, only 200 controls used for reference). All others say 'VUS' [BS1 + BP5]
6 | 12411 TNNT2 c.853C>T 40 VUS Reported in >10 individuals with HCM and segregated in 5 affected individuals from multiple families (Watkins 1995, Elliott 1999, Garcia-Castro 2003, Torricelli 2003, Van Driest 2003, Theopistou 2004, Ingles 2005, Zeller 2006, Garcia-Castro 2009, Kaski 2009, Gimeno 2009, Millat 2010). Up to half of individuals carried a second variant (ClinVar SCV000060277.4). Residue not evolutionarily conserved. A milder affect with late age of onset has been suggested. In vitro studies suggest may have an affect on muscle contraction (Morimoto 1999, Yanaga 1999, Szczesna 2000, Hernandez 2005) however these might not represent acurately protein function. [BS1]
7 | 177700 MYBPC3 c.2441_2443delAGA 39 VUS In-frame deletion. Experimental studies have shown that this deletion does not lead to decreased MYBPC3 stability in vitro (Bahrudin 2008). Has been identified in 4 adult unaffected relatives (ClinVar SCV000203960.3). [BS1 + PM4]
8 | 14064 MYL2 c.37G>A 37 Benign ClinVar 'Pathogenic' assertion from literature only (ClinVar SCV000035365.1). Not found in an affected family member (Ball 2012). Other weak non-segregation data. Found in a 56 year old healthy individual (Ball 2012). Found in Ashkenazi Jewish individuals. Reported to lead to increased Ca2+ affinity (Szczesna 2001). [BS1 + BS4]
9 | 161310 MYBPC3 c.961G>A 37 Likely Benign Evidence based on presence in HCM individuals (1 with a MYH7 variant (Maron 2012)) and absence in ESP and nearby residues reported as pathogenic (Ser311Leu and Arg326Gln). No segregation or functional data. Reported multiple times in ClinVar with other variants that could explain phenotype (SCV000208258.4, SCV000261724.2). [BS1 + BP5]
10 | 177902 MYBPC3 c.1000G>A 36 VUS Very high Asian freqeuncy (0.45% in ExAC). Reduced stability of MYBPC3, higher Ca2+ transients and action potential durations in HL-1 cells and rat cardiac myocytes (Bahrudin 2011). [BS1]
11 | 161305 MYBPC3 c.13G>C 36 Likely Benign Reported in two families; one individual but not unaffected mother and another individual but not unaffected sibling. 3 individuals with variant also carry a pathogenic mutation in the same gene (ClinVar SCV000198995.3). Amino acid not conserved in mammals. [BS1 + BP5]
12 | 191577 ANKRD1 c.368C>T 28 Likely Benign 8/8 missense algorithms predict benign. Mutant protein correctly incorporates into the sarcomere (Crocini 2013). Amino acid change is not conserved. [BS1 + BP4]
13 | 14149 MYH6 c.3195G>C 28 VUS 6/7 missense algorithms predict deleterious. Originally identified as in an affected (died of congestive heart failure at 45 yrs) and not his 2 unnafected offspring or 150 controls (Carniel 2005). Amino acid change seen in birds. [BS1 + PP3]
14 | 12445 TNNC1 c.435C>A 28 VUS Functional studies using mutant porcine cardiac skinned fibers showed significantly increased force development and force recovery compared to wildtype (Landstrom 2008) with reduced Ca2+ binding [only in vitro studies]. No segregation data. Has also been reported in a patient with idiopathic DCM who also harbored a missense variant in MYBPC3 gene (Pinto 2011) [BS1]
15 | 42900 MYH7 c.2360G>A 27 Likely Benign 11 reports in total (7 literature, 4 LMM in ClinVar (SCV000059440.4)), majority South Asian decent - very high South Asian frequency (0.1%). 3 probands had another variant sufficient to cause disease. Only moderately conserved amino acid. Possible non-segregation (not in the father of an individual who died of sudden cardiac death; ClinVar SCV000059440.4). [BS1 + BP5]
16 | 43911 ACTN2 c.1484C>T 26 Benign 4 individuals in the literature (Chiu 2010, Theis 2006) and 2 reported on ClinVar (SCV000060535.4). Non segregation with disease also reported (2 affecteds; ClinVar SCV000060535.4). No supporting segregation data. [BS1 + BS4]
17 | 42536 MYBPC3 c.1468G>A 25 Likely Benign 7/7 missense algorithms predict deleterious. Identified in individuals with HCM, DCM and LVNC. Conserved residue. Multiple reports of cooccurance with an alternative variant that is sufficient to cause disease (ClinVar SCV000059050.4). [BS1 + BP5 + PP3]
18 | 31766 MYL2 c.141C>A 22 Likely Benign 7/8 missense algorithms predict benign. Co-occurance with likely pathogenic (tested) MYH7:c.5134C>T mutation (segregation data) in literature (Andersen 2001, Hougs 2005). Other reports of a second variant (ClinVar SCV000060036.4). In vitro studies show reduced force development and power output (Szczesna-Cordary 2004, Abraham 2009, Greenberg 2009, Greenberg 2010). [BS1 + BP5 + BP4]
19 | 44710 TCAP c.458G>A 20 Likely Benign 7/8 missense algorithms predict benign. Only pathogenic in ClinVar by 'literature only'. Identified in siblings but no solid segregation data (Hayashi 2004). Functional studies in yeast show the variant leads to an increase in the ability of TCAP to bind with titin and CS-1 (Hayashi 2004)- in vitro assay may not accurately represent biological function. Amino acid change is not conserved - 7 mammals carry the variant amino acid. [BS1 + BP4]
20 | 36601 MYBPC3 c.1321G>A 18 Likely Benign Multiple reports of co-occurence with alternative pathogenic variants (ClinVar SCV000059030.4 and SCV000208011.4). Some computational modelling data suggest a confirmational effect (Gajendrarao 2015). [BS1 + BP5]
21 | 42775 MYBPC3 c.624G>C 18 VUS Reports of possible non-segregation (ClinVar SCV000284248.1). [BS1]
22 | 14111 MYH7 c.2183C>T 18 Likely Benign Pathogenic in CliNVar from 'literature only' - VUS by 3 other submitters. Identified originally in cis with another MYH7 mutation in 2 siblings with HCM (Blair 2001) - other variant (c.1816G>A) is absent in ExAC and has been shown to segregate in multiple families (24 literature reports) [BS1 + BP2]
23 | 4243 MYLK2 c.260C>T 16 VUS Pathogenic in ClinVar from 'literature only', no other submissions. Found as a compound het in a 13yr old and with an MYH7 variant (also in mildy affected parents; Davis (2001). Other compound het variant is also common. [BS1]
24 | 12197 VCL c.2923C>T 16 VUS Pathogenic in ClinVar from 'literature only' .Reduced levels of vinculin in the patient with this variant, but other studies found that this was a general feature of HCM and not related to specific genotypes (Vasile 2006). [BS1]
25 | 14122 MYH7 c.1322C>T 16 VUS Pathogenic in ClinVar from 'literature only', in Laing distal myopathy. No change in mRNA levels. Amino acid is not conserved in mammals. [BS1]
26 | 30143 MYBPC3 c.2618C>A 13 VUS Identified as a compound het and in homozygous state (consaguinous family; Nanni 2003, Ingles 2005). No segregation data in HCM. 1 homozygote seen in ExAC. [BS1]
27 | 31780 MYL3 c.170C>G 11 VUS 7/8 missense algorithms predict damaging. Functional study showing lowercomplex affinity not significant for this variant after multiple testing (Lossie 2012). Segregation in 5 affected members of 2 families (ClinVar SCV000199362.3). All reports in East Asians (highest ExAC frequency at 0.05%). [BS1 + PP3 + PP1]
28 | 45533 LDB3 c.1823C>T 11 VUS Pathogenic in ClinVar from 'literature only'. No segregation or functional data. [BS1]
29 | 42679 MYBPC3 c.3049G>A 10 Likely Benign Not conserved amino acid - 7/7 missense algorithms predict benign. No functional or segregation data. [BS1 + BP4]
30 | 9380 SCN5A c.4534C>T 9 VUS 8/8 missense algorithms predict damaging. Reported in association with Brugada syndrome (Rook 1999, Deschnes 2000, Meregalli 2009, Crotti 2012). In vitro functional studies show a potential impact (Rook 1999, Deschnes 2000), however these may not be representative of in vivo protein function. [BS1 + PP3]
31 | 6055 KCNE2 c.79C>T 9 VUS 7/8 missense algorithms predict damaging. Identified in 2 Chinese families for Atrial Fibrillation (Yang 2004). Only parent and child tested for mutation (50% chance of segregation). An in vitro study on rat cardiomyocytes saw supression of L-type Calcium current (Liu 2014). [BS1 + PP3]
32 | 164023 MYBPC3 c.3763G>A 9 VUS 1/5 individuals with variant also had a likely pathogenic variant in another gene (ClinVar SCV000198796.2). No other data available. [BS1]
33 | 161309 MYBPC3 c.2269G>A 8 VUS Conservative amino acid change. No functional or segregation data found. [BS1]
34 | 42664 MYBPC3 c.2873C>T 7 Likely Benign 2 documented individuals also carry a MYBPC3 variant sufficient to explain disease (ClinVar SCV000059185.4). Amino acid is not well conserved 7/7 missense algorithms predict benign. [BS1 + BP4 + BP2]
35 | 48097 LMNA c.976T>A 7 Likely Benign Position not conserved 7/8 missense algorithms predict benign, simulation studies suggest no effect on coil B dimerisation. [BS1 + BP4]
36 | 42566 MYBPC3 c.1786G>A 7 VUS 6/7 missense algorithms predict damaging. No clear segregation data (one suggestion the variant doesn't segregate (ClinVar SCV000259413.1)). [BS1 + PP3]
37 | 36613 MYBPC3 c.529C>T 6 Likely Benign Different missense at the same position is common. Identified with another pathogenic MYBPC3 variant (ClinVar SCV000198969.3). Otherwise found in DCM. Position not well conserved. [BS1 + BP5]
38 | 180968 MYBPC3 c.2381C>T 6 VUS No evidence towards a pathogenic classification found. [BS1]
39 | 42756 MYBPC3 c.461T>C 5 Likely Benign Position not conserved, 6/7 missense algorithms predict benign.Identified in a child with a further homozygoes MYBPC3 variant (ClinVar SCV000059282.4). [BS1 + BP4]
40 | 177629 MYH7 c.5326A>G 4 Likely Benign Large majority of identified cases are of Asian ancestry. 2 individuals at GeneDx have an alternative pathogenic variant (ClinVar SCV000208629.3). Only segregation data is in 2 affected siblings and not in the unaffected sibling (Blair 2002). [BS1 + BP5]
41 | 191705 MYBPC3 c.640G>A 4 VUS No evidence towards a pathogenic classification found. [BS1]
42 | 14147 MYH6 c.2384G>A 4 VUS Identified in a 75 year old patient and not found in 170 controls (Niimura 2002). No other data. [BS1]
43 | 161313 MYBPC3 c.223G>A 3 VUS No evidence towards a pathogenic classification found. [BS1]
44 | 42706 MYBPC3 c.3330+5G>C 2 Pathogenic Prevalence in affecteds stastically increased over controls (http://biorxiv.org/content/early/2016/02/24/041111). Shown to produce a truncated protein product and to segregate with disease (Watkins 1995). [BS1 + PVS1 + PS4 + PP1]
45 |
--------------------------------------------------------------------------------
/data-raw/Table1.txt:
--------------------------------------------------------------------------------
1 | Disease Prevalence Commonest causative variant Case count Case frequency (95% CI) Penetrance* Expected population frequency Model predicted maximum ExAC AC Observed ExAC AC
2 | HCM 1/500 MYBPC3:c.1504C>T 104/6179 1.7% (1.4-2.0%) 0.5 3.4x10-5 (2.7-4.0x10-5) 9 3
3 | DCM 1/250 TNNT2:c.629_631delAGA 18/1254 1.4% (0.78-2.1%) 0.5 5.6x10-5 (3.1-8.4x10-5) 16 0
4 | ARVC 1/1000 PKP2:c.2146-1G>C 24/361 6.7% (4.1-9.2%) 0.5 6.7x10-5 (4.1-9.2x10-5) 17 6
5 | LQTS 1/2000 KCNQ1:c.797T>C 30/2500 1.2% (0.77-1.6%) 0.5 6.0x10-6 (3.9-8.2x10-6) 3 0
6 | Brugada 1/1000 SCN5A:c.5350G>A 14/2111 0.66% (0.32-1.0%) 0.5 6.6x10-6 (0.32-1.0x10-5) 3 0
--------------------------------------------------------------------------------
/data-raw/Table2.txt:
--------------------------------------------------------------------------------
1 | Disease Maximum allelic contribution Prevalence Penetrance* Maximum population frequency Maximum tolerated ExAC allele count
2 | Marfan 0.015 1/3000 0.5 5.0x10-6 2
3 | Noonan 0.10 1/1000 0.5 1.0x10-4 18
4 | CPVT 0.10 1/10,000 0.5 1.0x10-5 3
5 | Classic Ehlers-Danlos 0.40 1/20,000 0.5 2.0x10-5 5
6 |
--------------------------------------------------------------------------------
/data-raw/authors.txt:
--------------------------------------------------------------------------------
1 | Name Affiliation coFirst coLast Corresponding
2 | Nicola Whiffin "National Heart & Lung Institute, Imperial College London" T
3 | Nicola Whiffin "NIHR Royal Brompton Cardiovascular Biomedical Research Unit, Royal Brompton & Harefield Hospitals & Imperial College London"
4 | Eric Minikel "Analytic & Translational Genetics Unit, Massachusetts General Hospital, Boston MA" T
5 | Eric Minikel "Program in Medical and Population Genetics, Broad Institute of MIT & Harvard, Cambridge MA"
6 | Roddy Walsh "National Heart & Lung Institute, Imperial College London"
7 | Roddy Walsh "NIHR Royal Brompton Cardiovascular Biomedical Research Unit, Royal Brompton & Harefield Hospitals & Imperial College London"
8 | Anne O'Donnell-Luria "Analytic & Translational Genetics Unit, Massachusetts General Hospital, Boston MA"
9 | Anne O'Donnell-Luria "Program in Medical and Population Genetics, Broad Institute of MIT & Harvard, Cambridge MA"
10 | Konrad Karczewski "Analytic & Translational Genetics Unit, Massachusetts General Hospital, Boston MA"
11 | Konrad Karczewski "Program in Medical and Population Genetics, Broad Institute of MIT & Harvard, Cambridge MA"
12 | Alexander Y Ing	"Laboratory for Molecular Medicine, Partners HealthCare Personalized Medicine, Cambridge MA"
13 | Alexander Y Ing "Department of Pathology, Massachusetts General Hospital and Harvard Medical School, Boston MA"
14 | Paul JR Barton "National Heart & Lung Institute, Imperial College London"
15 | Paul JR Barton "NIHR Royal Brompton Cardiovascular Biomedical Research Unit, Royal Brompton & Harefield Hospitals & Imperial College London"
16 | Birgit Funke "Laboratory for Molecular Medicine, Partners HealthCare Personalized Medicine, Cambridge MA"
17 | Birgit Funke "Department of Pathology, Massachusetts General Hospital and Harvard Medical School, Boston MA"
18 | Stuart A Cook "National Heart & Lung Institute, Imperial College London" T
19 | Stuart A Cook "NIHR Royal Brompton Cardiovascular Biomedical Research Unit, Royal Brompton & Harefield Hospitals & Imperial College London"
20 | Stuart A Cook "National Heart Centre Singapore, Singapore"
21 | Stuart A Cook "Duke-National University of Singapore, Singapore"
22 | Daniel MacArthur "Analytic & Translational Genetics Unit, Massachusetts General Hospital, Boston MA" T
23 | Daniel MacArthur "Program in Medical and Population Genetics, Broad Institute of MIT & Harvard, Cambridge MA"
24 | Daniel MacArthur "Department of Medicine, Harvard Medical School, Boston MA"
25 | James S Ware "National Heart & Lung Institute, Imperial College London" T T
26 | James S Ware "NIHR Royal Brompton Cardiovascular Biomedical Research Unit, Royal Brompton & Harefield Hospitals & Imperial College London"
27 | James S Ware "MRC Clinical Sciences Centre, Imperial College London"
28 | James S Ware "Program in Medical and Population Genetics, Broad Institute of MIT & Harvard, Cambridge MA"
29 |
--------------------------------------------------------------------------------
/data/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/data/.keep
--------------------------------------------------------------------------------
/data/filtering_stats.tsv:
--------------------------------------------------------------------------------
1 | ancestry filter af_limit total_vars filtered_vars proportion_removed
2 | afr af_filter 1e-06 13616.64 73.65 0.994591176677947
3 | amr af_filter 1e-06 12235.66 53.1 0.995660225929782
4 | eas af_filter 1e-06 12450.28 74.54 0.994012986053326
5 | nfe af_filter 1e-06 11953.86 44.85 0.99624807384393
6 | sas af_filter 1e-06 12230.67 67.09 0.994514609583939
7 | afr af_filter 3.16227766016838e-06 13616.64 73.65 0.994591176677947
8 | amr af_filter 3.16227766016838e-06 12235.66 53.1 0.995660225929782
9 | eas af_filter 3.16227766016838e-06 12450.28 74.54 0.994012986053326
10 | nfe af_filter 3.16227766016838e-06 11953.86 44.85 0.99624807384393
11 | sas af_filter 3.16227766016838e-06 12230.67 67.09 0.994514609583939
12 | afr af_filter 1e-05 13616.64 77.56 0.994304028012784
13 | amr af_filter 1e-05 12235.66 56.3 0.995398695289016
14 | eas af_filter 1e-05 12450.28 77.69 0.993759979695236
15 | nfe af_filter 1e-05 11953.86 53.3 0.99554118920583
16 | sas af_filter 1e-05 12230.67 69.99 0.994277500741987
17 | afr af_filter 3.16227766016838e-05 13616.64 83.98 0.99383254606129
18 | amr af_filter 3.16227766016838e-05 12235.66 70.16 0.994265940701196
19 | eas af_filter 3.16227766016838e-05 12450.28 82.42 0.993380068560707
20 | nfe af_filter 3.16227766016838e-05 11953.86 68.5 0.994269633407117
21 | sas af_filter 3.16227766016838e-05 12230.67 84.82 0.993064975181245
22 | afr af_filter 1e-04 13616.64 117.04 0.991404634329761
23 | amr af_filter 1e-04 12235.66 87.44 0.992853675241058
24 | eas af_filter 1e-04 12450.28 113.99 0.990844382616295
25 | nfe af_filter 1e-04 11953.86 85.94 0.992810690438068
26 | sas af_filter 1e-04 12230.67 108.26 0.991148481644914
27 | afr af_filter 0.000316227766016838 13616.64 148.22 0.989114788964091
28 | amr af_filter 0.000316227766016838 12235.66 115.9 0.990527687104741
29 | eas af_filter 0.000316227766016838 12450.28 145.17 0.988340021268598
30 | nfe af_filter 0.000316227766016838 11953.86 109.44 0.990844798249268
31 | sas af_filter 0.000316227766016838 12230.67 141.93 0.988395566228179
32 | afr af_filter 0.001 13616.64 205.9 0.98487879535627
33 | amr af_filter 0.001 12235.66 151.9 0.987585467396119
34 | eas af_filter 0.001 12450.28 189.21 0.984802751424064
35 | nfe af_filter 0.001 11953.86 142.34 0.988092549184949
36 | sas af_filter 0.001 12230.67 190.95 0.98438760918249
37 | afr af_filter 0.00316227766016838 13616.64 310.82 0.977173517108479
38 | amr af_filter 0.00316227766016838 12235.66 204.09 0.98332006610187
39 | eas af_filter 0.00316227766016838 12450.28 258.31 0.979252675441837
40 | nfe af_filter 0.00316227766016838 11953.86 198.54 0.983391138929183
41 | sas af_filter 0.00316227766016838 12230.67 264.28 0.978392025947883
42 | afr af_filter 0.01 13616.64 535.44 0.960677523970671
43 | amr af_filter 0.01 12235.66 274.81 0.977540238940932
44 | eas af_filter 0.01 12450.28 345.48 0.972251226478441
45 | nfe af_filter 0.01 11953.86 298.89 0.974996361008076
46 | sas af_filter 0.01 12230.67 379.18 0.968997610106397
47 | afr af_filter 0.05 13616.64 1377.35 0.898848027119759
48 | amr af_filter 0.05 12235.66 503.26 0.958869403039967
49 | eas af_filter 0.05 12450.28 555.22 0.955405019003589
50 | nfe af_filter 0.05 11953.86 591.47 0.950520584982591
51 | sas af_filter 0.05 12230.67 664.79 0.945645659640886
52 |
--------------------------------------------------------------------------------
/figures/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/figures/.keep
--------------------------------------------------------------------------------
/figures/Figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/figures/Figure1.png
--------------------------------------------------------------------------------
/frequencyFilter.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/manuscript/.Rprofile:
--------------------------------------------------------------------------------
1 | source("../.Rprofile", chdir = TRUE)
2 |
--------------------------------------------------------------------------------
/manuscript/SupplementaryMaterial.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Supplementary Material - Using high-resolution variant frequencies to empower clinical genome interpretation"
3 | output:
4 | word_document:
5 | reference_docx: template2.docx
6 |
7 | bibliography: "suppReferences.bib"
8 | csl: "nature.csl"
9 | ---
10 |
11 | ```{r, echo=FALSE, include=FALSE}
12 | #read all scripts in the R directory, to load functions
13 | sourceDir <- function(path, trace = TRUE, ...) {
14 | for (nm in list.files(path, pattern = "\\.[RrSsQq]$")) {
15 | if(trace) cat(nm,":")
16 | source(file.path(path, nm), ...)
17 | if(trace) cat("\n")
18 | }
19 | }
20 |
21 | #load functions
22 | #sourceDir(file.path("..","R"))
23 | source("../R/setup.R")
24 | ```
25 |
26 | ### Supplementary table 1
27 |
28 | Variants previously reported as causative of HCM either in ClinVar, or in a clinical series of 6179 HCM cases, but that were observed in ExAC above the maximum tolerated allele count for HCM (AC>9 globally) were manually curated according to the ACMG guidelines for interpretation and classification of sequence variants. *These variants were previously categorised as Likely Pathogenic in the LMM and Oxford HCM cohorts.
29 |
30 | ```{r}
31 | read.delim("../data-raw/SupplementaryTable1.txt") %>% kable
32 | ```
33 |
34 | ### Supplementary note 1 - Curation of a high frequency PCD variant
35 |
36 | _NME8_ NM\_016616.4:c.271-27C>T, which is reported as Pathogenic in ClinVar, is found on 2306/120984 ExAC chromosomes. This variant was initially reported as pathogenic on the basis of two compound heterozygous cases when specifically searching for _NME8_ variants in a set of patients, and was found in 2/196 control chromosomes `r citep("10.1073/pnas.0611405104")`. We further note that _NME8_ has not otherwise been associated with PCD and shows no evidence of missense nor loss-of-function constraint and that this splice variant affects a non-canonical transcript. During our curation exercise we found that this variant meets none of the ACMG criteria for assertions of pathogenicity, and therefore we reclassified it as a VUS.
37 |
38 | ### Supplementary note 2 - Dealing with penetrance
39 |
40 | It is often difficult to obtain accurate penetrance information for reported variants, and it is also difficult to know what degree of penetrance to expect or assume in newly discovered pathogenic variants. In this work we uniformly apply a value of 50% penetrance equivalent to that reported for our HCM example variant, and the lowest we found reported for any of our examples. We recognise several other approaches that can be used to deal with the issues of penetrance. These include: setting a penetrance level equivalent to the minimum that is 'clinically actionable' for a disorder; lowering the penetrance if reduced penetrance is expected in a family; or using a two-tiered approach, initially searching for a high-moderate penetrance variant but allowing for a lower-penetrance variant in a second pass. We believe that the ease of re-calculating our "maximum credible population allele frequency" lends itself to any of these approaches. We provide an online calculator to facilitate the exploration of these parameters (*temporary URL for review - username/password = frequency/filter*).
41 |
42 | ## Supplemental references
43 |
44 | ```{r, echo=FALSE, message=FALSE}
45 | write.bibtex(file="suppReferences.bib")
46 | ```
47 |
--------------------------------------------------------------------------------
/manuscript/manuscript.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Using high-resolution variant frequencies to empower clinical genome interpretation"
3 | output:
4 | # html_document
5 | word_document:
6 | reference_docx: newTemplate.docx
7 |
8 | bibliography: "references.bib"
9 | csl: "nature.csl"
10 | ---
11 |
12 | ```{r, echo=FALSE, include=FALSE}
13 | #load functions
14 | source(file.path("..","R","setup.R"))
15 | source(file.path("..","R","sumariseCaseSeriesVariants.R"))
16 | #create figures
17 | source(file.path("..","R","Figures.R"))
18 | ```
19 |
20 | ```{r authors, results='asis'}
21 | read.delim("../data-raw/authors.txt") %>%
22 | formatAuthors
23 | ```
24 |
25 | ## Abstract 100-150 words
26 |
27 | Whole exome and genome sequencing have transformed the discovery of genetic variants that cause human Mendelian disease, but discriminating pathogenic from benign variants remains a daunting challenge. Rarity is recognised as a necessary, although not sufficient, criterion for pathogenicity, but frequency cutoffs used in Mendelian analysis are often arbitrary and overly lenient. Recent very large reference datasets, such as the Exome Aggregation Consortium (ExAC), provide an unprecedented opportunity to obtain robust frequency estimates even for very rare variants. Here we present a statistical framework for the frequency-based filtering of candidate disease-causing variants, accounting for disease prevalence, genetic and allelic heterogeneity, inheritance mode, penetrance, and sampling variance in reference datasets. Using the example of cardiomyopathy, we show that our approach reduces by two-thirds the number of candidate variants under consideration in the average exome, and identifies 43 variants previously reported as pathogenic that can now be reclassified. We present precomputed allele frequency cutoffs for all variants in the ExAC dataset.
28 |
29 | ## Introduction
30 |
31 | Whole exome and whole genome sequencing have been instrumental in identifying causal variants in Mendelian disease patients `r citep("10.1016/j.ajhg.2015.06.009")`. As every individual harbors ~12,000-14,000 predicted protein-altering variants `r citep("10.1038/nature19057")`, distinguishing disease-causing variants from benign bystanders is perhaps the principal challenge in contemporary clinical genetics. A variant's low frequency in, or absence from, reference databases is now recognised as a necessary, but not sufficient, criterion for variant pathogenicity `r citep(c("10.1038/gim.2015.30","10.1038/nature13127"))`. The recent availability of very large reference databases, such as the Exome Aggregation Consortium (ExAC) `r citep("10.1038/nature19057")` dataset, which has characterised the population allele frequencies of 10 million genomic variants through the analysis of exome sequencing data from over 60,000 humans, provides an opportunity to obtain robust frequency estimates even for rare variants, improving the theoretical power for allele frequency filtering in Mendelian variant discovery efforts.
32 |
33 | In practice, there exists considerable ambiguity around what allele frequency should be considered "too common", with the lenient values of 1% and 0.1% often invoked as conservative frequency cutoffs for recessive and dominant diseases respectively `r citep("10.1038/nrg3031")`. Population genetics, however, dictates that severe disease-causing variants must be much rarer than these cutoffs, except in cases of bottlenecked populations, balancing selection, or other special circumstances `r citep(c("10.1093/molbev/msp190","10.1073/pnas.1322563111"))`.
34 |
35 | It is intuitive that when assessing a variant for a causative role in a dominant Mendelian disease, the frequency of a variant in a reference sample, not selected for the condition, should not exceed the prevalence of the condition `r citep(c("10.1016/j.ajhg.2016.03.024","10.1126/scitranslmed.aad5169"))`. This rule must, however, be refined to account for different inheritance modes, genetic and allelic heterogeneity, and reduced penetrance. In addition, for rare variants, estimation of true population allele frequency is clouded by considerable sampling variance, even in the largest samples currently available. These limitations have encouraged the adoption of very lenient approaches when filtering variants by allele frequency `r citep(c("10.1016/S0140-6736(14)61705-0","10.1038/ng.3304"))`, and recognition that more stringent approaches that account for disease-specific genetic architecture are urgently needed `r citep("10.1016/j.ajhg.2016.03.024")`.
36 |
37 | Here we present a statistical framework for assessing whether rare variants are sufficiently rare to cause penetrant Mendelian disease, while accounting for both architecture and sampling variance in observed allele counts. We demonstrate that allele frequency cutoffs well below 0.1% are justified for a variety of human disease phenotypes and that such filters can remove an additional two-thirds of variants from consideration when compared to traditionally lenient frequency cutoffs. We present pre-computed allele frequency filtering values for all variants in the Exome Aggregation Consortium database, which are now available through the ExAC data browser and for download, to assist others in applying our framework.
38 |
39 | ## Results
40 |
41 | #### *Defining the statistical framework*
42 |
43 | For a penetrant dominant Mendelian allele to be disease causing, it cannot be present in the general population more frequently than the disease it causes. Furthermore, if the disease is genetically heterogeneous, it must not be more frequent than the proportion of cases attributable to that gene, or indeed to any single variant. We can therefore define the maximum credible population allele frequency (for a pathogenic allele) as:
44 |
45 | *maximum credible population AF = prevalence x maximum allelic contribution x 1/penetrance*
46 |
47 | where _maximum allelic contribution_ is the maximum proportion of cases potentially attributable to a single allele, a measure of heterogeneity.
48 |
49 | We do not know the true population allele frequency of any variant, having only an observed allele frequency in a finite population sample. Moreover, confidence intervals around this observed frequency are problematic to estimate given our incomplete knowledge of the frequency spectrum of rare variants, which appears to be skewed towards very rare variants. For instance, a variant observed only once in a sample of 10,000 chromosomes is much more likely to have a frequency <1:10,000 than a frequency >1:10,000 `r citep("10.1038/nature19057")`.
50 |
51 | If we turn the problem around, and begin instead from allele frequency, specifying a maximum _true_ allele frequency value we are willing to consider in the population (using the equation above), then we can estimate the probability distribution for allele counts in a given sample size. This follows a binomial distribution, and can be satisfactorily approximated with a Poisson distribution (see **Online Methods**). This allows us to set an upper limit on the number of alleles in a sample that is consistent with a given population frequency.
52 |
53 | Taking a range of cardiac disorders as exemplars, we use this framework to define the maximum credible allele frequency for disease-causing variants in each condition, and define and validate a set of maximum tolerated allele counts in the ExAC reference population sample. Figure 1 shows the general outline of our approach.
54 |
55 | ### Figure 1
56 |
57 | > A flow diagram of our approach, applied to a dominant condition. First, a disease-level maximum credible population allele frequency is calculated, based on disease prevalence, heterogeneity and penetrance. This is then used to calculate the maximum tolerated allele count in a reference dataset, taking into account the size of this dataset.
58 |
59 | 
60 |
61 | #### *Application and validation in hypertrophic cardiomyopathy*
62 |
63 | We illustrate our general approach using the dominant cardiac disorder hypertrophic cardiomyopathy (HCM), which has an estimated prevalence of 1 in 500 in the general population `r citep("10.1038/gim.2014.205")`. As there have been previous large-scale genetic studies of HCM, with series of up to 6,179 individuals `r citep(c("10.1038/gim.2014.205","10.1038/gim.2016.90"))`, we can make the assumption that no newly identified variant will be more frequent in cases than those identified to date (at least for well-studied ancestries). This allows us to define the maximum allelic contribution of any single variant to the disorder. In these large case series, the largest proportion of cases is attributable to the missense variant _MYBPC3_ c.1504C>T (p.Arg502Trp), found in 104/6179 HCM cases (1.7%; 95%CI 1.4-2.0%) `r citep(c("10.1038/gim.2014.205","10.1038/gim.2016.90"))`. We therefore take the upper bound of this proportion (0.02) as an estimate of the maximum allelic contribution in HCM (Table 1). Our maximum expected population allele frequency for this allele, assuming 50% penetrance as previously reported `r citep("10.1161/CIRCRESAHA.109.216291")`, is 1/500 x 1/2 (dividing prevalence per individual by the number of chromosomes per individual) x 0.02 x 1/0.5 = 4.0x10^-5^, which we take as the maximum credible population AF for any causative variant for HCM (Table 1).
64 |
65 | ### Table 1
66 | > Details of the most prevalent pathogenic variants in case cohorts for five cardiac conditions. Shown along with the frequency in cases is the estimated population allele frequency (calculated as: case frequency x disease prevalence x 1/2 x 1/variant penetrance) and the observed frequency in the ExAC dataset. ^\*^As penetrance estimates for individual variants are not widely available, we have applied an estimate of 0.5 across all disorders (see **Supplementary information**). HCM - hypertrophic cardiomyopathy; DCM - dilated cardiomyopathy; ARVC - arrhythmogenic right ventricular cardiomyopathy; LQTS - long QT syndrome. Case cohorts and prevalence estimates were obtained from: HCM`r citep(c("10.1038/gim.2014.205","10.1038/gim.2016.90"))`, DCM`r citep(c("10.1038/gim.2014.205","10.1038/gim.2016.90","10.1038/nrcardio.2013.105"))`, ARVC`r citep(c("10.1038/gim.2016.90","10.1016/j.ijcard.2005.12.015"))`, LQTS`r citep(c("10.1016/j.hrthm.2009.05.021","10.1016/j.cjca.2012.07.847"))` and Brugada`r citep(c("10.1016/j.hrthm.2009.09.069","10.1016/j.hlc.2015.07.020"))`.
67 |
68 | ```{r}
69 | read.delim("../data-raw/Table1.txt",check.names=F) %>% pander
70 | ```
71 |
72 | To apply this threshold while remaining robust to chance variation in observed allele counts, we ask how many times a variant with population allele frequency of 4.0x10^-5^ can be observed in a random population sample of a given size. For a 5% error rate we take the 95th percentile of a Poisson distribution with λ = expected allele count, which is given by: sample size (chromosomes) × expected population allele frequency (**Online Methods**). For HCM this gives us a maximum tolerated allele count of 9, assuming 50% penetrance (or 5 for fully penetrant alleles), for variants genotyped in the full ExAC cohort (sample size=121,412 chromosomes). The _MYBPC3_:c.1504C>T variant is observed 3 times in ExAC (freq=2.49x10^-5^; Table 1).
73 |
74 | To facilitate these calculations, we have produced an online calculator () that will compute maximum credible population allele frequency and maximum sample allele count for a user-specified genetic architecture, and conversely allow users to dynamically explore what genetic architecture(s) might be most compatible with an observed variant having a causal role in disease.
75 |
76 | ```{r}
77 | summariseCaseSeriesVariants()
78 | ```
79 |
80 | To assess these thresholds empirically, we explored the ExAC allele frequency spectrum of `r myVariantsN` distinct autosomal variants identified in 6179 recently published HCM cases referred for diagnostic sequencing, and individually assessed and reported according to international guidelines`r citep(c("10.1038/gim.2014.205","10.1038/gim.2016.90"))`.
81 | `r myPassPath`/`r myAllPath` (`r signif(100*myPassPath/myAllPath,3)`%) variants reported as ‘Pathogenic’ or ‘Likely Pathogenic’ fell below our threshold (Figure 2), including all variants with a clear excess in cases. `r myAbsentPath` of these variants are absent from ExAC. The `r myFailPath` variants historically classified as ‘Likely Pathogenic’, but prevalent in ExAC in this analysis, were reassessed using contemporary ACMG criteria: there was no strong evidence in support of pathogenicity, and they were reclassified in light of these findings (Supplementary Table 1).
82 | This analysis identifies `r myFailVus`/`r myAllVus` (`r signif(100*myFailVus/myAllVus,3)`%) VUS that are unlikely to be truly causative for HCM.
83 |
84 | ### Figure 2
85 | > Plot of ExAC allele count (all populations) against case allele count for variants classified as VUS, Likely Pathogenic or Pathogenic in 6179 HCM cases. The dotted lines represent the maximum tolerated ExAC allele counts in HCM for 50% (dark blue) and 100% penetrance (light blue). Variants are colour coded according to reported pathogenicity. Where classifications from contributing laboratories were discordant the more conservative classification is plotted. The inset panel shows the full dataset, while the main panel expands the region of primary interest.
86 |
87 | 
88 |
89 | The above analysis applied a single global allele count limit of 9 for HCM. However, as allele frequencies differ between populations, filtering based on frequencies in individual populations may provide greater power `r citep("10.1038/nature19057")`. For example, a variant relatively common in any one population is unlikely to be pathogenic, even if rare in other populations, provided the disease prevalence and architecture are consistent across populations. We therefore compute a maximum tolerated AC for each distinct sub-population of our reference sample, and filter based on the highest allele frequency observed in any major continental population (see **Online Methods**).
90 |
91 | To further validate this approach, we examined all 601 variants identified in ClinVar `r citep("10.1093/nar/gkt1113")` as "Pathogenic" or "Likely Pathogenic" and non-conflicted for HCM. 558 (93%) were sufficiently rare when assessed as described. 43 variants were insufficiently rare in at least one ExAC population, and were therefore re-curated. 42 of these had no segregation or functional data sufficient to demonstrate pathogenicity in the heterozygous state, and we would classify as VUS at most. The remaining variant (MYBPC3:c.3330+5G>C) had convincing evidence of pathogenicity, though with uncertain penetrance (see **Supplementary information**), and was observed twice in the African/African American ExAC population. This fell outside the 95% confidence interval for an underlying population frequency <4x10^-5^, but within the 99% confidence threshold: a single outlier due to stochastic variation is unsurprising given that these nominal probabilities are not corrected for multiple testing across 601 variants. In light of our updated assessment, 20 variants were reclassified as Benign/Likely Benign and 22 as VUS according to the American College of Medical Genetics and Genomics (ACMG) guidelines for variant interpretation `r citep("10.1038/gim.2015.30")` (Supplementary Table 1).
92 |
93 | #### *Extending this approach to other disorders*
94 |
95 | This approach can be readily applied in diseases where large case series are available to assess the genetic and allelic architecture, such as the inherited cardiac conditions displayed in Table 1. In the absence of large case series, we must estimate the genetic architecture parameters by extrapolating from similar disorders and/or variant databases.
96 |
97 | Where disease-specific variant databases exist, we can use these to help estimate the maximum allelic contribution in lieu of individual case series. For example, Marfan syndrome is a rare connective tissue disorder caused by variants in the _FBN1_ gene. The UMD-FBN1 database `r citep("10.1002/humu.10249")` contains 3077 variants in _FBN1_ from 280 references (last updated 28/08/14). The most common variant is in 30/3006 records (1.00%; 95%CI 0.53-1.46%), which likely overestimates its contribution to disease if related individuals are not systematically excluded. Taking the upper bound of this frequency as our maximum allelic contribution, we derive a maximum tolerated allele count of 2 (Table 2). None of the five most common variants in the database are present in ExAC.
98 |
99 | Where no mutation database exists, we can use what is known about similar disorders to estimate the maximum allelic contribution. For the cardiac conditions with large case series in Table 1, the maximum proportion of cases attributable to any one variant is 6.7% (95%CI 4.1-9.2%; PKP2:c.2146-1G>C found in 24/361 ARVC cases `r citep("10.1038/gim.2016.90")`). We therefore take the upper bound of this confidence interval (rounded up to 0.1) as an estimate of the maximum allelic contribution for other genetically heterogeneous cardiac conditions, unless we can find disease-specific evidence to alter it. For Noonan syndrome and Catecholaminergic Polymorphic Ventricular Tachycardia (CPVT - an inherited cardiac arrhythmia syndrome) with prevalences of 1 in 1000 `r citep("10.1016/S0140-6736(12)61023-X")` and 1 in 10,000 `r citep("10.1038/ejhg.2013.55")` respectively, this translates to maximum population frequencies of 5x10^-5^ and 5x10^-6^ and maximum tolerated ExAC allele counts of 10 and 2 (Table 2).
100 |
101 | Finally, if the allelic heterogeneity of a disorder is not well characterised, it is conservative to assume minimal heterogeneity, so that the contribution of each gene is modelled as attributable to one allele, and the maximum allelic contribution is substituted by the maximum genetic contribution (i.e. the maximum proportion of the disease attributable to a single gene). For classic Ehlers-Danlos syndrome, up to 40% of the disease is caused by variation in the _COL5A1_ gene `r citep("10.1097/GIM.0b013e3181eed412")`. Taking 0.4 as our maximum allelic contribution, and a population prevalence of 1/20,000 `r citep("10.1097/GIM.0b013e3181eed412")` we derive a maximum tolerated ExAC AC of 5 (Table 2).
102 |
103 | ### Table 2
104 | > Maximum credible population frequencies and maximum tolerated ExAC allele counts for variants causative of exemplar inherited cardiac conditions, assuming a penetrance of 0.5 throughout. CPVT - catecholaminergic polymorphic ventricular tachycardia; FH - familial hypercholesterolaemia. Prevalence estimates were obtained from: Marfan`r citep("10.1016/S0140-6736(05)67789-6")`, Noonan`r citep("10.1016/S0140-6736(12)61023-X")`, CPVT`r citep("10.1038/ejhg.2013.55")` and classical Ehlers-Danlos`r citep("10.1097/GIM.0b013e3181eed412")`.
105 |
106 | ```{r}
107 | read.delim("../data-raw/Table2.txt",check.names=F) %>% pander
108 | ```
109 |
110 | Here we have illustrated frequencies analysed at the level of the disease. In some cases this may be further refined by calculating distinct thresholds for individual genes, or even variants. For example, if there is one common founder mutation but no other variants that are recurrent across cases, then it would make sense to have the founder mutation as an exception to the calculated threshold.
111 |
112 | #### *Application to recessive diseases*
113 |
114 | So far we have considered diseases with a dominant inheritance model. Our framework is readily modified for application in recessive disease, and to illustrate this we consider the example of Primary Ciliary Dyskinesia (PCD), which has a prevalence of up to 1 in 10,000 individuals in the general population `r citep("10.1136/archdischild-2013-304831")`.
115 |
116 | Intuitively, if one penetrant recessive variant were to be responsible for all PCD cases, it could have a maximum population frequency of $\sqrt{1/10000}$. The maximum frequency of a recessive disease-causing variant in the population can be more completely defined as:
117 | *max credible allele frequency* = $\sqrt{\text{prevalence}} \times$ *maximum allelic contribution* $\times \sqrt{\text{maximum genetic contribution}} \times 1/\sqrt{\text{penetrance}}$
118 |
119 | where *maximum genetic contribution* represents the proportion of all cases that are attributable to the gene under evaluation, and *maximum allelic contribution* represents the proportion of cases attributable to that gene that are attributable to an individual variant (full derivation can be found in **Online Methods**).
120 |
121 | We can refine our evaluation of PCD by estimating the maximum genetic and allelic contribution. Across previously published cohorts of PCD cases `r citep(c("10.1164/rccm.200603-370OC","10.1164/rccm.200601-084OC","10.1038/ng.2277"))`, *DNAI1* IVS1+2\_3insT was the most common variant with a total of 17/358 alleles (4.7%; 95%CI 2.5-7.0%). Given that ~9% of all patients with PCD have disease-causing variants in _DNAI1_ and the IVS1+2\_3insT variant is estimated to account for ~57% of variant alleles in _DNAI1_ `r citep("10.1164/rccm.200603-370OC")`, we can take these values as estimates of the maximum genetic and allelic contribution for PCD, yielding a maximum expected population AF of $\sqrt{1/10000} \times 0.57 \times \sqrt{0.09} \times 1/\sqrt{0.5} = 2.42 \times 10^{-3}$. This translates to a maximum tolerated ExAC AC of 322. *DNAI1* IVS1+2\_3insT is itself present at 56/121108 ExAC alleles (45/66636 non-Finnish European alleles). A single variant reported to cause PCD in ClinVar occurs in ExAC with AC > 322 (_NME8_ NM\_016616.4:c.271-27C>T; AC=2306/120984): our model therefore indicates that this variant frequency is too common to be disease-causing, and consistent with this we note that it meets none of the current ACMG criteria for assertions of pathogenicity, and have reclassified it as VUS (see **Supplementary information**).
122 |
123 | #### *Pre-computing threshold values for the ExAC populations*
124 |
125 | For each ExAC variant, we defined a "filtering allele frequency" that represents the threshold disease-specific "maximum credible allele frequency" at or below which the disease could not plausibly be caused by that variant. A variant with a filtering allele frequency ≥ the maximum credible allele frequency for the disease under consideration should be filtered, while a variant with a filtering allele frequency below the maximum credible allele frequency remains a candidate. This value has been pre-computed for all variants in ExAC (see **Online Methods**), and is available via the ExAC VCF and browser (http://exac.broadinstitute.org).
126 |
127 | To assess the efficiency of our approach, we calculated the filtering allele frequency based on 60,206 exomes from ExAC and applied these filters to a simulated dominant Mendelian variant discovery analysis on the remaining 500 exomes (see **Online Methods**). Filtering at allele frequencies lower than 0.1% can substantially reduce the number of predicted protein-altering variants in consideration, with the mean number of variants per exome falling from 176 at a cutoff of 0.1% to 63 at a cutoff of 0.0001% (Figure 3a).
128 | Additionally, we compared the prevalence of variants in HCM genes in cases and controls across the allele frequency spectrum, and computed disease odds ratios for different frequency bins. The odds ratio for disease-association increases markedly at very low allele frequencies (Figure 3b) demonstrating that increasing the stringency of a frequency filter improves the information content of a genetic result.
129 |
130 | ### Figure 3
131 |
132 | > The clinical utility of stringent allele frequency thresholds. (a) The number of predicted protein-altering variants (definition in Online Methods) per exome as a function of the frequency filter applied. A one-tailed 95% confidence interval is used, meaning that variants were removed from consideration if their AC would fall within the top 5% of the Poisson probability distribution for the user's maximum credible AF (x axis). (b) The odds ratio for HCM disease-association against allele frequency. The prevalence of variants in HCM-associated genes (_MYH7_, _MYBPC3_ and other sarcomeric genes (_TNNT2_, _TNNI3_, _MYL2_, _MYL3_, _TPM1_ and _ACTC1_, analysed collectively)) in 322 HCM cases and 60,706 ExAC controls was compared for a range of allele frequency bins, and an odds ratio computed (see **Online Methods**). Data for each bin is plotted at the upper allele frequency cutoff. Error bars represent 95% confidence intervals. The probability that a variant is pathogenic is much greater at very low allele frequencies.
133 |
134 | 
135 |
136 | ## Discussion
137 |
138 | We have outlined a statistically robust framework for assessing whether a variant is 'too common' to be causative for a Mendelian disorder of interest. To our knowledge, there is currently no equivalent guidance on the use of variant frequency information, resulting in inconsistent thresholds across both clinical and research settings. Furthermore, though disease-specific thresholds are recommended`r citep("10.1016/j.ajhg.2016.03.024")`, in practice the same thresholds may be used across all diseases, even where they have widely differing genetic architectures and prevalences. We have shown the importance of applying stringent AF thresholds, in that many more variants can be removed from consideration, and the remaining variants have a much higher likelihood of being relevant. We also show, using HCM as an example, how lowering this threshold does not remove true dominant pathogenic variants.
139 |
140 | In order to assist others in applying our framework, we have precomputed a 'filtering allele frequency' for all variants across the ExAC dataset. This is defined such that if the filtering allele frequency of a variant is at or above the "maximum credible population allele frequency" for the disease in question, then that variant is not a credible candidate (in other words, for any population allele frequency below the threshold value, the probability of the observed allele count in the ExAC sample is <0.05). Once a user has determined their "maximum credible population allele frequency", they may remove from consideration ExAC variants for which the filtering allele frequency is greater than or equal to the chosen value.
141 |
142 | We recognize several limitations of our approach.
143 | First, the approach is limited by our understanding of the prevalence and genetic architecture of the disease in question: this characterisation will vary for different diseases and in different populations, though we illustrate approaches to estimation and extrapolation of parameters. In particular, we must be wary of extrapolating to or from less-well characterised populations that could harbour population-specific founder mutations. It is critical to define the genetic architecture in the population under study. Secondly, it is often difficult to obtain accurate penetrance information for reported variants, and it is also difficult to know what degree of penetrance to expect or assume for newly discovered pathogenic variants (see **Supplementary information** for alternative approaches).
144 |
145 | Thirdly, while we believe that ExAC is depleted of severe childhood inherited conditions, and not enriched for cardiomyopathies, it could be enriched relative to the general population for some Mendelian conditions, including Mendelian forms of common diseases such as diabetes or coronary disease that have been studied in contributing cohorts. Where this is possible, the maximum credible population allele frequency can be simply computed based on the estimated disease prevalence in the ExAC cohort, rather than the population prevalence. Finally, although the resulting allele frequency thresholds are more stringent than those previously used, they are likely to still be very lenient for many applications. For instance, we base our calculation on the most prevalent known pathogenic variant from a disease cohort. For HCM, for which more than 6,000 people have been sequenced, it is unlikely that any single newly identified variant, not previously catalogued in this large cohort, will explain a similarly large proportion of the disease as the most common causal variant, at least in well-studied populations. Future work may therefore involve modeling the frequency distribution of all known variants for a disorder, to further refine these thresholds.
146 |
147 | The power of our approach is limited by currently available datasets. Increases in both the ancestral diversity and size of reference datasets will bring additional power to our method over time. We have avoided filtering on variants observed only once, because a single observation provides little information about true allele frequency. A ten-fold increase in sample size, resulting from projects such as the US Precision Medicine Initiative, will separate vanishingly rare variants from those whose frequency really is ~1 in 100,000. Increased phenotypic information linked to reference datasets will also reduce limitations due to uncertain disease status, and improve prevalence estimates, adding further power to our approach.
148 |
149 | ## Acknowledgements
150 |
151 | This work was supported by the Wellcome Trust (107469/Z/15/Z), the Medical Research Council (UK), the NIHR Biomedical Research Unit in Cardiovascular Disease at Royal Brompton & Harefield NHS Foundation Trust and Imperial College London, the Fondation Leducq (11 CVD-01), a Health Innovation Challenge Fund (HICF-R6-373) award from the Wellcome Trust and Department of Health, UK, and by the National Institute of Diabetes and Digestive and Kidney Diseases and the National Institute of General Medical Sciences of the NIH (awards U54DK105566 and R01GM104371). EVM is supported by the National Institutes of Health under a Ruth L. Kirschstein National Research Service Award (NRSA) NIH Individual Predoctoral Fellowship (F31) (award AI122592-01A1). AHO-L is supported by National Institutes of Health under Ruth L. Kirschstein National Research Service Award 4T32GM007748.
152 |
153 | This publication includes independent research commissioned by the Health Innovation Challenge Fund (HICF), a parallel funding partnership between the Department of Health and Wellcome Trust. The views expressed in this work are those of the authors and not necessarily those of the Department of Health or Wellcome Trust.
154 |
155 | ## Data availability
156 |
157 | All data required to reproduce these analyses is available at . The manuscript was compiled in R, and source code for the analysis, figures and manuscript, are available at the same location. Curated variant interpretations are deposited in ClinVar *Accession & DOI to be added*. ExAC annotations are available at . Our allele frequency calculator app is located at , and the source code available at .
158 |
159 | ## Methods
160 |
161 | ### Calculating maximum tolerated allele counts
162 |
163 | The maximum frequency of a dominant disease-causing variant in the population was defined as:
164 | *maximum credible population AF = prevalence* $\times$ *maximum allelic contribution* $\times$ *1/penetrance*
165 |
166 | Estimates of disease prevalence were obtained from the literature. Where multiple different values were reported, the highest was used in the calculation, which leads to lenient filtering. A variant penetrance of 0.5 was used for all analyses, as penetrance estimates for individual variants are not widely available. This corresponds to the reported penetrance of the HCM variant used to illustrate our approach `r citep("10.1161/CIRCRESAHA.109.216291")` and is the minimum found when researching other variants/disorders.
167 |
168 | Determination of the maximum allelic contribution (a measure of heterogeneity) is described in the text. Where a large cohort exists for a disorder, the upper bound of the confidence interval of the frequency of the most common variant in this cohort was used as the maximum allelic contribution.
169 |
170 | Having established a maximum credible allele frequency (AF), the maximum tolerated allele count (AC) was computed as the AC occurring at the upper bound of the one-tailed 95% confidence interval (95%CI AC) for that allele frequency, given the observed allele number (AN). Since the population is drawn without replacement, this would strictly be a hypergeometric distribution, but this can be modeled as binomial as the sample is much smaller than the population from which it is drawn. For ease of computation, we approximate this with a Poisson distribution. In R, this is implemented as `max_ac = qpois(quantile_limit,an*af)`, where `max_ac` is the 95%CI AC, `quantile_limit` is 0.95 (for a one-sided 95%CI), `an` is the observed allele number, and `af` is the maximum credible population allele frequency.
171 |
172 | ### Application to recessive diseases
173 |
174 | The prevalence of a recessive condition can be related to the allele frequency of causative variants by:
175 | *Prevalence =* $\sum$ *(allele frequency of causative alleles in each contributing gene)^2^ x penetrance*
176 | approximating to:
177 | *Prevalence = (combined frequency of causative alleles in gene)^2^ x (number of similar genes) x penetrance*
178 | and expanding to:
179 | *Prevalence = (max individual allele frequency x 1/maximum allelic contribution)^2^ x 1/maximum genetic contribution x penetrance*
180 |
181 | where *maximum genetic contribution* represents the proportion of all cases that are attributable to the gene under evaluation, and *maximum allelic contribution* represents the proportion of cases attributable to that gene that are attributable to an individual variant. The maximum frequency of a recessive disease causing variant in the population was therefore defined as:
182 | *max credible allele frequency* = $\sqrt{\text{prevalence}} \times$ *maximum allelic contribution* $\times \sqrt{\text{maximum genetic contribution}} \times 1/\sqrt{\text{penetrance}}$
183 |
184 | ### Pre-computing filtering allele frequency values for ExAC
185 |
186 | We define the "filtering allele frequency" for a variant, or `af_filter`, as the highest true population allele frequency for which the upper bound of the 95% confidence interval of allele count under a Poisson distribution is still less than the variant's observed allele count in the reference sample. It functions as equivalent to a lower bound estimate for the true allele frequency of an observed variant: if the filtering allele frequency of a variant is at or above the maximum credible allele frequency for a disease, then the variant is considered too common to be causative of the disease.
187 |
188 | Consider, for example, a variant with an observed AC=3 and AN=100,000. If a user's maximum credible allele frequency for their disease is 1 in 100,000, then this variant should be kept in consideration as potentially pathogenic, because the upper bound of the Poisson 95%CI is AC=3. On the other hand, if the user's maximum credible allele frequency is 1 in 200,000 then this variant should be filtered out, as the 95%CI upper bound is only AC=2. We define `af_filter` as the highest AF value for which a variant should be filtered out.
189 |
190 | In the example, the highest allele frequency that gives a 95%CI AC of 2 when AN=100,000 is approximately 8.17e-6. Instead of solving exactly for such values, which would require solving the inverse cumulative distribution function of the Poisson distribution, we derive a numerical approximation in two steps:
191 |
192 | 1. For each variant in consideration, we use R's `uniroot` function to find an AF value (though not necessarily the highest AF value) for which the 95%CI AC is one less than the observed AC.
193 | 2. We then loop, incrementing by units of millionths, and return the highest AF value that still gives a 95%CI AC less than the observed AC.
194 |
195 | In order to pre-compute `af_filter` values for all of ExAC (version 0.3.1), we apply this procedure to the AC and AN values for each of the five major continental populations in ExAC, and take the highest result from any population. Usually, this is from the population with the highest nominal allele frequency. However, because the tightness of a 95% confidence interval in the Poisson distribution depends upon sample size, the stringency of the filter depends upon the allele number (AN). The stringency of the filter therefore varies appropriately according to the size of the sub-population in which the variant is observed, and sequencing coverage at that site, and `af_filter` is occasionally derived from a population other than the one with the highest nominal allele frequency.
196 |
197 | For this analysis, we used adjusted AC and AN, meaning variant calls with GQ≥20 and DP≥10.
198 |
199 | #### Treatment of singletons and other populations
200 |
201 | It is worth considering whether a single observation in a reference sample should ever be treated as incompatible with disease. Using the approach outlined above, it can be inferred that an ExAC AC=1 would be considered incompatible with a true population allele frequency <2.9x10^-6^ (with 95% confidence). For a penetrant disease with a prevalence of 1:1,000,000, the probability of observing a specific causative allele in ExAC is <0.01, even if the disease is genetically homogeneous with just one causative variant. In practice however, we feel that there are few, if any, diseases that are extremely rare yet have sufficiently well-characterized genetic architecture to discard singleton variants from a reference sample. Therefore, for singletons (variants observed exactly once in ExAC), we set the filtering allele frequency to zero.
202 |
203 | We also note that occasionally a variant is seen in individuals falling under the Finnish or "Other" population categories in ExAC, and is a singleton or absent in all five continental populations. For these variants, the filtering allele frequency is set to zero. Because the Finnish are a bottlenecked population, disease-causing alleles may reach frequencies that would be impossible in large outbred populations. Similarly, because we have not assigned ancestry for the "Other" individuals, it is difficult to assess the population frequency of variants seen only in this set of individuals. Users are left to judge whether variants that would not be filtered on the basis of frequency in the five continental populations, but that are recurrent in Finnish or "Other" populations, should be removed from consideration according to the specific circumstances.
204 |
205 | #### Simulated Mendelian variant discovery analysis
206 |
207 | To simulate Mendelian variant discovery, we randomly selected 100 individuals from each of five major continental populations and filtered their exomes against filtering allele frequencies derived from the remaining 60,206 ExAC individuals. The subset of individuals was the same as that previously reported `r citep("10.1038/nature19057")`.
208 | Predicted protein-altering variants are defined as missense and equivalent (including in-frame indels, start lost, stop lost, and mature miRNA-altering), and protein-truncating variants (nonsense, essential splice site, and frameshift).
209 |
210 | ### Variant curation
211 |
212 | We utilized the July 9, 2015 release of ClinVar, extracting variants from XML and TXT releases into a single tab-delimited file through use of a Python implementation of vt normalize `r citep("10.1093/bioinformatics/btv112")`, as described previously `r citep("10.1038/nature19057")`. Only variants annotated as pathogenic and non-conflicted were investigated. ExAC counts were determined by matching on chromosome, position, reference, and alternate alleles. For all variants above the proposed maximum tolerated allele count for HCM, all HGMD annotated literature was reviewed and the level of evidence supporting disease pathogenicity was curated according to ACMG criteria `r citep("10.1038/gim.2015.30")`.
213 |
214 | ### Calculating odds ratios for HCM variant burden
215 |
216 | We used a cohort of 322 patients recruited to the Royal Brompton Hospital cardiac Biomedical Research Unit with diagnosis of HCM confirmed by cardiac MRI. These samples were sequenced using the Illumina TruSight Cardio Sequencing Kit `r citep("10.1007/s12265-016-9673-5")` on the Illumina MiSeq and NextSeq platforms. This study was subject to ethical approval (REC: 09/H0504/104+5) and informed consent was obtained for all subjects. The number of rare variants in _MYBPC3_, _MYH7_ and the six other sarcomeric genes associated with HCM (_TNNT2_, _TNNI3_, _MYL2_, _MYL3_, _TPM1_ and _ACTC1_) were calculated for this HCM cohort, and for reference population samples from ExAC. Case/control variant frequencies were calculated for all protein-altering variants (frameshift, nonsense, splice donor/acceptor, missense and in-frame insertions/deletions), with frequencies and case/control odds ratios calculated separately for non-overlapping ExAC allele frequency bins with the following breakpoints: 1x10^-5^, 5x10^-5^, 1x10^-4^, 5x10^-4^ and 1x10^-3^. Odds ratios were calculated as OR = (cases with variant / cases without variant) / (ExAC samples with variant / ExAC samples without variant) along with 95% confidence intervals. In the absence of sample-level genotype data for ExAC, the number of samples with a variant was approximated by the total number of variant alleles - i.e. assuming that each rare variant was found in a distinct sample.
217 |
218 | ## Code availability
219 |
220 | The manuscript was compiled in R, and source code for the analysis, figures and manuscript, are available at . The source code for our allele frequency calculator app is located at .
221 |
222 | ## Supplementary information
223 |
224 | Supplementary note 1 - Curation of a high frequency PCD variant
225 |
226 | Supplementary note 2 - Dealing with penetrance
227 |
228 | Supplementary table 1
229 |
230 | ## Bibliography
231 |
232 | ```{r, echo=FALSE, message=FALSE}
233 | write.bibtex(file="references.bib")
234 | ```
235 |
--------------------------------------------------------------------------------
/manuscript/nature.csl:
--------------------------------------------------------------------------------
1 |
2 |
118 |
--------------------------------------------------------------------------------
/manuscript/newTemplate.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/manuscript/newTemplate.docx
--------------------------------------------------------------------------------
/manuscript/template2.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/manuscript/template2.docx
--------------------------------------------------------------------------------
/packrat/init.R:
--------------------------------------------------------------------------------
## Packrat bootstrap script (generated for packrat 0.4.7-1).
##
## Everything runs inside local() so that no helper leaks into the global
## environment. In order, the script:
##   1. works out the project-private library directory;
##   2. if packrat can be loaded from that library, switches packrat mode on
##      and returns;
##   3. under RStudio, defers bootstrapping to the "rstudio.sessionInit" hook;
##   4. otherwise (interactive session or --bootstrap-packrat on the command
##      line) installs packrat -- from a bundled source tarball if one exists,
##      else via the user library / devtools / CRAN -- and restores the
##      project library.
local({

  ## Helper function to get the path to the library directory for a
  ## given packrat project. Returns a path relative to the working directory
  ## unless 'projDir' names a different directory, in which case the path is
  ## prefixed with 'projDir'.
  getPackratLibDir <- function(projDir = NULL) {
    path <- file.path("packrat", "lib", R.version$platform, getRversion())

    if (!is.null(projDir)) {

      ## Strip trailing slashes if necessary
      projDir <- sub("/+$", "", projDir)

      ## Only prepend path if different from current working dir
      if (!identical(normalizePath(projDir), normalizePath(getwd())))
        path <- file.path(projDir, path)
    }

    path
  }

  ## Ensure that we set the packrat library directory relative to the
  ## project directory. Normally, this should be the working directory,
  ## but we also use '.rs.getProjectDirectory()' if necessary (e.g. we're
  ## rebuilding a project while within a separate directory)
  libDir <- if (exists(".rs.getProjectDirectory"))
    getPackratLibDir(.rs.getProjectDirectory())
  else
    getPackratLibDir()

  ## Unload packrat in case it's loaded -- this ensures packrat _must_ be
  ## loaded from the private library. Note that `requireNamespace` will
  ## succeed if the package is already loaded, regardless of lib.loc!
  if ("packrat" %in% loadedNamespaces())
    try(unloadNamespace("packrat"), silent = TRUE)

  if (suppressWarnings(requireNamespace("packrat", quietly = TRUE, lib.loc = libDir))) {

    # Check 'print.banner.on.startup' -- the banner is printed only when the
    # option is "auto" and the RSTUDIO environment variable is unset; in every
    # other case it is suppressed
    print.banner <- packrat::get_opts("print.banner.on.startup")
    if (print.banner == "auto" && is.na(Sys.getenv("RSTUDIO", unset = NA))) {
      print.banner <- TRUE
    } else {
      print.banner <- FALSE
    }
    return(packrat::on(print.banner = print.banner))
  }

  ## Escape hatch to allow RStudio to handle bootstrapping. This
  ## enables RStudio to provide print output when automagically
  ## restoring a project from a bundle on load.
  if (!is.na(Sys.getenv("RSTUDIO", unset = NA)) &&
      is.na(Sys.getenv("RSTUDIO_PACKRAT_BOOTSTRAP", unset = NA))) {
    Sys.setenv("RSTUDIO_PACKRAT_BOOTSTRAP" = "1")
    setHook("rstudio.sessionInit", function(...) {
      # Ensure that, on sourcing 'packrat/init.R', we are
      # within the project root directory
      if (exists(".rs.getProjectDirectory")) {
        owd <- getwd()
        setwd(.rs.getProjectDirectory())
        on.exit(setwd(owd), add = TRUE)
      }
      source("packrat/init.R")
    })
    return(invisible(NULL))
  }

  ## Bootstrapping -- only performed in interactive contexts,
  ## or when explicitly asked for on the command line
  if (interactive() || "--bootstrap-packrat" %in% commandArgs(TRUE)) {

    message("Packrat is not installed in the local library -- ",
            "attempting to bootstrap an installation...")

    ## We need utils for the following to succeed -- there are calls to functions
    ## in 'restore' that are contained within utils. utils gets loaded at the
    ## end of start-up anyhow, so this should be fine
    library("utils", character.only = TRUE)

    ## Install packrat into local project library
    packratSrcPath <- list.files(full.names = TRUE,
                                 file.path("packrat", "src", "packrat")
    )

    ## No packrat tarballs available locally -- try some other means of installation
    if (!length(packratSrcPath)) {

      message("> No source tarball of packrat available locally")

      ## There are no packrat sources available -- try using a version of
      ## packrat installed in the user library to bootstrap
      if (requireNamespace("packrat", quietly = TRUE) && packageVersion("packrat") >= "0.2.0.99") {
        message("> Using user-library packrat (",
                packageVersion("packrat"),
                ") to bootstrap this project")
      }

      ## Couldn't find a user-local packrat -- try finding and using devtools
      ## to install
      else if (requireNamespace("devtools", quietly = TRUE)) {
        message("> Attempting to use devtools::install_github to install ",
                "a temporary version of packrat")
        library(stats) ## for setNames
        devtools::install_github("rstudio/packrat")
      }

      ## Try downloading packrat from CRAN if available
      else if ("packrat" %in% rownames(available.packages())) {
        message("> Installing packrat from CRAN")
        install.packages("packrat")
      }

      ## Fail -- couldn't find an appropriate means of installing packrat
      else {
        ## (trailing space added after the quoted command so the message no
        ## longer reads '...packrat')"and restarting')
        stop("Could not automatically bootstrap packrat -- try running ",
             "\"'install.packages('devtools'); devtools::install_github('rstudio/packrat')\" ",
             "and restarting R to bootstrap packrat.")
      }

      # Restore the project, unload the temporary packrat, and load the private packrat
      packrat::restore(prompt = FALSE, restart = TRUE)

      ## This code path only reached if we didn't restart earlier
      unloadNamespace("packrat")
      requireNamespace("packrat", lib.loc = libDir, quietly = TRUE)
      return(packrat::on())

    }

    ## Multiple packrat tarballs available locally -- try to choose one
    ## TODO: read lock file and infer most appropriate from there; low priority because
    ## after bootstrapping packrat a restore should do the right thing
    if (length(packratSrcPath) > 1) {
      ## Pick the first tarball *before* warning so the message names the one
      ## actually used (previously every candidate path was pasted together
      ## without a separator, and a space was missing after the semicolon).
      packratSrcPath <- packratSrcPath[[1]]
      warning("Multiple versions of packrat available in the source directory; ",
              "using packrat source:\n- ", shQuote(packratSrcPath))
    }


    lib <- file.path("packrat", "lib", R.version$platform, getRversion())
    if (!file.exists(lib)) {
      dir.create(lib, recursive = TRUE)
    }
    lib <- normalizePath(lib, winslash = "/")

    message("> Installing packrat into project private library:")
    message("- ", shQuote(lib))

    ## Wrap each element of 'x' in the string 'with' (used for quoting)
    surround <- function(x, with) {
      if (!length(x)) return(character())
      paste0(with, x, with)
    }

    ## The following is performed because a regular install.packages call can fail.
    ## The install is instead run in a fresh child R process: the call is
    ## assembled as text and handed to `R --vanilla --slave -e "<call>"`.
    peq <- function(x, y) paste(x, y, sep = " = ")
    installArgs <- c(
      peq("pkgs", surround(packratSrcPath, with = "'")),
      peq("lib", surround(lib, with = "'")),
      peq("repos", "NULL"),
      peq("type", surround("source", with = "'"))
    )
    installCmd <- paste(sep = "",
                        "utils::install.packages(",
                        paste(installArgs, collapse = ", "),
                        ")")

    fullCmd <- paste(
      surround(file.path(R.home("bin"), "R"), with = "\""),
      "--vanilla",
      "--slave",
      "-e",
      surround(installCmd, with = "\"")
    )
    system(fullCmd)

    ## Tag the installed packrat so we know it's managed by packrat
    ## TODO: should this be taking information from the lockfile? this is a bit awkward
    ## because we're taking an un-annotated packrat source tarball and simply assuming it's now
    ## an 'installed from source' version

    ## -- InstallAgent -- ##
    installAgent <- 'InstallAgent: packrat 0.4.7-1'

    ## -- InstallSource -- ##
    installSource <- 'InstallSource: source'

    ## Append both tags to the DESCRIPTION of the freshly installed package
    packratDescPath <- file.path(lib, "packrat", "DESCRIPTION")
    DESCRIPTION <- readLines(packratDescPath)
    DESCRIPTION <- c(DESCRIPTION, installAgent, installSource)
    cat(DESCRIPTION, file = packratDescPath, sep = "\n")

    # Otherwise, continue on as normal
    message("> Attaching packrat")
    library("packrat", character.only = TRUE, lib.loc = lib)

    message("> Restoring library")
    restore(restart = FALSE)

    # If the environment allows us to restart, do so with a call to restore
    restart <- getOption("restart")
    if (!is.null(restart)) {
      message("> Packrat bootstrap successfully completed. ",
              "Restarting R and entering packrat mode...")
      return(restart())
    }

    # Callers (source-erers) can define this hidden variable to make sure we don't enter packrat mode
    # Primarily useful for testing
    if (!exists(".__DONT_ENTER_PACKRAT_MODE__.") && interactive()) {
      message("> Packrat bootstrap successfully completed. Entering packrat mode...")
      packrat::on()
    }

    Sys.unsetenv("RSTUDIO_PACKRAT_BOOTSTRAP")

  }

})
218 |
--------------------------------------------------------------------------------
/packrat/packrat.lock:
--------------------------------------------------------------------------------
1 | PackratFormat: 1.4
2 | PackratVersion: 0.4.7.1
3 | RVersion: 3.3.0
4 | Repos: BioCsoft=https://bioconductor.org/packages/3.3/bioc,
5 | BioCann=https://bioconductor.org/packages/3.3/data/annotation,
6 | BioCexp=https://bioconductor.org/packages/3.3/data/experiment,
7 | BioCextra=https://bioconductor.org/packages/3.3/extra,
8 | CRAN=https://cran.rstudio.com/
9 |
10 | Package: BH
11 | Source: CRAN
12 | Version: 1.60.0-1
13 | Hash: 889445e87a2acd4cc58440957f3b0d1a
14 |
15 | Package: DBI
16 | Source: CRAN
17 | Version: 0.4-1
18 | Hash: 6524d5c5d30ff74084ac028eeb19eb1b
19 |
20 | Package: R6
21 | Source: CRAN
22 | Version: 2.1.2
23 | Hash: c515e41c73294952cbb33ee1c1be5a2d
24 |
25 | Package: RColorBrewer
26 | Source: CRAN
27 | Version: 1.1-2
28 | Hash: c0d56cd15034f395874c870141870c25
29 |
30 | Package: RCurl
31 | Source: CRAN
32 | Version: 1.95-4.7
33 | Hash: 7756ed9df5d79ca87bc9e93f85d89b87
34 | Requires: bitops
35 |
36 | Package: RJSONIO
37 | Source: CRAN
38 | Version: 1.3-0
39 | Hash: fb672e20eb6f3010a3639f855d8ef6de
40 |
41 | Package: Rcpp
42 | Source: CRAN
43 | Version: 0.12.6
44 | Hash: 79f89fabd832de0a2e60aa05d9027f32
45 |
46 | Package: RefManageR
47 | Source: CRAN
48 | Version: 0.10.5
49 | Hash: 987c0eb72c07d2e8b568f53cd22ef706
50 | Requires: RCurl, RJSONIO, XML, bibtex, lubridate, plyr, stringr
51 |
52 | Package: XML
53 | Source: CRAN
54 | Version: 3.98-1.3
55 | Hash: 689166755239c3300769ca2d600381c6
56 |
57 | Package: assertthat
58 | Source: CRAN
59 | Version: 0.1
60 | Hash: 0afb92b59b02593c70ff8046700ba9d3
61 |
62 | Package: base64enc
63 | Source: CRAN
64 | Version: 0.1-3
65 | Hash: c590d29e555926af053055e23ee79efb
66 |
67 | Package: bibtex
68 | Source: CRAN
69 | Version: 0.4.0
70 | Hash: 1884fc6fc10f47e9c6a77bfd9370b28a
71 |
72 | Package: bitops
73 | Source: CRAN
74 | Version: 1.0-6
75 | Hash: 67d0775189fd0041d95abca618c5c07e
76 |
77 | Package: caTools
78 | Source: CRAN
79 | Version: 1.17.1
80 | Hash: 97cb6f6293cd18d17df77a6383cc6763
81 | Requires: bitops
82 |
83 | Package: colorspace
84 | Source: CRAN
85 | Version: 1.2-6
86 | Hash: 00bb12245cd975c450cc4a960884fa15
87 |
88 | Package: curl
89 | Source: CRAN
90 | Version: 0.9.5
91 | Hash: 03562aad7537914f516bd67e9bba28b1
92 |
93 | Package: dichromat
94 | Source: CRAN
95 | Version: 2.0-0
96 | Hash: 08eed0c80510af29bb15f840ccfe37ce
97 |
98 | Package: digest
99 | Source: CRAN
100 | Version: 0.6.8
101 | Hash: de202f956596a476d09304e3a2ebf1e5
102 |
103 | Package: dplyr
104 | Source: CRAN
105 | Version: 0.5.0
106 | Hash: fc7f06a5772a1b2845ef6e74b8aff740
107 | Requires: BH, DBI, R6, Rcpp, assertthat, lazyeval, magrittr, tibble
108 |
109 | Package: evaluate
110 | Source: CRAN
111 | Version: 0.9
112 | Hash: cf338d73ba4ee3a4fefec612cdfd38db
113 | Requires: stringr
114 |
115 | Package: formatR
116 | Source: CRAN
117 | Version: 1.2
118 | Hash: cce53206179a99ae5a8bcf154ab5abde
119 |
120 | Package: ggplot2
121 | Source: CRAN
122 | Version: 2.1.0
123 | Hash: 50e297b0191179c39c7dcae0eff72b51
124 | Requires: digest, gtable, plyr, reshape2, scales
125 |
126 | Package: gtable
127 | Source: CRAN
128 | Version: 0.1.2
129 | Hash: 38a0f066373a60824cd3ade2362af363
130 |
131 | Package: highr
132 | Source: CRAN
133 | Version: 0.5
134 | Hash: 25f9c02030b94c8154273699a7df9cf8
135 |
136 | Package: htmltools
137 | Source: CRAN
138 | Version: 0.3.5
139 | Hash: f81546c00234653ca3a7dcd0b7e1a757
140 | Requires: Rcpp, digest
141 |
142 | Package: httr
143 | Source: CRAN
144 | Version: 1.1.0
145 | Hash: 27ffb4b4dbb8ac4019126070d66910fc
146 | Requires: R6, curl, jsonlite, mime, openssl
147 |
148 | Package: jsonlite
149 | Source: CRAN
150 | Version: 0.9.19
151 | Hash: 4a983f753b6e88ae0f5a6ac152d8cc32
152 |
153 | Package: knitauthors
154 | Source: github
155 | Version: 0.0.1
156 | Hash: 9bdff792c9601b965853e78177d208ec
157 | Requires: dplyr
158 | GithubRepo: knitauthors
159 | GithubUsername: jamesware
160 | GithubRef: master
161 | GithubSha1: 15f87cf98f561cc097d47f463905b2ca5b4ab35b
162 |
163 | Package: knitcitations
164 | Source: CRAN
165 | Version: 1.0.7
166 | Hash: 8ae09e4d9390a166ac95ca75c91767a8
167 | Requires: RefManageR, digest, httr
168 |
169 | Package: knitr
170 | Source: CRAN
171 | Version: 1.12
172 | Hash: bbb197833ff9b57c328c0fc9f924cb92
173 | Requires: digest, formatR, highr, yaml, markdown, stringr, evaluate
174 |
175 | Package: labeling
176 | Source: CRAN
177 | Version: 0.3
178 | Hash: ecf589b42cd284b03a4beb9665482d3e
179 |
180 | Package: lazyeval
181 | Source: CRAN
182 | Version: 0.1.10
183 | Hash: 9679f1ac7f6bc07bc79755f34cd15e1f
184 |
185 | Package: lubridate
186 | Source: CRAN
187 | Version: 1.5.0
188 | Hash: 038715dfce23c748aef45c22f46e4b75
189 | Requires: stringr
190 |
191 | Package: magrittr
192 | Source: CRAN
193 | Version: 1.5
194 | Hash: bdc4d48c3135e8f3b399536ddf160df4
195 |
196 | Package: markdown
197 | Source: CRAN
198 | Version: 0.7.7
199 | Hash: fea2343a1119d61b0cc5c0a950d103a3
200 | Requires: mime
201 |
202 | Package: mime
203 | Source: CRAN
204 | Version: 0.3
205 | Hash: 629133973445ad75e5438a2bb6b05c17
206 |
207 | Package: munsell
208 | Source: CRAN
209 | Version: 0.4.2
210 | Hash: 72d343a6664778029f49a7ad36de1cab
211 | Requires: colorspace
212 |
213 | Package: openssl
214 | Source: CRAN
215 | Version: 0.9.4
216 | Hash: 5ceb5d83bba44e8e5a764ac6ad32e9fb
217 |
218 | Package: packrat
219 | Source: CRAN
220 | Version: 0.4.7-1
221 | Hash: 16ca16d36eb18d1d16113606ecc5c400
222 |
223 | Package: pander
224 | Source: CRAN
225 | Version: 0.6.0
226 | Hash: c7dc3e820aad53039a123e2242e5d566
227 | Requires: Rcpp, digest
228 |
229 | Package: plyr
230 | Source: CRAN
231 | Version: 1.8.2
232 | Hash: 46fd0a3c03da5fc861f77b79400fd311
233 | Requires: Rcpp
234 |
235 | Package: reshape2
236 | Source: CRAN
237 | Version: 1.4.1
238 | Hash: 55a31e20f8a216855e138ca83756a4b0
239 | Requires: Rcpp, plyr, stringr
240 |
241 | Package: rmarkdown
242 | Source: CRAN
243 | Version: 1.0
244 | Hash: 7c60cd30594b34874a6a7e729f0a6fad
245 | Requires: base64enc, caTools, evaluate, htmltools, jsonlite, knitr,
246 | yaml
247 |
248 | Package: scales
249 | Source: CRAN
250 | Version: 0.3.0
251 | Hash: 3ed76b93ca7adf79721d3533b1a1ac95
252 | Requires: RColorBrewer, Rcpp, dichromat, labeling, munsell, plyr
253 |
254 | Package: stringi
255 | Source: CRAN
256 | Version: 0.4-1
257 | Hash: 5f5be3e0eaf1ce0228196e80c47b0ab3
258 |
259 | Package: stringr
260 | Source: CRAN
261 | Version: 1.0.0
262 | Hash: 2676dd5f88890910962b733b0f9540e1
263 | Requires: magrittr, stringi
264 |
265 | Package: tibble
266 | Source: CRAN
267 | Version: 1.1
268 | Hash: 5467b4aa158b2d984538f7ffde17171c
269 | Requires: Rcpp, assertthat, lazyeval
270 |
271 | Package: tidyr
272 | Source: CRAN
273 | Version: 0.5.1
274 | Hash: 56af57d037a1d229dfa50601debf3f4b
275 | Requires: Rcpp, dplyr, lazyeval, magrittr, stringi, tibble
276 |
277 | Package: yaml
278 | Source: CRAN
279 | Version: 2.1.13
280 | Hash: 4854ccabebc225e8a7309fb4a74980de
281 |
--------------------------------------------------------------------------------
/packrat/packrat.opts:
--------------------------------------------------------------------------------
1 | auto.snapshot: TRUE
2 | use.cache: FALSE
3 | print.banner.on.startup: auto
4 | vcs.ignore.lib: TRUE
5 | vcs.ignore.src: TRUE
6 | external.packages:
7 | knitr
8 | devtools
9 | local.repos:
10 | load.external.packages.on.startup: TRUE
11 | ignored.packages:
12 | quiet.package.installation: TRUE
13 | snapshot.recommended.packages: FALSE
14 |
--------------------------------------------------------------------------------
/packrat/src/packrat/packrat_0.4.7-1.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/packrat/src/packrat/packrat_0.4.7-1.tar.gz
--------------------------------------------------------------------------------
/src/hcm_curation.R:
--------------------------------------------------------------------------------
# ClinVar curation helper.
# Annotates every ClinVar variant with its adjusted ExAC allele count, then counts
# (and exports) pathogenic assertions for HCM, primary ciliary dyskinesia (PCD) and
# non-compaction cardiomyopathy (NCCM) whose ExAC allele count exceeds the maximum
# tolerated allele count chosen for each disease.
# NOTE: paths are relative to the author's working directory set below.
setwd('~/d/sci/src/freq_filter')
options(stringsAsFactors=FALSE)
source('../exac_2015/exac_constants.R')

# load ExAC dataset (skipped when an `exac` object already exists in the global environment)
if (!("exac" %in% ls(globalenv()))) {
  exac = load_exac_data(reload=TRUE)
}

# maximum tolerated ExAC allele counts (adjusted AC) used as cut-offs below;
# a variant is flagged only when its AC is strictly greater than the cut-off
hcm_max_ac = 9
pcd_max_ac = 154
nccm_max_ac = 18

clinvar = read.table('../clinvar/output/clinvar.tsv',sep='\t',header=TRUE,quote='',comment.char='')
# build the same chrom_pos_ref_alt key as exac$pos_id (position zero-padded to 9 characters)
clinvar$pos_id = paste(clinvar$chrom, formatC(clinvar$pos,width=9,flag='0'), clinvar$ref, clinvar$alt, sep='_')
clinvar$exac_ac_adj = exac$ac_adj[match(clinvar$pos_id, exac$pos_id)]
# variants absent from ExAC have an allele count of zero
clinvar$exac_ac_adj[is.na(clinvar$exac_ac_adj)] = 0
# "Literature only" refers to ClinVar submissions that have no submitter except OMIM and/or GeneReviews
clinvar$lit_only = nchar(gsub('OMIM','',gsub('GeneReviews','',gsub(';','',clinvar$all_submitters)))) == 0

hcm = grepl("cardiomyopathy",clinvar$all_traits,ignore.case=TRUE) & grepl("hypertrophic",clinvar$all_traits,ignore.case=TRUE)

# Counts recorded in the trailing comments come from the July 9, 2015 ClinVar release.
sum(hcm) # 1063
sum(hcm & clinvar$pathogenic==1) # 631
sum(hcm & clinvar$exac_ac_adj > hcm_max_ac & clinvar$pathogenic==1) # 50
sum(hcm & clinvar$exac_ac_adj > hcm_max_ac & clinvar$pathogenic==1 & clinvar$all_pmids != '') # 50
sum(hcm & clinvar$exac_ac_adj > hcm_max_ac & clinvar$pathogenic==1 & clinvar$lit_only) # 2
sum(hcm & clinvar$exac_ac_adj > hcm_max_ac & clinvar$pathogenic==1 & clinvar$conflicted==0) # 28
# The two complement counts previously tested `< 9`, which dropped variants with AC
# exactly 9 from both groups (50 + 577 != 631). `<=` makes them true complements of
# the `>` counts above.
sum(hcm & clinvar$exac_ac_adj <= hcm_max_ac & clinvar$pathogenic==1) # was 577 with `< 9`; expected 581 (= 631 - 50)
sum(hcm & clinvar$exac_ac_adj <= hcm_max_ac & clinvar$pathogenic==1 & clinvar$conflicted==0) # was 573 with `< 9`; re-run to refresh

# create a table of these variants to browse in Rstudio
hcm_prev = clinvar[hcm & clinvar$exac_ac_adj > hcm_max_ac & clinvar$pathogenic==1 & clinvar$conflicted==0,]

# write out a table of HCM variants per Nicky's request
write.table(clinvar[hcm,],'data/clinvar_hcm.tsv',sep='\t',row.names=FALSE,col.names=TRUE,quote=FALSE)

pcd = grepl("primary",clinvar$all_traits,ignore.case=TRUE) & grepl("ciliary",clinvar$all_traits,ignore.case=TRUE) & grepl("dyskinesia",clinvar$all_traits,ignore.case=TRUE)
sum(pcd) # 246
sum(pcd & clinvar$pathogenic==1 & clinvar$conflicted==0) # 128
sum(pcd & clinvar$pathogenic==1 & clinvar$conflicted==0 & clinvar$exac_ac_adj > pcd_max_ac) # 1

nccm = grepl("cardiomyopathy",clinvar$all_traits,ignore.case=TRUE) & grepl("compaction",clinvar$all_traits,ignore.case=TRUE)
sum(nccm) # 85
sum(nccm & clinvar$pathogenic==1 & clinvar$conflicted==0) # 42
sum(nccm & clinvar$pathogenic==1 & clinvar$conflicted==0 & clinvar$exac_ac_adj > nccm_max_ac) # 3

# export the non-conflicted pathogenic PCD / NCCM variants above their cut-offs for manual review
to_look_at = clinvar$pathogenic==1 & clinvar$conflicted==0 & ((nccm & clinvar$exac_ac_adj > nccm_max_ac) | (pcd & clinvar$exac_ac_adj > pcd_max_ac))
write.table(clinvar[to_look_at,],'data/clinvar_pcd_nccm_high_af.tsv',sep='\t',row.names=FALSE,col.names=TRUE,quote=FALSE)
--------------------------------------------------------------------------------
/src/precompute_exac_af_filter.R:
--------------------------------------------------------------------------------
# Session setup: work from the freq_filter source directory (the relative
# source() path below depends on it) and never auto-convert strings to factors.
setwd('~/d/sci/src/freq_filter')
options(stringsAsFactors = FALSE)
source('../exac_2015/exac_constants.R')

# Load the ExAC dataset only when no `exac` object is present in the global
# environment yet (load_exac_data is presumably provided by exac_constants.R).
if (!is.element("exac", ls(globalenv()))) {
  exac <- load_exac_data(reload = FALSE)
}
9 |
# forward method by James Ware:
# given a desired AF to filter on for Mendelian analyses, what is the max AC that would be expected in a dataset?
#
# Arguments:
#   af  maximum credible population allele frequency (scalar or vector)
#   an  allele number, i.e. number of chromosomes genotyped (scalar or vector)
#   ci  one-sided confidence level; pass 1-(1-ci)/2 for a two-sided interval
# Returns the `ci` quantile of a Poisson distribution with mean an*af.
#
# The previous scalar-only `if (af == 0)` branch was dropped: qpois() already
# returns 0 for lambda = 0, and without the branch the function also works on
# vectors and propagates NA instead of stopping with an error.
find_max_ac = function(af,an,ci=.95) {
  quantile_limit = ci # ci for one-sided, 1-(1-ci)/2 for two-sided
  max_ac = qpois(quantile_limit,an*af)
  return (max_ac)
}
21 |
22 |
# backward method: given an observed AC, what is the highest AF filter for which one should remove this variant?
# This function will accept an AC_Adj and an AN_Adj and will return the highest AF filter for which you would want to reject
# this variant as a potential Mendelian causal allele.
# Important notes about parameters:
# 1. for uniroot to work, lower has to be rarer than a singleton, and upper has to be higher than fixed (100% AF),
#    hence the AF here runs from 0.1 alleles in ExAC to an impossible AF of 200%
# 2. you need a tight tolerance (tol) to get accurate results from uniroot at low AF, so we use 1e-7 as default.
# 3. we also specify a precision (1e-6) for incrementing to find the upper boundary of the range of AFs for which
#    the 95%CI AC is less than the observed AC
#
# Returns 0 (i.e. "never filter") when ac or an is NA or zero, for singletons (ac == 1),
# and when uniroot raises a warning or error.
#
# Fixes relative to the earlier version:
# - `ci` is now honoured in the refinement loop as well (it used to fall back to the
#   default 0.95 there, so any non-default ci gave inconsistent results);
# - a uniroot warning/error now really returns 0 -- previously the handlers' value was
#   discarded and the next line failed with "object 'uniroot_result' not found".
find_af_filter = function(ac, an, ci=.95, lower=(.1/(2*60706)), upper=2, tol=1e-7, precision=1e-6) {
  if (is.na(ac) || is.na(an) || ac == 0 || an == 0 || ac == 1) {
    return (0.0)
  }
  quantile_limit = ci # ci for one-sided, 1-(1-ci)/2 for two-sided
  # upper CI bound on AC for a given AF at this AN; same quantity as find_max_ac(af, an, ci)
  ci_max_ac = function(af) { return (qpois(p=quantile_limit,lambda=an*af)) }
  # this has been tested and with the default parameters here, uniroot seems to never fail for any AC, AN values
  # in ExAC. still, it's good to have this tryCatch in here for debugging or in case anyone wants to override
  # the defaults. instead of just giving the error/warning message it also tells you which AC and AN values
  # it failed on.
  uniroot_result = tryCatch({
    # find some AF (not necessarily the highest) whose CI upper-bound AC is exactly ac - 1
    uniroot(f = function(af) { return (ac - 1 - ci_max_ac(af)) },lower=lower,upper=upper,tol=tol)
  }, warning = function(w) {
    print(paste("ac= ",as.character(ac),", an= ",as.character(an)," warning = ",as.character(w),sep=''))
    return (NULL)
  }, error = function(e) {
    print(paste("ac= ",as.character(ac),", an= ",as.character(an)," error = ",as.character(e),sep=''))
    return (NULL)
  })
  if (is.null(uniroot_result)) {
    return (0.0) # uniroot failed: fall back to "no filter"
  }
  max_af = round(uniroot_result$root,-log10(precision)) # round to nearest unit of precision (millionth by default)
  while(ci_max_ac(max_af) < ac) {
    max_af = max_af + precision # increase by millionths until you find the upper bound - the highest AF for which 95%CI AC is still less than observed AC
  }
  max_af = max_af - precision # back off one unit from the AF that violated the 95%CI AC < obs AC condition
  return (max_af)
}
59 |
# This finds the af_filter for, say, a vector of AC at 10,000 different alleles and a corresponding vector of 10,000 AN values
# it's basically just a wrapper for mapply, which is trivial but makes more readable the code below in find_highest_af_filter.
#
# Arguments:
#   ac_vector, an_vector  parallel vectors of allele counts and allele numbers
#   ci, lower, upper, tol, precision  passed through unchanged to find_af_filter
# Returns one af_filter value per element of ac_vector.
#
# `precision` is a new optional argument (same default as find_af_filter), and empty
# input now yields numeric(0): mapply refuses to mix zero-length vectors with the
# length-one tuning arguments.
find_af_filter_vectorized = function(ac_vector, an_vector, ci=.95, lower=(.1/(2*60706)), upper=2, tol=1e-7, precision=1e-6) {
  if (length(ac_vector) == 0) {
    return (numeric(0))
  }
  return (mapply(find_af_filter, ac_vector, an_vector, ci=ci, lower=lower, upper=upper, tol=tol, precision=precision))
}
65 |
# In some cases, the highest af_filter value will _NOT_ come from the popmax population, because
# sometimes the population with higher AN_Adj will give more filtering power even if its AF is slightly lower
# therefore, it is important to be able to take the highest af_filter value from a vector of population AC and AN
# this function will accept a list of vectors of AC and of AN. for instance, ac_list may be a list of 5 vectors, which are
# each 10,000 value of ac_afr, ac_amr, ac_eas, ac_nfe, and ac_sas. an_list is the corresponding list of an_ vectors.
#
# Returns a list with two parallel vectors:
#   af_filter      highest af_filter across populations for each variant
#   af_filter_pop  population that produced it (names of ac_list with "ac_" removed),
#                  or 'none' where the filter is 0
#
# Fixes relative to the earlier version:
# - the sanity check compared length(list(x)), which is always 1, so it could never fail;
# - `ci` was accepted but never forwarded to the per-population computation;
# - a pmax() result was computed and immediately overwritten (dead code, removed);
# - zero variants, or an unnamed ac_list, raised errors inside the loop.
find_highest_af_filter = function(ac_list,an_list,ci=.95) {
  n_pops = length(ac_list)
  # check that we have at least one population and the same number of AC and AN columns
  stopifnot(n_pops > 0, n_pops == length(an_list))
  n_variants = length(ac_list[[1]])
  if (n_variants == 0) {
    return (list(af_filter=numeric(0), af_filter_pop=character(0)))
  }
  # population labels come from the names of ac_list; fall back to positions if unnamed
  pop_names = names(ac_list)
  if (is.null(pop_names)) {
    pop_names = as.character(seq_len(n_pops))
  }
  # make a list of af_filter vectors for each population
  # SIMPLIFY=FALSE makes it return a list instead of concatenating the vectors
  af_filter_list = mapply(find_af_filter_vectorized,ac_list,an_list,MoreArgs=list(ci=ci),SIMPLIFY=FALSE)
  # one row per variant, one column per population
  af_filter_matrix = do.call(cbind, unname(af_filter_list))
  # column (population) holding the highest filter for each variant; which.max keeps the first on ties
  best_pop = vapply(seq_len(n_variants), function(i) which.max(af_filter_matrix[i,]), integer(1))
  highest_af_filter = af_filter_matrix[cbind(seq_len(n_variants), best_pop)] # get max filter
  highest_af_filter_pop = pop_names[best_pop] # get the pop that gave the max filter
  highest_af_filter_pop = gsub("ac_","",highest_af_filter_pop)
  highest_af_filter_pop[highest_af_filter==0] = 'none'
  return (list(af_filter=highest_af_filter, af_filter_pop=highest_af_filter_pop))
}
93 |
# QC test #1: round trip. going forward (AF -> max AC) and then backward (AC -> af_filter)
# should hand back roughly the AF we started from.
an = 2*60706
find_af_filter(find_max_ac(af=.001,an=an),an=an)
find_af_filter(find_max_ac(af=.01,an=an),an=an)

# QC test 2. does the round trip hold up across a whole range of AC?
an = 2*60706 # for now assume full genotyping
afs = c((1:9)*10^-6,(1:9)*10^-5,(1:9)*10^-4,(1:9)*10^-3,1*10^-2)
# forward: max AC for each AF on the grid; backward: the af_filter recovered from each of those ACs
forward_ac = vapply(afs, function(af) find_max_ac(af,an=an), numeric(1))
backward_af = vapply(forward_ac, function(ac) find_af_filter(ac=ac,an=an), numeric(1))
# draw both step curves on log-log axes; the thin blue line should sit on top of the thick red one
plot(NA,NA,xlim=range(afs),ylim=c(1,300),xlab='AF',ylab='95%CI max AC',log='xy',axes=FALSE)
points(x=afs,y=forward_ac,col='red',type='s',lwd=10)
points(x=backward_af,y=forward_ac,col='blue',type='s',lwd=3)
legend('topleft',c('forward (James)','backward (Eric)'),col=c('red','blue'),lwd=c(10,3))
axis(side=1,at=10^(-6:-2),labels=percent(10^(-6:-2)),lwd=0,lwd.ticks=1)
axis(side=2,at=(0:6)*50,labels=(0:6)*50,lwd=0,lwd.ticks=1,las=1)

# QC test 3. is the backward method sensitive to changes in AC at the lower end - singleton, doubleton etc.
find_af_filter(ac=1,an=2*60706,ci=.95)
find_af_filter(ac=2,an=2*60706,ci=.95)
find_af_filter(ac=3,an=2*60706,ci=.95)
find_af_filter(ac=4,an=2*60706,ci=.95)
120 |
# QC test 4. does the backward method compute in reasonable time?
# (note just using ac_popmax and an_popmax for this initial test, though we'll ultimately want to find the
# highest af_filter across all populations)
exac_test = head(exac,n=200000)
timer_start = proc.time()
with(exac_test, mapply(find_af_filter, ac_popmax, an_popmax, ci=.95))
proc.time() - timer_start
# takes roughly in the range of ~200 seconds for 200K variants

# QC test 5. for an actual set of ExAC variants, does running backwards and then forwards reproduce the actual AC reasonably well?
exac_test$af_filter = with(exac_test, mapply(find_af_filter, ac=ac_popmax, an=an_popmax, ci=.95))
exac_test$ac_forward = with(exac_test, mapply(find_max_ac, af=af_filter, an=an_popmax, ci=.95))
cor.test(exac_test$ac_popmax, exac_test$ac_forward)
# yes, perfectly

# QC test 6. does the AF filter compare to the actual AF popmax in a reasonable way
#png('display/af_popmax_vs_af_filter.png',height=600,width=600)
par(mar=c(5,5,2,2))
with(exac_test, plot(af_popmax, af_filter, pch=20, xlab='', ylab='',
                     log='xy', xlim=10^c(-6,-1), ylim=10^c(-6,-1), axes=FALSE))
abline(a=0,b=1,col='red') # the diagonal, af_filter == af_popmax
tick_positions = 10^(-5:-1) # same ticks on both axes
axis(side=1,at=tick_positions,labels=percent(tick_positions),lwd=0,lwd.ticks=1)
axis(side=2,at=tick_positions,labels=percent(tick_positions),lwd=0,lwd.ticks=1,las=1)
mtext(side=1,line=3,text='AF popmax',font=2)
mtext(side=2,line=4,text='AF filter',font=2)
#dev.off()
# yes, all points are below the diagonal, and the correlation is fairly tight
# do a sanity check that much of the noisiness can be explained by differences in AN:
# (same AC/AN ratio each time, but at 100-fold different sample sizes)
find_af_filter(ac=100,an=100000)
find_af_filter(ac=10,an=10000)
find_af_filter(ac=1,an=1000)
# ok, good...
153 |
# QC test 7. does the find highest AF filter function work correctly and give the same answer
# when called two different ways?
qc_pops = c("afr","amr","eas","nfe","sas")
# first way: one mapply call per population, combined element-wise with pmax
per_pop_af_filter = lapply(qc_pops, function(pop) {
  mapply(find_af_filter,ac=exac_test[[paste0("ac_",pop)]],an=exac_test[[paste0("an_",pop)]])
})
exac_test$af_filter_max = do.call(pmax, per_pop_af_filter)

# second way: find_highest_af_filter, which bundles all of that together
ptm = proc.time()
test_filtering_data = find_highest_af_filter(ac_list=as.list(exac_test[,paste0("ac_",qc_pops)]),
                                             an_list=as.list(exac_test[,paste0("an_",qc_pops)]))
exac_test$af_filter_max_2 = test_filtering_data[["af_filter"]]
exac_test$af_filter_pop = test_filtering_data[["af_filter_pop"]]
proc.time() - ptm # 310 seconds for 200K variants
# this predicts just over 4 hours to compute af_filter for all of exac

# check that the correlation between these two methods is perfect
cor.test(exac_test$af_filter_max, exac_test$af_filter_max_2)
# it is.

# and do the results still look reasonable w.r.t. af_popmax?
plot(exac_test$af_popmax, exac_test$af_filter_max_2,pch=20)
# yep
179 |
# ok, next actually compute af_filter for all of exac (N.B. this block took about 3.5 hours to complete)
filtering_data = find_highest_af_filter(ac_list=as.list(exac[,c("ac_afr","ac_amr","ac_eas","ac_nfe","ac_sas")]),
                                        an_list=as.list(exac[,c("an_afr","an_amr","an_eas","an_nfe","an_sas")]))
exac$af_filter = filtering_data[["af_filter"]]
exac$af_filter_pop = filtering_data[["af_filter_pop"]]

# write out results for public release: one row per variant, keyed by chrom/pos/ref/alt
af_filter_data = exac[,c("chrom","pos","ref","alt","af_filter","af_filter_pop")]
# TRUE/FALSE are spelled out because T and F are ordinary variables that can be reassigned
write.table(af_filter_data,"af_filter_data.tsv",sep='\t',row.names=FALSE,col.names=TRUE,quote=FALSE)


# a few QC checks.
# most variants should fall on the diagonal where popmax == af_filter_pop
table(exac[,c("popmax","af_filter_pop")])
#         af_filter_pop
# popmax  afr     amr     eas     nfe     none    sas
# AFR  786392     202      56  101735  807486    6256
# AMR    1804  387449     181   83244  589595    7134
# EAS    1073     641  488158   56861  648524    7575
# NFE      24       1       9 1273470 2499020    1332
# SAS     139      50      29   61030 1147140  764055
# ok, that looks good.
# And the correlation should be tight:
cor.test(exac$af_filter, exac$af_popmax)
# which it is: cor = 0.9881759
205 |
### FIGURE

# variants remaining after filters
# things to loop over in generating summary stats
# allele frequency thresholds
af_limits = c(1e-6, 10^-5.5, 1e-5, 10^-4.5, 1e-4, 10^-3.5, 1e-3, 10^-2.5, 1e-2, 5e-2)
ancestries = c(names(pop_names)[c(1,2,3,5,7)]) # "afr" "amr" "eas" "nfe" "sas"
filters = c("af_filter")
# only doing dominant analysis so don't need this line:
# zygosities = c('any','hom')

# We don't have permission to release individual genotype-level data for the "leave-out 500" for the
# simulated Mendelian analysis, but such data are needed in order to compute the filtering statistics
# for Figure 3A. Thus, this part of the code can only be reproduced in-house. recalculate_stats is
# therefore set to FALSE by default for public release. We who are running the code locally can set to
# TRUE to recompute the filtering statistics. Note that this involves re-computing the af_filter
# on the 60,206 individuals who aren't left out, plus a bunch of costly joins between ExAC and the lv500
# dataset. Thus, this block of code takes several hours and should be run overnight.
recalculate_stats = FALSE
lv500_path = '../exac_papers/misc_data/lv500.table.gz'
# && is the scalar, short-circuiting operator that belongs in an if() condition: the file system is
# only consulted when recalculation was actually requested
if (recalculate_stats && file.exists(lv500_path)) {
  lv = read.table(lv500_path,sep='\t',header=TRUE,comment.char='',quote='')
  colnames(lv) = tolower(colnames(lv))
  # drop ac_hemi with a logical mask. lv[,-which(...)] would silently drop EVERY column if ac_hemi
  # were absent, because -integer(0) selects nothing
  lv = lv[,colnames(lv)!='ac_hemi']

  # join relevant cols to ExAC
  # pos_id is chrom_pos_ref_alt with pos zero-padded to 9 digits
  lv$pos_id = paste(lv$chrom, formatC(lv$pos,width=9,flag='0'), lv$ref, lv$alt, sep='_')
  exac$pos_id = paste(exac$chrom, formatC(exac$pos,width=9,flag='0'), exac$ref, exac$alt, sep='_')
  # for each lv500 row, the index of the matching ExAC row (NA where there is no match)
  match_indices = match(lv$pos_id, exac$pos_id)

  # compute AC and AN for the 60,206 non-left-out individuals
  # NOTE(review): columns 7:26 are picked by position (after ac_hemi has been dropped) and are assumed
  # to be the ac_/an_ columns shared with exac - confirm against the lv500 header
  for (colname in colnames(lv)[7:26]) {
    # compute ACs in ExAC minus the leave-out 500
    lv[,paste("exac",colname,sep="_")] = exac[match_indices,colname] - lv[,colname]
  }

  # re-calculate af_filter without the leave-out individuals in there
  lv$af_filter = find_highest_af_filter(ac_list=as.list(lv[,c("exac_ac_afr","exac_ac_amr","exac_ac_eas","exac_ac_nfe","exac_ac_sas")]),
                                        an_list=as.list(lv[,c("exac_an_afr","exac_an_amr","exac_an_eas","exac_an_nfe","exac_an_sas")]))[["af_filter"]]

  # copy `use` and `category` from exac (and only use if lv500 also has ac_adj > 0)
  lv$use = exac$use[match_indices] & lv$ac_adj > 0
  lv$category = exac$category[match_indices]

  # some constants and stats
  # count the number of leave-out individuals in each continental ancestry (max AN / 2).
  # built with sapply so that the result is always a named numeric vector: growing a NULL with
  # n_indiv[[name]] = value gives an atomic vector or a list depending on the R version, and a list
  # would break both sum() and the divisions below
  n_indiv = sapply(ancestries, function(ancestry) max(lv[,paste("an",ancestry,sep="_")])/2)
  sum(n_indiv) # this will be 500

  # now calculate summary stats for Figure 3A
  # one row per ancestry x filter x af_limit combination
  stats = expand.grid(ancestries,filters,af_limits)
  colnames(stats) = c('ancestry','filter','af_limit')
  stats$ancestry = as.character(stats$ancestry)
  stats$total_vars = 0
  stats$filtered_vars = 0
  stats$proportion_removed = 0.0
  # only consider "use" variants that are missense, missense-equivalent, or protein-truncating
  # (this does not depend on the threshold or the filter, so it is computed once up front)
  include_in_total = lv$use & !is.na(lv$category) & lv$category %in% c(mis_like,lof_like)
  for (af_limit in af_limits) {
    for (filter in filters) {
      # note the < logic in next line. because af_filter is an af for which 95%CI AC < obs AC,
      # users should remove variants whose af_filter is _greater than or equal to_ their maximum credible AF
      # conversely, the "filtered" set of variants includes only those with af_filter strictly _less than_
      # the user's maximum credible AF
      include_in_filtered = include_in_total & (lv[,filter] < af_limit)
      for (ancestry in ancestries) {
        # find target row of stats table
        row = stats$ancestry == ancestry & stats$filter == filter & stats$af_limit == af_limit
        exac_colname = paste("ac",ancestry,sep="_") # column of interest in lv (AC among the leave-out individuals)
        # below, these n_indiv terms cancel in the final calculation, but are included as a reminder
        # that conceptually what we are computing is the average number of variants _in each person_
        # of a given ancestry that are removed by an allele frequency filter
        average_variants_without_af_filter = sum(lv[include_in_total,exac_colname]) / n_indiv[ancestry]
        average_variants_with_af_filter = sum(lv[include_in_filtered,exac_colname]) / n_indiv[ancestry]
        proportion_removed = 1 - average_variants_with_af_filter / average_variants_without_af_filter
        stats$proportion_removed[row] = proportion_removed # store it in the table
        stats$total_vars[row] = average_variants_without_af_filter
        stats$filtered_vars[row] = average_variants_with_af_filter
      }
    }
  }
  # write out results so that others can at least reproduce Figure 3A, given this table of stats
  write.table(stats,'data/filtering_stats.tsv',sep='\t',quote=FALSE,col.names=TRUE,row.names=FALSE)
} else {
  # if not re-calculating stats, read them in and then you can reproduce Figure 3A
  # stringsAsFactors=FALSE keeps ancestry as character, as in the branch above, on every R version
  stats = read.table('data/filtering_stats.tsv',sep='\t',quote='',header=TRUE,stringsAsFactors=FALSE)
}
295 |
# get colors and x axis values for plotting Figure 3A. if you end up with a Figure 3A that lacks
# the curves themselves and has just the axes, it's probably because you forgot to run this line.
# as.character() makes sure pop_colors is looked up by ancestry NAME: if stats was read from file
# with strings converted to factors, indexing by the factor itself would use its integer codes
# and could pick the wrong colors
stats$acol = pop_colors[as.character(stats$ancestry)]

# this saves a version of Figure 3A with gridlines. The actual version of Figure 3A for publication
# is produced in analysis/Figures.Rmd as a multi-panel plot with Figure 3B, from a modified version of this code.
png('figures/variants_after_filtering.png',width=800,height=500,res=100)
par(mar=c(4,5,1,1))
# x axis is log10(AF), reversed so that filters get more stringent from left (1%) to right (1e-6)
plot(NA, NA, xlim=c(-2,-6), ylim=c(0,550), ann=FALSE, axes=FALSE, yaxs='i')
abline(h=(0:5)*100, lwd=.5) # horizontal gridlines
for (ancestry in ancestries) {
  # one curve per ancestry; the 5% threshold is left off the plot
  rows = stats$ancestry==ancestry & stats$af_limit < .05
  points(x=log10(stats$af_limit[rows]), y=stats$filtered_vars[rows], type='l', lwd=5, col=stats$acol[rows])
}
axis(side=1, at=-2:-6, labels=c("1%","0.1%","0.01%","1e-5","1e-6"), lwd=0, lwd.ticks=1)
axis(side=2, at=(0:5)*100, labels=(0:5)*100, lwd=0, lwd.ticks=0, las=2)
mtext(side=1, line=2.5, text='your maximum credible population AF')
mtext(side=2, line=3.5, text='mean predicted protein-altering variants per exome')
legend('topright',pop_names[ancestries],col=pop_colors[ancestries],text.font=2,text.col=pop_colors[ancestries],lwd=5)
dev.off()

# statistics quoted in text -- averages across populations at different filtering thresholds
mean(stats$filtered_vars[stats$af_limit==.001])
mean(stats$filtered_vars[stats$af_limit==1e-6])
320 |
321 |
322 |
--------------------------------------------------------------------------------
/src/vcf_to_var_indiv_table.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import vcf
4 | import sys
5 | import pysam
6 | import normalize # https://github.com/ericminikel/minimal_representation/blob/master/normalize.py
7 |
8 | # suggested use: vcf_to_var_indiv_table.py $lv500genos $b37ref > genotypes.table
9 | # or, bsub -q priority -J lv500 -o lv500.o -e lv500.e ". private_paths.bash; vcf_to_var_indiv_table.py $lv500genos $b37ref > genotypes.table"
10 |
def vcf_to_var_indiv_table(genos_vcf, fasta_path):
    """Write a tab-delimited table to stdout with one row per (variant allele, carrier sample).

    Each ALT allele of each VCF record is passed through normalize.normalize
    (together with the reference FASTA handle) and reported once for every
    sample whose genotype contains that allele.

    Args:
        genos_vcf: path to the genotypes VCF, read with vcf.Reader.
        fasta_path: path to the reference FASTA, opened with pysam.FastaFile.

    Output columns: chrom, pos, ref, alt, filter, sample, zyg, gt, gq, dp, ad.
        filter is the record's first FILTER entry, 'PASS' when the FILTER list
        is empty, or '.' when the record carries no FILTER information at all.
        zyg is 'hom' when every allele of the genotype is this ALT allele and
        'het' otherwise (including genotypes with an uncalled allele).
    """
    genos = vcf.Reader(filename=genos_vcf, strict_whitespace=True)
    pysam_fasta = pysam.FastaFile(fasta_path)
    header = ['chrom', 'pos', 'ref', 'alt', 'filter', 'sample', 'zyg', 'gt', 'gq', 'dp', 'ad']
    try:
        sys.stdout.write('\t'.join(header) + '\n')
        for record in genos:
            if record.FILTER is None:
                # no FILTER information ('.'); len(None) would raise a TypeError
                filter_val = '.'
            elif len(record.FILTER) > 0:
                filter_val = record.FILTER[0]  # only the first filter is reported
            else:
                filter_val = 'PASS'
            # genotype allele numbers are 1-based for ALT alleles (0 is the reference)
            for allele_number, alt_allele in enumerate(record.ALT, start=1):
                if alt_allele is None:
                    # no alternate allele at this site; do not normalize the string 'None'
                    continue
                chrom, pos, ref, alt = normalize.normalize(
                    pysam_fasta, record.CHROM, record.POS, record.REF, str(alt_allele))
                for sample in record.samples:
                    if sample['GT'] is None:
                        continue
                    gt_alleles = sample.gt_alleles
                    # an uncalled allele (e.g. './1') shows up as None: skip it instead of int(None)
                    called = [int(a) for a in gt_alleles if a is not None]
                    if allele_number not in called:
                        continue
                    fully_called = len(called) == len(gt_alleles)
                    if fully_called and all(a == allele_number for a in called):
                        zyg = 'hom'
                    else:
                        zyg = 'het'
                    # NOTE(review): if AD is ordered [ref, alt1, alt2, ...], index allele_number-1 is
                    # the depth of the PRECEDING allele (the reference depth for the first ALT).
                    # left unchanged here; confirm against the AD layout of the input VCF.
                    ad = sample['AD']
                    ad_val = ad[allele_number - 1] if ad is not None else None
                    fields = [chrom, pos, ref, alt, filter_val, sample.sample, zyg,
                              sample['GT'], sample['GQ'], sample['DP'], ad_val]
                    sys.stdout.write('\t'.join(map(str, fields)) + '\n')
    finally:
        # release the FASTA handle even if a record fails part-way through
        pysam_fasta.close()
33 |
if __name__ == '__main__':
    # two positional arguments are required: the genotypes VCF and the reference FASTA.
    # exit with a usage message instead of a bare IndexError when they are missing
    if len(sys.argv) < 3:
        sys.exit('usage: vcf_to_var_indiv_table.py <genotypes.vcf> <reference.fasta> > genotypes.table')
    vcf_to_var_indiv_table(sys.argv[1], sys.argv[2])
36 |
37 |
--------------------------------------------------------------------------------