├── .Rprofile ├── .gitignore ├── ClinVar ├── FreqFilterClinVarSubmission.ExpEvidence.csv ├── FreqFilterClinVarSubmission.SubmissionInfo.csv └── FreqFilterClinVarSubmission.Variant.csv ├── R ├── .Rprofile ├── Figures.R ├── floorEffects.R ├── setup.R └── sumariseCaseSeriesVariants.R ├── README.md ├── data-raw ├── .keep ├── ExACVariants_0.001_sarcomeric.txt ├── HCM_BRU_Variants_and_freqs.txt ├── LMM_OXF_path_likelyPath_VUS_variants_allExACpopulations_withLowestClass.txt ├── SupplementaryTable1.txt ├── Table1.txt ├── Table2.txt └── authors.txt ├── data ├── .keep └── filtering_stats.tsv ├── figures ├── .keep └── Figure1.png ├── frequencyFilter.Rproj ├── manuscript ├── .Rprofile ├── SupplementaryMaterial.Rmd ├── manuscript.Rmd ├── nature.csl ├── newTemplate.docx └── template2.docx ├── packrat ├── init.R ├── packrat.lock ├── packrat.opts └── src │ └── packrat │ └── packrat_0.4.7-1.tar.gz └── src ├── hcm_curation.R ├── precompute_exac_af_filter.R └── vcf_to_var_indiv_table.py /.Rprofile: -------------------------------------------------------------------------------- 1 | #### -- Packrat Autoloader (version 0.4.7-1) -- #### 2 | source("packrat/init.R") 3 | #### -- End Packrat Autoloader -- #### 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | 6 | *cache 7 | *unnamed-chunk* 8 | figures/Figure2.png 9 | figures/Figure3.png 10 | manuscript/manuscript.docx 11 | manuscript/SupplementaryMaterial.docx 12 | manuscript/*.bib 13 | packrat/lib*/ 14 | packrat/src/ 15 | *.html 16 | *.DS_Store 17 | -------------------------------------------------------------------------------- /ClinVar/FreqFilterClinVarSubmission.ExpEvidence.csv: -------------------------------------------------------------------------------- 1 | Variant name,Condition ID type,Condition ID value,Collection method,Allele 
origin,Affected status 2 | NM_001103.2:c.1484C>T,HPO,HP:0001639,research,germline,unknown 3 | NM_014391.2:c.368C>T,HPO,HP:0001639,research,germline,unknown 4 | NM_001885.1:c.460G>A,HPO,HP:0001639,research,germline,unknown 5 | NM_172201.1:c.79C>T,HPO,HP:0001639,research,germline,unknown 6 | NM_007078.2:c.1823C>T,HPO,HP:0001639,research,germline,unknown 7 | NM_170707.3:c.976T>A,HPO,HP:0001639,research,germline,unknown 8 | NM_000256.3:c.2441_2443delAGA,HPO,HP:0001639,research,germline,unknown 9 | NM_000256.3:c.961G>A,HPO,HP:0001639,research,germline,unknown 10 | NM_000256.3:c.1000G>A,HPO,HP:0001639,research,germline,unknown 11 | NM_000256.3:c.13G>C,HPO,HP:0001639,research,germline,unknown 12 | NM_000256.3:c.1468G>A,HPO,HP:0001639,research,germline,unknown 13 | NM_000256.3:c.1321G>A,HPO,HP:0001639,research,germline,unknown 14 | NM_000256.3:c.624G>C,HPO,HP:0001639,research,germline,unknown 15 | NM_000256.3:c.2618C>A,HPO,HP:0001639,research,germline,unknown 16 | NM_000256.3:c.3049G>A,HPO,HP:0001639,research,germline,unknown 17 | NM_000256.3:c.3763G>A,HPO,HP:0001639,research,germline,unknown 18 | NM_000256.3:c.2269G>A,HPO,HP:0001639,research,germline,unknown 19 | NM_000256.3:c.2873C>T,HPO,HP:0001639,research,germline,unknown 20 | NM_000256.3:c.1786G>A,HPO,HP:0001639,research,germline,unknown 21 | NM_000256.3:c.529C>T,HPO,HP:0001639,research,germline,unknown 22 | NM_000256.3:c.2381C>T,HPO,HP:0001639,research,germline,unknown 23 | NM_000256.3:c.461T>C,HPO,HP:0001639,research,germline,unknown 24 | NM_000256.3:c.640G>A,HPO,HP:0001639,research,germline,unknown 25 | NM_000256.3:c.223G>A,HPO,HP:0001639,research,germline,unknown 26 | NM_000256.3:c.3330+5G>C,HPO,HP:0001639,research,germline,unknown 27 | NM_002471.3:c.3195G>C,HPO,HP:0001639,research,germline,unknown 28 | NM_002471.3:c.2384G>A,HPO,HP:0001639,research,germline,unknown 29 | NM_000257.2:c.2360G>A,HPO,HP:0001639,research,germline,unknown 30 | NM_000257.2:c.2183C>T,HPO,HP:0001639,research,germline,unknown 31 | 
NM_000257.2:c.1322C>T,HPO,HP:0001639,research,germline,unknown 32 | NM_000257.2:c.5326A>G,HPO,HP:0001639,research,germline,unknown 33 | NM_000432.3:c.37G>A,HPO,HP:0001639,research,germline,unknown 34 | NM_000432.3:c.141C>A,HPO,HP:0001639,research,germline,unknown 35 | NM_000432.3:c.401A>C,HPO,HP:0001639,research,germline,unknown 36 | NM_000258.2:c.170C>A,HPO,HP:0001639,research,germline,unknown 37 | NM_000258.2:c.170C>G,HPO,HP:0001639,research,germline,unknown 38 | NM_033118.3:c.260C>T,HPO,HP:0001639,research,germline,unknown 39 | NM_003803.3:c.1900+3A>C,HPO,HP:0001639,research,germline,unknown 40 | NM_198056.2:c.4534C>T,HPO,HP:0001639,research,germline,unknown 41 | NM_003673.3:c.458G>A,HPO,HP:0001639,research,germline,unknown 42 | NM_003280.2:c.435C>A,HPO,HP:0001639,research,germline,unknown 43 | NM_001001430.1:c.853C>T,HPO,HP:0001639,research,germline,unknown 44 | NM_014000.2:c.2923C>T,HPO,HP:0001639,research,germline,unknown 45 | -------------------------------------------------------------------------------- /ClinVar/FreqFilterClinVarSubmission.SubmissionInfo.csv: -------------------------------------------------------------------------------- 1 | SubmitterID,SubmitterIDType,Submitter type,Submitter first name,Submitter last name,Submitter phone,Submitter email,Organization type,Organization,OrganizationID,Organization abbreviation,Institution,Street,City,Country,Postal code,Assembly name 2 | RBH_CV_BRU,Handle,Contact,Nicola,Whiffin,0044 20 7352 8121,n.whiffin@rbht.nhs.uk,other,Cardiovascular Biomedical Research Unit,500163,RBH CV BRU,Royal Brompton and Harefield NHS Foundation Trust,Sydney Street,London,United Kingdom,SW3 6NP,GRCh37 3 | -------------------------------------------------------------------------------- /ClinVar/FreqFilterClinVarSubmission.Variant.csv: -------------------------------------------------------------------------------- 1 | HGVS name,Condition ID type,Condition ID value,Clinical significance,Date last evaluated,Assertion 
method,Assertion method citation,Comment on clinical significance,Gene symbol 2 | NM_001103.2:c.1484C>T,HPO,HP:0001639,Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 4 individuals in the literature (Chiu 2010, Theis 2006) and 2 reported on ClinVar (SCV000060535.4). Non segregation with disease also reported (2 affecteds; ClinVar SCV000060535.4). No supporting segregation data. [BS1 + BS4],ACTN2 3 | NM_014391.2:c.368C>T,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 8/8 missense algorithms predict benign. Mutant protein correctly incorporates into the sarcomere (Crocini 2013). Amino acid change is not conserved. [BS1 + BP4],ANKRD1 4 | NM_001885.1:c.460G>A,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Identified in a HCM case with an alternative variant sufficient to cause disease (ClinVar SCV000060874.4). Previous pathogenic assertion in ClinVar is from 'literature only' for DCM (no segregation, only 200 controls used for reference). All others say 'VUS' [BS1 + BP5],CRYAB 5 | NM_172201.1:c.79C>T,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 7/8 missense algorithms predict damaging. Identified in 2 Chinese families for Atrial Fibrillation (Yang 2004). Only parent and child tested for mutation (50% chance of segregation). An in vitro study on rat cardiomyocytes saw suppression of L-type Calcium current (Liu 2014). [BS1 + PP3],KCNE2 6 | NM_007078.2:c.1823C>T,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Pathogenic in ClinVar from 'literature only'. No segregation or functional data.
[BS1],LDB3 7 | NM_170707.3:c.976T>A,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Position not conserved; 7/8 missense algorithms predict benign, simulation studies suggest no effect on coil B dimerisation. [BS1 + BP4],LMNA 8 | NM_000256.3:c.2441_2443delAGA,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. In-frame deletion. Experimental studies have shown that this deletion does not lead to decreased MYBPC3 stability in vitro (Bahrudin 2008). Has been identified in 4 adult unaffected relatives (ClinVar SCV000203960.3). [BS1 + PM4],MYBPC3 9 | NM_000256.3:c.961G>A,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Evidence based on presence in HCM individuals (1 with a MYH7 variant (Maron 2012)) and absence in ESP and nearby residues reported as pathogenic (Ser311Leu and Arg326Gln). No segregation or functional data. Reported multiple times in ClinVar with other variants that could explain phenotype (SCV000208258.4, SCV000261724.2). [BS1 + BP5],MYBPC3 10 | NM_000256.3:c.1000G>A,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Very high Asian frequency (0.45% in ExAC). Reduced stability of MYBPC3, higher Ca2+ transients and action potential durations in HL-1 cells and rat cardiac myocytes (Bahrudin 2011). [BS1],MYBPC3 11 | NM_000256.3:c.13G>C,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Found in 2 families; one individual but not unaffected mother and in another but not unaffected sibling. 3 individuals with variant also carry a pathogenic mutation in the same gene (ClinVar SCV000198995.3). Amino acid not conserved in mammals.
[BS1 + BP5],MYBPC3 12 | NM_000256.3:c.1468G>A,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 7/7 missense algorithms predict deleterious. Identified in individuals with HCM, DCM and LVNC. Conserved residue. Multiple reports of co-occurrence with an alternative variant that is sufficient to cause disease (ClinVar SCV000059050.4). [BS1 + BP5 + PP3],MYBPC3 13 | NM_000256.3:c.1321G>A,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Multiple reports of co-occurrence with alternative pathogenic variants (ClinVar SCV000059030.4 and SCV000208011.4). Some computational modelling data suggest a conformational effect (Gajendrarao 2015). [BS1 + BP5],MYBPC3 14 | NM_000256.3:c.624G>C,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Reports of possible non-segregation (ClinVar SCV000284248.1). [BS1],MYBPC3 15 | NM_000256.3:c.2618C>A,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Identified as a compound het and in homozygous state (consanguineous family; Nanni 2003, Ingles 2005). No segregation data in HCM. 1 homozygote seen in ExAC. [BS1],MYBPC3 16 | NM_000256.3:c.3049G>A,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Not conserved amino acid - 7/7 missense algorithms predict benign. No functional or segregation data. [BS1 + BP4],MYBPC3 17 | NM_000256.3:c.3763G>A,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 1/5 individuals with variant also had a likely pathogenic variant in another gene (ClinVar SCV000198796.2). No other data available.
[BS1],MYBPC3 18 | NM_000256.3:c.2269G>A,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Conservative amino acid change. No functional or segregation data found. [BS1],MYBPC3 19 | NM_000256.3:c.2873C>T,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 2 documented individuals also carry a MYBPC3 variant sufficient to explain disease (ClinVar SCV000059185.4). Amino acid is not well conserved; 7/7 missense algorithms predict benign. [BS1 + BP4 + BP2],MYBPC3 20 | NM_000256.3:c.1786G>A,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 6/7 missense algorithms predict damaging. No clear segregation data (one suggestion the variant doesn't segregate (ClinVar SCV000259413.1)). [BS1 + PP3],MYBPC3 21 | NM_000256.3:c.529C>T,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Different missense at the same position is common. Identified with another pathogenic MYBPC3 variant (ClinVar SCV000198969.3). Otherwise found in DCM. Position not well conserved. [BS1 + BP5],MYBPC3 22 | NM_000256.3:c.2381C>T,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. No evidence towards a pathogenic classification found. [BS1],MYBPC3 23 | NM_000256.3:c.461T>C,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Position not conserved, 6/7 missense algorithms predict benign. Identified in a child with a further homozygous MYBPC3 variant (ClinVar SCV000059282.4).
[BS1 + BP4],MYBPC3 24 | NM_000256.3:c.640G>A,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. No evidence towards a pathogenic classification found. [BS1],MYBPC3 25 | NM_000256.3:c.223G>A,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. No evidence towards a pathogenic classification found. [BS1],MYBPC3 26 | NM_000256.3:c.3330+5G>C,HPO,HP:0001639,Pathogenic,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Prevalence in affecteds statistically increased over controls (http://biorxiv.org/content/early/2016/02/24/041111). Shown to produce a truncated protein product and to segregate with disease (Watkins 1995). [BS1 + PVS1 + PS4 + PP1],MYBPC3 27 | NM_002471.3:c.3195G>C,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 6/7 missense algorithms predict deleterious. Originally identified in an affected (died of congestive heart failure at 45 yrs) and not his 2 unaffected offspring or 150 controls (Carniel 2005). Amino acid change seen in birds. [BS1 + PP3],MYH6 28 | NM_002471.3:c.2384G>A,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Identified in a 75 year old patient and not found in 170 controls (Niimura 2002). No other data. [BS1],MYH6 29 | NM_000257.2:c.2360G>A,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 11 reports in total (7 literature, 4 LMM in ClinVar (SCV000059440.4)), majority South Asian descent - very high South Asian frequency (0.1%). 3 probands had another variant sufficient to cause disease. Only moderately conserved amino acid.
Possible non-segregation (not in the father of an individual who died of sudden cardiac death; ClinVar SCV000059440.4). [BS1 + BP5],MYH7 30 | NM_000257.2:c.2183C>T,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Pathogenic in ClinVar from 'literature only' - VUS by 3 other submitters. Identified originally in cis with another MYH7 mutation in 2 siblings with HCM (Blair 2001) - other variant (c.1816G>A) is absent in ExAC and has been shown to segregate in multiple families (24 literature reports) [BS1 + BP2],MYH7 31 | NM_000257.2:c.1322C>T,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Pathogenic in ClinVar from 'literature only', in Laing distal myopathy. No change in mRNA levels. Amino acid is not conserved in mammals. [BS1],MYH7 32 | NM_000257.2:c.5326A>G,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Large majority of identified cases are of Asian ancestry. 2 individuals at GeneDx have an alternative pathogenic variant (ClinVar SCV000208629.3). Only segregation data is in 2 affected siblings and not in the unaffected sibling (Blair 2002). [BS1 + BP5],MYH7 33 | NM_000432.3:c.37G>A,HPO,HP:0001639,Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. ClinVar 'Pathogenic' assertion from literature only (ClinVar SCV000035365.1). Not found in an affected family member (Ball 2012). Other weak non-segregation data. Found in a 56 year old healthy individual (Ball 2012). Found in Ashkenazi Jewish individuals. Reported to lead to increased Ca2+ affinity (Szczesna 2001). [BS1 + BS4],MYL2 34 | NM_000432.3:c.141C>A,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 7/8 missense algorithms predict benign.
Co-occurrence with likely pathogenic (tested) MYH7:c.5134C>T mutation (segregation data) in literature (Andersen 2001, Hougs 2005). Other reports of a second variant (ClinVar SCV000060036.4). In vitro studies show reduced force development and power output (Szczesna-Cordary 2004, Abraham 2009, Greenberg 2009, Greenberg 2010). [BS1 + BP5 + BP4],MYL2 35 | NM_000432.3:c.401A>C,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 8/8 missense prediction algorithms predict deleterious. Identified in 2 individuals with a second 'likely pathogenic' MYL2 variant (ClinVar SCV000060059.4). An in vitro functional study suggests that this variant may reduce cardiac muscle contraction efficiency (Burghardt 2013). Second hit causing more severe phenotype suggested. [BS1 + PP3],MYL2 36 | NM_000258.2:c.170C>A,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 8/8 missense algorithms predict deleterious. Mutations at the same residue have been reported for HCM (Lee 2001). Identified along with pathogenic and likely pathogenic variants (ClinVar SCV000059671.4, SCV000207109.1). 2 affected siblings reported both with variant as homozygote (no support for recessive MYL3 inheritance). [BS1 + BP5 + PP3],MYL3 37 | NM_000258.2:c.170C>G,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 7/8 missense algorithms predict damaging. Functional study showing lower complex affinity not significant for this variant after multiple testing (Lossie 2012). Segregation in 5 affected members of 2 families (ClinVar SCV000199362.3). All reports in East Asians (highest ExAC frequency at 0.05%).
[BS1 + PP3 + PP1],MYL3 38 | NM_033118.3:c.260C>T,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Pathogenic in ClinVar from 'literature only', no other submissions. Found as a compound het in a 13yr old and with an MYH7 variant (also in mildly affected parents; Davis 2001). Other compound het variant is also common. [BS1],MYLK2 39 | NM_003803.3:c.1900+3A>C,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Originally identified as a homozygote in 3 yr old (ClinVar SCV000195854.1). No other data to support. 7 homozygotes in ExAC. Splicing tools predict no effect. [BS1 + BP4],MYOM1 40 | NM_198056.2:c.4534C>T,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 8/8 missense algorithms predict damaging. Reported in association with Brugada syndrome (Rook 1999, Deschênes 2000, Meregalli 2009, Crotti 2012). In vitro functional studies show a potential impact (Rook 1999, Deschênes 2000), however these may not be representative of in vivo protein function. [BS1 + PP3],SCN5A 41 | NM_003673.3:c.458G>A,HPO,HP:0001639,Likely Benign,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. 7/8 missense algorithms predict benign. Only pathogenic in ClinVar by 'literature only'. Identified in siblings but no solid segregation data (Hayashi 2004). Functional studies in yeast show the variant leads to an increase in the ability of TCAP to bind with titin and CS-1 (Hayashi 2004)- in vitro assay may not accurately represent biological function. Amino acid change is not conserved - 7 mammals carry the variant amino acid. [BS1 + BP4],TCAP 42 | NM_003280.2:c.435C>A,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM.
Functional studies using mutant porcine cardiac skinned fibers showed significantly increased force development and force recovery compared to wildtype (Landstrom 2008) with reduced Ca2+ binding [only in vitro studies]. No segregation data. Has also been reported in a patient with idiopathic DCM who also harbored a missense variant in MYBPC3 gene (Pinto 2011) [BS1],TNNC1 43 | NM_001001430.1:c.853C>T,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Reported in >10 individuals with HCM and segregated in 5 affected individuals from multiple families (Watkins 1995, Elliott 1999, Garcia-Castro 2003, Torricelli 2003, Van Driest 2003, Theopistou 2004, Ingles 2005, Zeller 2006, Garcia-Castro 2009, Kaski 2009, Gimeno 2009, Millat 2010). Up to half of individuals carried a second variant (ClinVar SCV000060277.4). Residue not evolutionarily conserved. A milder effect with late age of onset has been suggested. In vitro studies suggest may have an effect on muscle contraction (Morimoto 1999, Yanaga 1999, Szczesna 2000, Hernandez 2005) however these might not accurately represent protein function. [BS1],TNNT2 44 | NM_014000.2:c.2923C>T,HPO,HP:0001639,Uncertain significance,27/07/16,ACMG Guidelines, 2015,PMID:25741868,Frequency in ExAC too high to be causative for HCM. Pathogenic in ClinVar from 'literature only'. Reduced levels of vinculin in the patient with this variant, but other studies found that this was a general feature of HCM and not related to specific genotypes (Vasile 2006).
[BS1],VCL -------------------------------------------------------------------------------- /R/.Rprofile: -------------------------------------------------------------------------------- 1 | source("../.Rprofile", chdir = TRUE) 2 | -------------------------------------------------------------------------------- /R/Figures.R: -------------------------------------------------------------------------------- 1 | #setwd("./R") 2 | #source("../R/setup.R") 3 | my.CI<-0.95 # NOTE(review): not referenced again in this script; the interval code for Figure 3 hard-codes 1.96 4 | 5 | ### Figure 2: case count vs ExAC allele count for HCM variants (zoomed main panel plus full-range inset) 6 | 7 | variants<-read.table("../data-raw/LMM_OXF_path_likelyPath_VUS_variants_allExACpopulations_withLowestClass.txt", header=TRUE, sep="\t") # tab-delimited variant table with a header row 8 | maxHCMPenTerm<-9 # allele-count threshold drawn as the dark-blue (dodgerblue4) dashed line 9 | maxHCM<-5 # allele-count threshold drawn as the light-blue (lightskyblue3) dashed line 10 | variants$Class <- factor(variants$Class, c("Pathogenic", "Likely Pathogenic", "VUS")) # fix the level order so the manual colour/shape scales below map to Pathogenic, Likely Pathogenic, VUS in that order 11 | 12 | fullplot <- ggplot(subset(variants, (Disease=="HCM" & !(Gene %in% c('GLA','DMD','TAZ','EMD','LAMP2','VBP1','FHL1','KCNE1L','ZIC3','COL4A5','FLNA','MED12')))), aes(x=CaseCount, y=ExACCountALL, colour=Class, shape=Class)) + # full-range scatter of HCM variants outside the excluded gene list; later printed into the inset viewport 13 | geom_point(size=2, alpha=0.6, position=position_jitter(width=0.5,height=0.5)) + # points are jittered by up to 0.5 in each direction 14 | geom_hline(yintercept=maxHCMPenTerm, linetype=2, colour="dodgerblue4") + 15 | geom_hline(yintercept=maxHCM, linetype=2, colour="lightskyblue3") + 16 | scale_color_manual(values=c("#ED1E24", "#FF9912", "gray38")) + 17 | scale_shape_manual(values=c(19,19,1)) + 18 | scale_y_continuous(limits = c(-2,178), expand = c(0, 0)) + 19 | scale_x_continuous(limits = c(-2,108), expand = c(0, 0)) + 20 | theme(axis.text=element_text(size=12),axis.title=element_blank(),legend.position = c(.7, .8), 21 | legend.text=element_text(size=14), legend.title = element_blank(), legend.key = element_blank(), 22 | axis.line.x = element_line(color="grey"), axis.line.y = element_line(color="grey"), panel.background = element_blank(), 23 | axis.ticks = element_line(color="grey"), plot.background = element_rect(linetype="solid", color="grey")) 24 | 25 | zoomplot <- ggplot(subset(variants, (Disease=="HCM" & !(Gene %in%
c('GLA','DMD','TAZ','EMD','LAMP2','VBP1','FHL1','KCNE1L','ZIC3','COL4A5','FLNA','MED12')))), aes(x=CaseCount, y=ExACCountALL, colour=Class, shape=Class)) + # zoomed main panel: same data and aesthetics as fullplot, restricted by the axis limits below 26 | geom_point(size=2, alpha=0.6, position=position_jitter(width=0.5,height=0.5)) + 27 | geom_hline(yintercept=(maxHCMPenTerm+0.2), linetype=2, colour="dodgerblue4") + # threshold lines are offset by 0.2 above the integer thresholds 28 | geom_hline(yintercept=(maxHCM+0.2), linetype=2, colour="lightskyblue3") + 29 | scale_color_manual(values=c("#ED1E24", "#FF9912", "gray38")) + 30 | scale_shape_manual(values=c(19,19,1)) + 31 | labs(x="Count cases",y="Count ExAC") + 32 | #ylim(0,30) + 33 | #xlim(0,70) + 34 | scale_y_continuous(limits = c(-0.5,30), expand = c(0, 0)) + 35 | scale_x_continuous(limits = c(0,70), expand = c(0, 0)) + 36 | annotate("text",x=67,y=(maxHCMPenTerm+0.6), label=paste("AC =",maxHCMPenTerm),colour="dodgerblue4") + # label text is derived from the threshold variable so it cannot drift from the line it annotates 37 | annotate("text",x=67,y=(maxHCM+0.6), label=paste("AC =",maxHCM),colour="lightskyblue3") + 38 | theme(axis.text=element_text(size=12), axis.title=element_text(size=14), 39 | legend.position="none", axis.line.x = element_line(color="grey"), axis.line.y = element_line(color="grey"), panel.background = element_blank(), 40 | axis.ticks = element_line(color="grey")) 41 | 42 | png("../figures/Figure2.png",width=1200,height=1000,res=150) 43 | subvp<-viewport(width=.5,height=.5,x=.745,y=.745) # inset viewport: half the device width and height, centred at (0.745, 0.745) 44 | 45 | zoomplot # NOTE(review): looks redundant; the explicit print() on the next line already renders the plot 46 | print(zoomplot) 47 | print(fullplot,vp=subvp) # draw the full-range plot inside the inset viewport on top of the zoomed plot 48 | dev.off() 49 | 50 | ### Figure 3 51 | 52 | # Details for Exome plot (figure 3a) 53 | stats = read.table('../data/filtering_stats.tsv',sep='\t',quote='',header=TRUE) 54 | pops <- c('afr', 'amr', 'eas', 'fin', 'nfe', 'sas') # NOTE(review): not referenced again in this script; 'fin' has no entry in pop_colors/pop_names 55 | color_amr = k_amr = '#ED1E24' 56 | color_eur = k_eur = '#6AA5CD' 57 | color_afr = k_afr = '#941494' 58 | color_sas = k_sas = '#FF9912' 59 | color_eas = k_eas = '#108C44' 60 | color_oth = k_oth = '#ABB9B9' 61 | color_mde = k_mde = '#000080' 62 | color_nfe = k_nfe = color_eur 63 | color_fin = k_fin = '#002F6C' 64 | 65 | pop_colors = c('afr' = color_afr,'amr' = color_amr,'eas' = color_eas,'nfe' =
color_nfe,'sas' = color_sas) 66 | pop_names = c('afr' = 'African','amr' = 'Latino','eas' = 'East Asian','nfe' = 'European','sas' = 'South Asian') 67 | 68 | ancestries = c(names(pop_names)[c(1,2,3,4,5)]) 69 | stats$acol = pop_colors[as.character(stats$ancestry)] # look colours up by ancestry NAME: stats is read before stringsAsFactors is switched off, and a factor index would be used as integer codes rather than names 70 | 71 | # Compute Odds Ratios and Confidence Intervals from ExAC and Internal HCM data 72 | #install.packages("tidyr") 73 | options(stringsAsFactors = FALSE) 74 | exac_data<-read.table("../data-raw/ExACVariants_0.001_sarcomeric.txt", header=TRUE, sep="\t") %>% 75 | tbl_df %>% 76 | transmute(Gene,CodingVar,AC=ExACAlleleCount,exacAF=ExACFreq,group="control") 77 | hcm_data<-read.table("../data-raw/HCM_BRU_Variants_and_freqs.txt", header=TRUE, sep="\t") %>% 78 | tbl_df %>% 79 | transmute(Gene,CodingVar,AC=CaseCount,exacAF=ExACFreq,group="case") 80 | 81 | data <- rbind(exac_data,hcm_data) # stack controls and cases; both were reduced to the same five columns above 82 | data$bin <- cut(data$exacAF,breaks=c(1,0.001,0.0005,0.0001,0.00005,0.00001,-1),labels=c("0.00001","0.00005","0.0001","0.0005","0.001","1")) # cut() sorts its breaks, so each label is the upper bound of its (lower, upper] ExAC AF bin 83 | 84 | assignGeneGroup <- function(x){ # map ONE gene symbol to its group: "MYH7" and "MYBPC3" are kept, anything else becomes "OTHER" 85 | if(x=="MYH7"){return(x)} else 86 | if(x=="MYBPC3"){return(x)} else 87 | {return("OTHER")} 88 | } 89 | applyGeneGroup <- function(x){ # vectorised wrapper: assignGeneGroup() applied to each element of x 90 | sapply(x,assignGeneGroup) 91 | } 92 | 93 | assignPopSize <- function(x,y){ # denominator for ONE (gene group x, cohort y) pair: 322 for "case", otherwise a gene-specific control size 94 | if(y=="case"){return(322)} else 95 | if(x=="MYH7"){return(60469)} else 96 | if(x=="MYBPC3"){return(45793.5)} else 97 | {return(57855)} 98 | } 99 | applyPopSize<-function(x,y){ # element-wise assignPopSize() over the parallel vectors x and y 100 | mapply(assignPopSize,x,y) 101 | } 102 | 103 | ef_data<-data %>% # odds ratio of cases vs controls per gene group and ExAC AF bin 104 | mutate(Gene=applyGeneGroup(Gene)) %>% 105 | group_by(Gene,bin,group) %>% 106 | summarise(totAC=sum(AC)) %>% 107 | mutate(size=applyPopSize(Gene,group)) %>% 108 | transmute(group,odds=totAC/(size-totAC),seprecurse=((1/totAC)+(1/(size-totAC)))) %>% # per-cohort odds plus that cohort's 1/a + 1/b contribution to the variance of log(OR) 109 | unite(oddsSEpre,odds,seprecurse,sep="_") %>% # pack both numbers into one "odds_se" string so spread() can pivot them together 110 | spread(key=group,value=oddsSEpre,fill="0_0") %>% # one column per cohort; a missing cohort cell becomes "0_0" 111 | separate(case,c("CaseOdds","CaseSEPre"),sep="_") %>% 112 | separate(control,c("ControlOdds","ControlSEPre"),sep="_") %>% 113 |
transmute(OR=(as.numeric(CaseOdds)/as.numeric(ControlOdds)),SE=(sqrt(as.numeric(CaseSEPre)+as.numeric(ControlSEPre)))) %>% # odds ratio, and SE of log(OR) as the square root of the summed per-cohort terms 114 | transmute(OR,LowerCI=(exp(log(OR)-(1.96*SE))),UpperCI=(exp(log(OR)+(1.96*SE)))) %>% # interval built on the log scale with a hard-coded 1.96 multiplier (my.CI is not used here) 115 | mutate(ExACfreq=as.numeric(bin)) # integer code of the bin factor (1 = lowest AF bin), used as the x position in panel (b) 116 | 117 | gene_list <- c('MYBPC3','MYH7','OTHER') # NOTE(review): not referenced again in this script 118 | gene_names = c('MYBPC3' = 'MYBPC3','MYH7' = 'MYH7','OTHER' = 'Other Sarcomeric') 119 | gene_colors = c('MYH7' = color_afr,'MYBPC3' = color_nfe,'OTHER' = color_sas) 120 | genes = c(names(gene_names)[c(1,2,3)]) 121 | ef_data$acol = gene_colors[ef_data$Gene] # colour looked up by gene-group name 122 | 123 | # Plot multipanel plot 124 | png('../figures/Figure3.png',width=3800,height=1500,res=150) 125 | m<-rbind(c(1,2)) 126 | layout(m) # one row, two panels: (a) on the left, (b) on the right 127 | 128 | par(mar=c(5,6,3,2)) 129 | plot(NA, NA, xlim=c(-2,-6), ylim=c(0,550), ann=FALSE, axes=FALSE, yaxs='i') # empty canvas; x is log10(af_limit) and runs from -2 down to -6 130 | #abline(h=(0:5)*100, lwd=.5) 131 | abline(h=0) 132 | abline(v=-1.84) 133 | for (ancestry in ancestries) { # one line per ancestry, restricted to rows with af_limit < 0.05 134 | rows = stats$ancestry==ancestry & stats$af_limit < .05 135 | points(x=log10(stats$af_limit[rows]), y=stats$filtered_vars[rows], type='l', lwd=5, col=stats$acol[rows]) 136 | } 137 | axis(side=1, at=-2:-6, labels=c("1","0.1","0.01","0.001","0.0001"), lwd=0, lwd.ticks=1, cex.axis=2) # tick labels are percentages: 10^-2 = 1%, ..., 10^-6 = 0.0001% 138 | axis(side=2, at=(0:5)*100, labels=(0:5)*100, lwd=0, lwd.ticks=0, las=2, cex.axis=2) 139 | mtext(side=1, line=3.5, text='Maximum credible population AF (%)', cex=2.5) 140 | mtext(side=2, line=4.5, text='Mean protein-altering variants per exome', cex=2.5) 141 | legend('topright',pop_names[ancestries],col=pop_colors[ancestries],text.font=2,text.col=pop_colors[ancestries],lwd=5, cex=2) 142 | mtext("(a)", side=3, adj=-0.115, line=0.5, cex=2.8) 143 | 144 | par(mar=c(5,6,3,2)) 145 | plot(NA, NA, xlim=c(5,1), ylim=c(0,62), ann=FALSE, axes=FALSE, yaxs='i', log='x') # empty canvas; x is the bin code on a reversed log axis from 5 down to 1, so bin code 6 falls outside the limits 146 | #abline(h=(0:6)*10, lwd=.5) 147 | abline(h=0) 148 | abline(v=5.33) 149 | for (gene in genes) { # one series per gene group 150 | rows = ef_data$Gene==gene 151 | points(ef_data$ExACfreq[rows], ef_data$OR[rows], type='b', lwd=5,
pch=19, col=ef_data$acol[rows]) 152 | arrows(ef_data$ExACfreq[rows], ef_data$LowerCI[rows], ef_data$ExACfreq[rows], ef_data$UpperCI[rows], length=0.02, angle=90, code=3, col=ef_data$acol[rows]) # vertical error bars from LowerCI to UpperCI with flat caps at both ends 153 | } 154 | axis(side=1, at=c(5,4,3,2,1), labels=c("0.1","0.05","0.01","0.005","0.001"), lwd=0, lwd.ticks=1, cex.axis=2) # bin codes 5..1 relabelled as percentages of the bin upper bounds (0.001 -> 0.1%, ..., 0.00001 -> 0.001%) 155 | axis(side=2, at=(0:6)*10, labels=(0:6)*10, lwd=0, lwd.ticks=0, las=2, cex.axis=2) 156 | mtext(side=1, line=3.5, text='Maximum ExAC frequency (%)', cex=2.5) 157 | mtext(side=2, line=4.0, text='Odds ratio', cex=2.5) 158 | legend('topleft',gene_names[genes],col=gene_colors[genes],text.font=2,text.col=gene_colors[genes],lwd=5, cex=2) 159 | mtext("(b)", side=3, adj=-0.13, line=0.5, cex=2.8) 160 | 161 | dev.off() 162 | -------------------------------------------------------------------------------- /R/floorEffects.R: -------------------------------------------------------------------------------- 1 | floorEffects_f1 <- function(x,y=0.95){ # root function for uniroot(): returns y - ppois(1,x), i.e. zero where P(X <= 1) for a Poisson mean x equals y 2 | y-ppois(1,x) 3 | } 4 | 5 | 6 | floorEffects <- function(confThresholds=c(0.95,0.99,0.999),popSize=121412){ # returns a data.frame with columns confidence and alleleFrequency, one row per threshold 7 | output <- matrix(nrow=length(confThresholds),ncol=2) %>% 8 | data.frame 9 | names(output) <- c("confidence","alleleFrequency") 10 | output$confidence <- confThresholds 11 | 12 | for(i in seq(along=output$confidence)){ 13 | myMin <- uniroot(floorEffects_f1,c(0,1),output$confidence[i])$root # the third positional argument is forwarded to floorEffects_f1 as y; NOTE(review): the search interval is fixed at [0,1], so thresholds below ppois(1,1) (about 0.736) have no sign change and uniroot() will stop with an error 14 | output$alleleFrequency[i] <- myMin/popSize # Poisson mean found above, divided by popSize 15 | } 16 | return(output) 17 | } 18 | 19 | # floorEffects() is looking for the AF at which a single allele is outside the specified poisson confidence, across a range of confidences -------------------------------------------------------------------------------- /R/setup.R: -------------------------------------------------------------------------------- 1 | ## Libraries 2 | 3 | # if(!"dplyr" %in% installed.packages()){ 4 | # install.packages("dplyr") 5 | # } 6 | # if(!"tidyr" %in% installed.packages()){ 7 | # install.packages("tidyr") 8 | # } 9 | # if(!"ggplot2" %in%
installed.packages()){ 10 | # install.packages("ggplot2") 11 | # } 12 | # if(!"binom" %in% installed.packages()){ 13 | # install.packages("binom") 14 | # } 15 | # if(!"pander" %in% installed.packages()){ 16 | # install.packages("pander") 17 | # } 18 | # if(!"grid" %in% installed.packages()){ 19 | # install.packages("grid") 20 | # } 21 | # if(!"withr" %in% installed.packages()){ 22 | # install.packages("withr") 23 | # } 24 | 25 | library(dplyr) 26 | library(tidyr) 27 | library(ggplot2) 28 | #library(binom) 29 | library(pander) 30 | library(grid) 31 | #library(withr) 32 | 33 | ## Install knitcitations in this repo if not already installed 34 | # if(!"knitcitations" %in% installed.packages(lib.loc="../repoPackageLibrary")){ 35 | # install.packages("knitcitations",lib="../repoPackageLibrary") 36 | # } 37 | 38 | # ## Install knitauthors in this repo if not already installed (requires devtools) 39 | # if(!"knitauthors" %in% installed.packages(lib.loc="../repoPackageLibrary")){ 40 | # if(!"devtools" %in% installed.packages()){ 41 | # install.packages("devtools") 42 | # } 43 | # withr::with_libpaths(new = "../repoPackageLibrary/", 44 | # devtools::install_github("jamesware/knitauthors",force=T), 45 | # action= "prefix") 46 | # } 47 | ## Set up citations 48 | # library("knitcitations",lib.loc="../repoPackageLibrary") 49 | library(knitcitations) 50 | cleanbib() 51 | options("citation_format" = "pandoc") 52 | 53 | ## set knitr chunk options 54 | # if(!"knitr" %in% installed.packages()){ 55 | # install.packages("knitr") 56 | # } 57 | library(knitr) 58 | opts_chunk$set(fig.path="../figures/", collapse = TRUE, echo=FALSE, warning=TRUE, message=TRUE)#, cache=TRUE)#, results="asis") 59 | library(knitauthors) 60 | # library("knitauthors",lib.loc="../repoPackageLibrary") 61 | -------------------------------------------------------------------------------- /R/sumariseCaseSeriesVariants.R: -------------------------------------------------------------------------------- 1 | ### 
Exploring ExAC AC of variants from 6179 HCM cases (Walsh et al)

# Reads the case-series variant table, keeps the HCM rows whose gene is not in
# the exclusion list, and publishes the table and its summary counts as
# variables in the enclosing (normally global) environment via `<<-`:
#
#   referenceVariants  the filtered table
#   myVariantsN        number of rows in referenceVariants
#   myFailPath / myPassPath / myAbsentPath / myAllPath
#                      Pathogenic + Likely Pathogenic rows with
#                      ExACCountALL > 9 / <= 9 / == 0 / any value
#   myFailVus / myPassVus / myAllVus
#                      VUS rows with ExACCountALL > 9 / <= 9 / any value
#
# Takes no arguments. The input path is relative to the working directory.
# `filter` is expected to be dplyr's (attached by the caller).
summariseCaseSeriesVariants <- function(){

  # NOTE(review): these look like X-linked genes -- confirm the rationale.
  excludedGenes <- c('GLA','DMD','TAZ','EMD','LAMP2','VBP1','FHL1','KCNE1L','ZIC3','COL4A5','FLNA','MED12')
  pathogenicClasses <- c("Pathogenic","Likely Pathogenic")

  allVariants <- read.delim("../data-raw/LMM_OXF_path_likelyPath_VUS_variants_allExACpopulations_withLowestClass.txt")
  hcmVariants <- filter(allVariants, Disease=="HCM")
  keptVariants <- filter(hcmVariants, !(Gene %in% excludedGenes))

  referenceVariants <<- keptVariants
  myVariantsN <<- nrow(keptVariants)

  # Pathogenic / likely pathogenic variants, split by ExAC allele count.
  pathogenicVariants <- filter(keptVariants, Class %in% pathogenicClasses)
  myFailPath <<- nrow(filter(pathogenicVariants, ExACCountALL>9))
  myPassPath <<- nrow(filter(pathogenicVariants, ExACCountALL<=9))
  myAbsentPath <<- nrow(filter(pathogenicVariants, ExACCountALL==0))
  myAllPath <<- nrow(pathogenicVariants)

  # Variants of uncertain significance, split the same way.
  vusVariants <- filter(keptVariants, Class=="VUS")
  myFailVus <<- nrow(filter(vusVariants, ExACCountALL>9))
  myPassVus <<- nrow(filter(vusVariants, ExACCountALL<=9))
  myAllVus <<- nrow(vusVariants)
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
### Using high-resolution variant frequencies to empower clinical genome interpretation

**Abstract**

Whole exome and genome sequencing have transformed the discovery of genetic variants that cause human Mendelian disease, but discriminating pathogenic from benign variants remains a daunting challenge.
Rarity is recognized as a necessary, although not sufficient, criterion for pathogenicity, but frequency cutoffs used in Mendelian analysis are often arbitrary and overly lenient. Recent very large reference datasets, such as the Exome Aggregation Consortium (ExAC), provide an unprecedented opportunity to obtain robust frequency estimates even for very rare variants. Here we present a statistical framework for the frequency-based filtering of candidate disease-causing variants, accounting for disease prevalence, genetic and allelic heterogeneity, inheritance mode, penetrance, and sampling variance in reference datasets. Using the example of cardiomyopathy, we show that our approach reduces by two-thirds the number of candidate variants under consideration in the average exome, and identifies 43 variants previously reported as pathogenic that can now be reclassified. We present precomputed allele frequency cutoffs for all variants in the ExAC dataset. 6 | 7 | This repository contains code to reproduce analyses, generate figures, and compile the manuscript. 
8 | 9 | **Content** 10 | 11 | - manuscript -> files to compile the main manuscript and the supplementary material 12 | - src -> source code to compute af_filter values 13 | - data -> output from the src code 14 | - R -> R code to do the analysis and create the figures 15 | - data-raw -> raw data files to recreate the analysis 16 | - figures -> the figures 17 | - ClinVar -> ClinVar submission from variant curation 18 | - packrat -> libraries of packages required by the repository 19 | 20 | **Instructions for use with RStudio** 21 | 22 | - Download and install RStudio (https://www.rstudio.com/) 23 | - Install the knitr and devtools packages: 24 | `install.packages(c("knitr","devtools"))` 25 | - Download and unpack the repository 26 | - Select `File->New project->Existing Directory` 27 | - Browse to set the 'Project working directory' to the location of the unpacked repository 28 | - Click `Create Project` 29 | - Wait for the files to be imported and for packrat to install all the required packages (you should get this message: Packrat bootstrap successfully completed. Restarting R and entering packrat mode...) 
30 | - Close down Rstudio and restart 31 | - You should now be able to re-create the manuscript and figures by knitting the file: `./manuscript/manuscript.Rmd` 32 | -------------------------------------------------------------------------------- /data-raw/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/data-raw/.keep -------------------------------------------------------------------------------- /data-raw/ExACVariants_0.001_sarcomeric.txt: -------------------------------------------------------------------------------- 1 | Gene CodingVar ExACAlleleCount ExACTotal ExACFreq 2 | MYH7 c.11C>T 2 120794 0.00001656 3 | MYH7 c.77C>T 69 121330 0.00056870 4 | MYH7 c.115G>A 7 121364 0.00005768 5 | MYH7 c.175G>A 1 121302 0.00000824 6 | MYH7 c.343T>C 1 121348 0.00000824 7 | MYH7 c.428G>A 1 121400 0.00000824 8 | MYH7 c.427C>T 6 121402 0.00004942 9 | MYH7 c.443G>T 1 121410 0.00000824 10 | MYH7 c.530C>T 2 121412 0.00001647 11 | MYH7 c.632C>T 3 121406 0.00002471 12 | MYH7 c.709C>T 1 121412 0.00000824 13 | MYH7 c.728G>A 1 121412 0.00000824 14 | MYH7 c.958G>A 1 119812 0.00000835 15 | MYH7 c.976G>C 8 117940 0.00006783 16 | MYH7 c.1128C>A 1 121388 0.00000824 17 | MYH7 c.1129G>A 3 121390 0.00002471 18 | MYH7 c.1231G>A 1 121408 0.00000824 19 | MYH7 c.1322C>T 16 121408 0.00013179 20 | MYH7 c.1324C>T 1 121410 0.00000824 21 | MYH7 c.1325G>A 2 121406 0.00001647 22 | MYH7 c.1447G>A 1 121380 0.00000824 23 | MYH7 c.1681G>A 5 121412 0.00004118 24 | MYH7 c.1727A>G 1 121408 0.00000824 25 | MYH7 c.1954A>G 1 120602 0.00000829 26 | MYH7 c.1988G>A 2 121338 0.00001648 27 | MYH7 c.2080C>T 2 120584 0.00001659 28 | MYH7 c.2167C>T 3 121400 0.00002471 29 | MYH7 c.2206A>G 1 121406 0.00000824 30 | MYH7 c.2348G>A 2 121376 0.00001648 31 | MYH7 c.2359C>T 7 121382 0.00005767 32 | MYH7 c.2360G>A 27 121382 0.00022244 33 | MYH7 c.2389G>A 4 121372 0.00003296 34 | 
MYH7 c.2573G>A 1 121396 0.00000824 35 | MYH7 c.2594A>G 1 121404 0.00000824 36 | MYH7 c.2606G>A 4 121390 0.00003295 37 | MYH7 c.2608C>T 1 121386 0.00000824 38 | MYH7 c.2890G>C 51 121412 0.00042006 39 | MYH7 c.2945T>C 110 121412 0.00090601 40 | MYH7 c.3056C>A 1 121412 0.00000824 41 | MYH7 c.3133C>T 2 121396 0.00001648 42 | MYH7 c.3134G>A 4 121396 0.00003295 43 | MYH7 c.3158G>A 9 121396 0.00007414 44 | MYH7 c.3169G>A 1 121398 0.00000824 45 | MYH7 c.3236G>A 1 121352 0.00000824 46 | MYH7 c.3286G>T 19 121334 0.00015659 47 | MYH7 c.3301G>A 8 121344 0.00006593 48 | MYH7 c.3788C>A 1 121408 0.00000824 49 | MYH7 c.3981C>A 12 115980 0.00010347 50 | MYH7 c.4052C>T 4 121060 0.00003304 51 | MYH7 c.4075C>T 1 121236 0.00000825 52 | MYH7 c.4258C>T 1 121404 0.00000824 53 | MYH7 c.4377G>T 37 121128 0.00030546 54 | MYH7 c.4423C>T 10 121374 0.00008239 55 | MYH7 c.4487A>C 3 121412 0.00002471 56 | MYH7 c.4717G>A 10 121342 0.00008241 57 | MYH7 c.4909G>A 7 121402 0.00005766 58 | MYH7 c.4954G>T 3 121192 0.00002475 59 | MYH7 c.5071G>A 10 121384 0.00008238 60 | MYH7 c.5135G>A 1 121366 0.00000824 61 | MYH7 c.5257G>A 1 121410 0.00000824 62 | MYH7 c.5279C>T 2 121410 0.00001647 63 | MYH7 c.5305C>A 1 121410 0.00000824 64 | MYH7 c.5326A>G 4 121410 0.00003295 65 | MYH7 c.5329G>A 5 121410 0.00004118 66 | MYH7 c.5494C>T 6 121320 0.00004946 67 | MYH7 c.5507C>T 6 121248 0.00004949 68 | MYH7 c.5536C>T 2 121000 0.00001653 69 | MYH7 c.5561C>T 4 121130 0.00003302 70 | MYH7 c.5588G>A 1 121352 0.00000824 71 | MYH7 c.5779A>T 4 121328 0.00003297 72 | MYH7 c.5786C>T 3 121260 0.00002474 73 | MYH7 c.1370T>C 1 121396 0.00000824 74 | MYH7 c.1772T>C 1 121406 0.00000824 75 | MYH7 c.2710C>T 1 121392 0.00000824 76 | MYH7 c.3994G>A 2 117736 0.00001699 77 | MYH7 c.4606G>A 1 121236 0.00000825 78 | MYH7 c.4985G>A 7 121340 0.00005769 79 | MYH7 c.5030G>A 6 121384 0.00004943 80 | MYH7 c.345+1G>A 3 121322 0.00002473 81 | MYH7 c.5500G>A 6 121280 0.00004947 82 | MYH7 c.5485G>A 3 121340 0.00002472 83 | MYH7 c.5458C>G 3 121382 
0.00002472 84 | MYH7 c.5229G>T 3 121408 0.00002471 85 | MYH7 c.5203T>A 4 121408 0.00003295 86 | MYH7 c.4904T>C 3 121406 0.00002471 87 | MYH7 c.4660G>C 2 119834 0.00001669 88 | MYH7 c.4656G>T 1 119582 0.00000836 89 | MYH7 c.4294G>A 6 121400 0.00004942 90 | MYH7 c.3349G>T 1 102626 0.00000974 91 | MYH7 c.3208G>A 1 121390 0.00000824 92 | MYH7 c.2974C>A 2 121410 0.00001647 93 | MYH7 c.2636C>A 2 121378 0.00001648 94 | MYH7 c.2585C>T 4 121392 0.00003295 95 | MYH7 c.2420G>A 1 121350 0.00000824 96 | MYH7 c.5704G>C 9 121412 0.00007413 97 | MYH7 c.4372C>G 1 121072 0.00000826 98 | MYH7 c.3235C>T 6 121354 0.00004944 99 | MYH7 c.5243G>A 6 121408 0.00004942 100 | MYH7 c.3152C>T 2 121398 0.00001647 101 | MYH7 c.3856G>A 2 121408 0.00001647 102 | MYH7 c.4030C>T 2 120556 0.00001659 103 | MYH7 c.4348G>A 1 121330 0.00000824 104 | MYH7 c.5790+1G>A 1 121198 0.00000825 105 | MYH7 c.5494C>G 1 121320 0.00000824 106 | MYH7 c.5458C>T 3 121382 0.00002472 107 | MYH7 c.4672A>T 1 120520 0.00000830 108 | MYH7 c.3853+1G>A 2 121400 0.00001647 109 | MYH7 c.3801G>C 4 121410 0.00003295 110 | MYH7 c.3730A>C 1 121362 0.00000824 111 | MYH7 c.3138G>A 3 121400 0.00002471 112 | MYH7 c.3116A>G 14 121378 0.00011534 113 | MYH7 c.2680-2A>G 1 121202 0.00000825 114 | MYH7 c.1859T>A 1 121062 0.00000826 115 | MYH7 c.916A>G 1 120662 0.00000829 116 | MYH7 c.397G>A 1 121252 0.00000825 117 | MYH7 c.350A>T 5 121152 0.00004127 118 | MYH7 c.28G>C 9 121058 0.00007434 119 | MYH7 c.5283+1G>A 1 121410 0.00000824 120 | MYH7 c.706G>A 1 121412 0.00000824 121 | MYH7 c.5704G>A 2 121412 0.00001647 122 | MYH7 c.298G>A 2 121406 0.00001647 123 | MYH7 c.4399C>G 19 121266 0.00015668 124 | MYH7 c.5660A>G 1 121412 0.00000824 125 | MYH7 c.5587C>T 1 121350 0.00000824 126 | MYH7 c.5459G>A 1 121378 0.00000824 127 | MYH7 c.5422G>A 4 121400 0.00003295 128 | MYH7 c.5088G>C 3 121390 0.00002471 129 | MYH7 c.5029C>T 2 121380 0.00001648 130 | MYH7 c.4817G>A 6 121408 0.00004942 131 | MYH7 c.4687C>G 2 120928 0.00001654 132 | MYH7 c.4210G>A 2 121328 
0.00001648 133 | MYH7 c.4187G>A 1 120966 0.00000827 134 | MYH7 c.4078G>A 7 121250 0.00005773 135 | MYH7 c.4010G>A 18 119490 0.00015064 136 | MYH7 c.3982G>A 5 115954 0.00004312 137 | MYH7 c.2561A>C 1 121406 0.00000824 138 | MYH7 c.2177A>G 1 121404 0.00000824 139 | MYH7 c.1913C>T 1 120976 0.00000827 140 | MYH7 c.1700G>A 2 121412 0.00001647 141 | MYH7 c.1315A>T 2 121410 0.00001647 142 | MYH7 c.3200T>C 2 121394 0.00001648 143 | MYH7 c.3168G>C 1 121398 0.00000824 144 | MYH7 c.4004C>T 4 118910 0.00003364 145 | MYH7 c.2456G>A 1 121360 0.00000824 146 | MYH7 c.3268C>G 2 121270 0.00001649 147 | MYH7 c.3830G>A 5 121404 0.00004118 148 | MYH7 c.4941G>C 1 121396 0.00000824 149 | MYH7 c.925G>A 3 120668 0.00002486 150 | MYH7 c.5504_5505delAG 1 121258 0.00000825 151 | MYH7 c.4975G>A 1 121318 0.00000824 152 | MYH7 c.5602G>C 1 121380 0.00000824 153 | MYH7 c.5222T>C 1 121408 0.00000824 154 | MYH7 c.5014G>A 4 121378 0.00003295 155 | MYH7 c.3749G>A 1 121394 0.00000824 156 | MYH7 c.3318G>T 1 121320 0.00000824 157 | MYH7 c.3294G>C 1 121346 0.00000824 158 | MYH7 c.3005C>T 1 121412 0.00000824 159 | MYH7 c.2972A>G 1 121412 0.00000824 160 | MYH7 c.2776C>G 2 121412 0.00001647 161 | MYH7 c.2599G>A 2 121396 0.00001648 162 | MYH7 c.2371C>A 1 121386 0.00000824 163 | MYH7 c.2044G>A 1 121278 0.00000825 164 | MYH7 c.1246A>C 2 121412 0.00001647 165 | MYH7 c.875A>C 1 121386 0.00000824 166 | MYH7 c.481G>A 3 121412 0.00002471 167 | MYH7 c.5800G>A 3 120988 0.00002480 168 | MYH7 c.5783G>A 1 121290 0.00000824 169 | MYH7 c.5774G>A 1 121350 0.00000824 170 | MYH7 c.5761C>T 1 121378 0.00000824 171 | MYH7 c.5737G>A 1 121402 0.00000824 172 | MYH7 c.5726G>A 1 121412 0.00000824 173 | MYH7 c.5725C>T 4 121412 0.00003295 174 | MYH7 c.5699A>T 1 121412 0.00000824 175 | MYH7 c.5696T>C 1 121412 0.00000824 176 | MYH7 c.5689C>T 2 121412 0.00001647 177 | MYH7 c.5687T>C 1 121412 0.00000824 178 | MYH7 c.5606A>G 1 121386 0.00000824 179 | MYH7 c.5537G>A 1 120998 0.00000826 180 | MYH7 c.5530G>A 1 121096 0.00000826 181 | MYH7 
c.5507C>G 2 121248 0.00001650 182 | MYH7 c.5503G>C 1 121268 0.00000825 183 | MYH7 c.5500G>T 2 121280 0.00001649 184 | MYH7 c.5498A>G 1 121300 0.00000824 185 | MYH7 c.5495G>A 4 121310 0.00003297 186 | MYH7 c.5467G>C 1 121366 0.00000824 187 | MYH7 c.5452C>T 2 121392 0.00001648 188 | MYH7 c.5448A>C 1 121390 0.00000824 189 | MYH7 c.5419G>A 1 121404 0.00000824 190 | MYH7 c.5386C>T 1 121412 0.00000824 191 | MYH7 c.5374G>A 1 121412 0.00000824 192 | MYH7 c.5369T>C 1 121412 0.00000824 193 | MYH7 c.5363A>G 1 121412 0.00000824 194 | MYH7 c.5348A>G 1 121410 0.00000824 195 | MYH7 c.5342G>A 1 121410 0.00000824 196 | MYH7 c.5303A>T 1 121410 0.00000824 197 | MYH7 c.5293A>G 1 121410 0.00000824 198 | MYH7 c.5288C>T 1 121412 0.00000824 199 | MYH7 c.5287G>A 5 121412 0.00004118 200 | MYH7 c.5234T>C 1 121408 0.00000824 201 | MYH7 c.5231C>A 1 121408 0.00000824 202 | MYH7 c.5231C>T 2 121408 0.00001647 203 | MYH7 c.5230G>T 1 121410 0.00000824 204 | MYH7 c.5201T>G 2 121408 0.00001647 205 | MYH7 c.5187G>T 1 121406 0.00000824 206 | MYH7 c.5130T>G 1 121380 0.00000824 207 | MYH7 c.5119A>T 1 121384 0.00000824 208 | MYH7 c.5105C>A 1 121372 0.00000824 209 | MYH7 c.5105C>T 1 121372 0.00000824 210 | MYH7 c.5090G>A 3 121388 0.00002471 211 | MYH7 c.5074G>T 1 121392 0.00000824 212 | MYH7 c.5066G>A 2 121394 0.00001648 213 | MYH7 c.5065C>T 3 121392 0.00002471 214 | MYH7 c.5065C>G 2 121392 0.00001648 215 | MYH7 c.5054T>A 1 121396 0.00000824 216 | MYH7 c.5039T>C 1 121392 0.00000824 217 | MYH7 c.5032A>C 1 121390 0.00000824 218 | MYH7 c.5026C>T 1 121382 0.00000824 219 | MYH7 c.5020G>A 3 121384 0.00002471 220 | MYH7 c.5017A>G 1 121384 0.00000824 221 | MYH7 c.5013C>G 2 121382 0.00001648 222 | MYH7 c.4996G>A 1 121376 0.00000824 223 | MYH7 c.4993G>A 1 121366 0.00000824 224 | MYH7 c.4992C>A 3 121366 0.00002472 225 | MYH7 c.4984C>T 1 121334 0.00000824 226 | MYH7 c.4954G>A 1 121192 0.00000825 227 | MYH7 c.4949T>C 4 121390 0.00003295 228 | MYH7 c.4943G>A 1 121396 0.00000824 229 | MYH7 c.4919A>T 4 121408 0.00003295 
230 | MYH7 c.4903A>G 2 121408 0.00001647 231 | MYH7 c.4894G>A 5 121408 0.00004118 232 | MYH7 c.4861G>A 1 121410 0.00000824 233 | MYH7 c.4831G>T 1 121410 0.00000824 234 | MYH7 c.4823G>A 3 121412 0.00002471 235 | MYH7 c.4823G>T 1 121412 0.00000824 236 | MYH7 c.4821C>A 1 121412 0.00000824 237 | MYH7 c.4787C>T 5 121412 0.00004118 238 | MYH7 c.4786T>C 1 121412 0.00000824 239 | MYH7 c.4780G>C 1 121412 0.00000824 240 | MYH7 c.4751A>G 1 121404 0.00000824 241 | MYH7 c.4721G>A 3 121362 0.00002472 242 | MYH7 c.4709C>A 2 121292 0.00001649 243 | MYH7 c.4707G>C 1 121274 0.00000825 244 | MYH7 c.4679G>A 1 120744 0.00000828 245 | MYH7 c.4678C>T 1 120680 0.00000829 246 | MYH7 c.4603G>A 1 121260 0.00000825 247 | MYH7 c.4589G>A 1 121318 0.00000824 248 | MYH7 c.4588C>G 1 121318 0.00000824 249 | MYH7 c.4588C>T 1 121318 0.00000824 250 | MYH7 c.4571A>C 2 121376 0.00001648 251 | MYH7 c.4558G>A 1 121394 0.00000824 252 | MYH7 c.4553C>A 1 121398 0.00000824 253 | MYH7 c.4545G>C 2 121404 0.00001647 254 | MYH7 c.4536G>C 1 121402 0.00000824 255 | MYH7 c.4525A>C 2 121402 0.00001647 256 | MYH7 c.4499G>A 3 121412 0.00002471 257 | MYH7 c.4481A>T 2 121412 0.00001647 258 | MYH7 c.4471T>A 1 121412 0.00000824 259 | MYH7 c.4459G>A 2 121408 0.00001647 260 | MYH7 c.4441C>T 1 121406 0.00000824 261 | MYH7 c.4435A>G 3 121396 0.00002471 262 | MYH7 c.4390C>T 1 121212 0.00000825 263 | MYH7 c.4343A>T 14 121344 0.00011537 264 | MYH7 c.4321G>T 1 121370 0.00000824 265 | MYH7 c.4301G>A 2 121394 0.00001648 266 | MYH7 c.4292A>G 1 121402 0.00000824 267 | MYH7 c.4286T>C 1 121404 0.00000824 268 | MYH7 c.4283T>C 4 121404 0.00003295 269 | MYH7 c.4238C>T 2 121394 0.00001648 270 | MYH7 c.4219G>A 2 121362 0.00001648 271 | MYH7 c.4216G>A 1 121344 0.00000824 272 | MYH7 c.4196A>G 1 121148 0.00000825 273 | MYH7 c.4180G>A 1 120664 0.00000829 274 | MYH7 c.4159G>C 1 121348 0.00000824 275 | MYH7 c.4141C>G 1 121372 0.00000824 276 | MYH7 c.4097C>T 3 121330 0.00002473 277 | MYH7 c.4090G>A 5 121312 0.00004122 278 | MYH7 c.4076G>A 4 121242 
0.00003299 279 | MYH7 c.4063G>A 1 121168 0.00000825 280 | MYH7 c.4061A>G 1 121162 0.00000825 281 | MYH7 c.4040A>G 1 120856 0.00000827 282 | MYH7 c.4023C>A 1 120476 0.00000830 283 | MYH7 c.4021G>A 1 120314 0.00000831 284 | MYH7 c.4021G>T 1 120314 0.00000831 285 | MYH7 c.4018T>C 1 120154 0.00000832 286 | MYH7 c.4009C>T 1 119366 0.00000838 287 | MYH7 c.4003T>G 1 118786 0.00000842 288 | MYH7 c.3977A>G 2 115354 0.00001734 289 | MYH7 c.3974C>T 3 114572 0.00002618 290 | MYH7 c.3974C>G 1 114572 0.00000873 291 | MYH7 c.3931C>T 1 121404 0.00000824 292 | MYH7 c.3908G>A 1 121398 0.00000824 293 | MYH7 c.3883G>A 1 121402 0.00000824 294 | MYH7 c.3829C>T 2 121402 0.00001647 295 | MYH7 c.3814G>A 3 121408 0.00002471 296 | MYH7 c.3809T>C 1 121408 0.00000824 297 | MYH7 c.3803G>A 10 121410 0.00008237 298 | MYH7 c.3797C>A 1 121408 0.00000824 299 | MYH7 c.3782G>A 1 121404 0.00000824 300 | MYH7 c.3779G>A 4 121404 0.00003295 301 | MYH7 c.3778C>T 1 121404 0.00000824 302 | MYH7 c.3777C>A 3 121404 0.00002471 303 | MYH7 c.3770A>G 37 121404 0.00030477 304 | MYH7 c.3749G>C 1 121394 0.00000824 305 | MYH7 c.3493A>G 1 78954 0.00001267 306 | MYH7 c.3376C>T 1 97864 0.00001022 307 | MYH7 c.3350A>T 1 102504 0.00000976 308 | MYH7 c.3283G>A 1 121336 0.00000824 309 | MYH7 c.3274G>A 2 121308 0.00001649 310 | MYH7 c.3229G>T 1 121378 0.00000824 311 | MYH7 c.3196A>G 1 121396 0.00000824 312 | MYH7 c.3149G>C 1 121394 0.00000824 313 | MYH7 c.3137T>G 1 121400 0.00000824 314 | MYH7 c.3134G>T 2 121396 0.00001648 315 | MYH7 c.3094G>A 2 121412 0.00001647 316 | MYH7 c.3089A>G 1 121412 0.00000824 317 | MYH7 c.3068C>T 1 121412 0.00000824 318 | MYH7 c.3058C>A 2 121412 0.00001647 319 | MYH7 c.3050T>C 1 121412 0.00000824 320 | MYH7 c.3046A>G 1 121412 0.00000824 321 | MYH7 c.3040G>A 1 121412 0.00000824 322 | MYH7 c.3037G>A 2 121412 0.00001647 323 | MYH7 c.3035C>A 5 121412 0.00004118 324 | MYH7 c.3028C>T 1 121410 0.00000824 325 | MYH7 c.3010C>G 1 121412 0.00000824 326 | MYH7 c.2982G>C 1 121410 0.00000824 327 | MYH7 c.2968G>A 
1 121412 0.00000824 328 | MYH7 c.2891T>C 1 121412 0.00000824 329 | MYH7 c.2846A>T 1 121412 0.00000824 330 | MYH7 c.2822G>A 2 121412 0.00001647 331 | MYH7 c.2821C>A 1 121412 0.00000824 332 | MYH7 c.2812G>T 1 121412 0.00000824 333 | MYH7 c.2804A>T 2 121412 0.00001647 334 | MYH7 c.2787G>C 2 121412 0.00001647 335 | MYH7 c.2766G>T 1 121412 0.00000824 336 | MYH7 c.2765T>C 2 121412 0.00001647 337 | MYH7 c.2762A>C 1 121412 0.00000824 338 | MYH7 c.2761G>T 1 121412 0.00000824 339 | MYH7 c.2749G>A 1 121410 0.00000824 340 | MYH7 c.2689A>C 1 121326 0.00000824 341 | MYH7 c.2681A>T 1 121254 0.00000825 342 | MYH7 c.2677G>A 1 121320 0.00000824 343 | MYH7 c.2665C>T 1 121348 0.00000824 344 | MYH7 c.2573G>T 1 121396 0.00000824 345 | MYH7 c.2452A>G 1 121358 0.00000824 346 | MYH7 c.2425G>T 6 121200 0.00004950 347 | MYH7 c.2400G>T 1 121376 0.00000824 348 | MYH7 c.2357C>T 2 121382 0.00001648 349 | MYH7 c.2347C>T 1 121372 0.00000824 350 | MYH7 c.2312G>C 1 121270 0.00000825 351 | MYH7 c.2263C>T 1 121410 0.00000824 352 | MYH7 c.2134C>T 1 121320 0.00000824 353 | MYH7 c.2060C>A 4 119670 0.00003343 354 | MYH7 c.2052G>A 1 118986 0.00000840 355 | MYH7 c.1997A>G 1 121356 0.00000824 356 | MYH7 c.1975A>G 1 121236 0.00000825 357 | MYH7 c.1971G>T 1 121220 0.00000825 358 | MYH7 c.1896G>T 1 120942 0.00000827 359 | MYH7 c.1894G>T 1 120916 0.00000827 360 | MYH7 c.1892T>C 2 120924 0.00001654 361 | MYH7 c.1888C>T 2 118290 0.00001691 362 | MYH7 c.1856C>T 4 121136 0.00003302 363 | MYH7 c.1788G>C 1 121404 0.00000824 364 | MYH7 c.1766A>G 1 121406 0.00000824 365 | MYH7 c.1756G>A 1 121406 0.00000824 366 | MYH7 c.1754T>C 1 121406 0.00000824 367 | MYH7 c.1730T>C 1 121408 0.00000824 368 | MYH7 c.1729T>C 1 121408 0.00000824 369 | MYH7 c.1709A>G 2 121410 0.00001647 370 | MYH7 c.1702A>G 1 121412 0.00000824 371 | MYH7 c.1699C>T 1 121410 0.00000824 372 | MYH7 c.1681G>T 1 121412 0.00000824 373 | MYH7 c.1662C>A 1 121408 0.00000824 374 | MYH7 c.1630A>G 2 121404 0.00001647 375 | MYH7 c.1611G>C 2 121400 0.00001647 376 | MYH7 
c.1601T>C 1 121396 0.00000824 377 | MYH7 c.1588A>G 1 121380 0.00000824 378 | MYH7 c.1519G>A 1 121412 0.00000824 379 | MYH7 c.1405G>A 1 121334 0.00000824 380 | MYH7 c.1334C>T 2 121410 0.00001647 381 | MYH7 c.1262T>C 1 121412 0.00000824 382 | MYH7 c.1261A>G 13 121412 0.00010707 383 | MYH7 c.1230C>A 1 121412 0.00000824 384 | MYH7 c.1126G>C 1 121396 0.00000824 385 | MYH7 c.1095G>T 1 121404 0.00000824 386 | MYH7 c.1086G>T 1 121408 0.00000824 387 | MYH7 c.1045A>G 3 121400 0.00002471 388 | MYH7 c.1033G>A 1 121382 0.00000824 389 | MYH7 c.1018G>A 1 121358 0.00000824 390 | MYH7 c.968T>C 9 119224 0.00007549 391 | MYH7 c.968T>G 1 119224 0.00000839 392 | MYH7 c.949G>C 1 120254 0.00000832 393 | MYH7 c.936C>A 1 120582 0.00000829 394 | MYH7 c.934T>C 1 120618 0.00000829 395 | MYH7 c.913A>C 1 120616 0.00000829 396 | MYH7 c.910A>T 1 120588 0.00000829 397 | MYH7 c.898A>T 1 120304 0.00000831 398 | MYH7 c.853A>G 1 121378 0.00000824 399 | MYH7 c.821T>C 1 121174 0.00000825 400 | MYH7 c.751C>T 1 121410 0.00000824 401 | MYH7 c.658A>G 2 121412 0.00001647 402 | MYH7 c.640G>A 1 121406 0.00000824 403 | MYH7 c.610C>T 3 121412 0.00002471 404 | MYH7 c.601A>T 2 121412 0.00001647 405 | MYH7 c.589G>A 2 121412 0.00001647 406 | MYH7 c.565A>C 1 121412 0.00000824 407 | MYH7 c.541G>A 1 121402 0.00000824 408 | MYH7 c.500C>G 1 121412 0.00000824 409 | MYH7 c.465C>A 1 121412 0.00000824 410 | MYH7 c.445G>A 1 121412 0.00000824 411 | MYH7 c.442A>G 2 121412 0.00001647 412 | MYH7 c.341T>C 1 121352 0.00000824 413 | MYH7 c.340A>T 1 121356 0.00000824 414 | MYH7 c.335G>A 1 121362 0.00000824 415 | MYH7 c.328G>A 3 121370 0.00002472 416 | MYH7 c.298G>T 1 121406 0.00000824 417 | MYH7 c.268A>G 2 121410 0.00001647 418 | MYH7 c.253G>C 2 121410 0.00001647 419 | MYH7 c.211G>A 5 121374 0.00004119 420 | MYH7 c.208A>T 1 121366 0.00000824 421 | MYH7 c.184G>A 1 121290 0.00000824 422 | MYH7 c.175G>T 1 121302 0.00000824 423 | MYH7 c.166G>A 1 121318 0.00000824 424 | MYH7 c.161G>T 1 121318 0.00000824 425 | MYH7 c.161G>A 2 121318 
0.00001649 426 | MYH7 c.160C>G 1 121326 0.00000824 427 | MYH7 c.138T>A 1 121354 0.00000824 428 | MYH7 c.124G>A 1 121366 0.00000824 429 | MYH7 c.91T>C 1 121354 0.00000824 430 | MYH7 c.76G>A 1 121332 0.00000824 431 | MYH7 c.67C>T 3 121320 0.00002473 432 | MYH7 c.54G>C 1 121282 0.00000825 433 | MYH7 c.41C>T 1 121166 0.00000825 434 | MYH7 c.22G>A 1 120950 0.00000827 435 | MYH7 c.22G>C 1 120950 0.00000827 436 | MYH7 c.17T>C 3 120852 0.00002482 437 | MYH7 c.3947_3948insA 1 121406 0.00000824 438 | MYH7 c.3100-2A>C 1 121310 0.00000824 439 | MYH7 c.2922+1G>A 1 121412 0.00000824 440 | MYH7 c.2470delG 1 121394 0.00000824 441 | MYH7 c.2163-1G>T 1 121398 0.00000824 442 | MYH7 c.2162+1G>A 1 121392 0.00000824 443 | MYH7 c.1888+1G>A 1 117866 0.00000848 444 | MYH7 c.733-1G>A 1 121412 0.00000824 445 | MYH7 c.503-2A>T 1 121412 0.00000824 446 | MYH7 c.259delA 1 121412 0.00000824 447 | MYH7 c.54delG 1 121288 0.00000824 448 | TNNT2 c.83C>T 59 121208 0.00048677 449 | TNNT2 c.230C>T 4 117802 0.00003396 450 | TNNT2 c.257A>C 1 117696 0.00000850 451 | TNNT2 c.274C>T 1 121396 0.00000824 452 | TNNT2 c.311C>T 1 121400 0.00000824 453 | TNNT2 c.391C>T 1 115728 0.00000864 454 | TNNT2 c.406G>A 1 116090 0.00000861 455 | TNNT2 c.430C>T 4 114926 0.00003481 456 | TNNT2 c.662T>C 15 121380 0.00012358 457 | TNNT2 c.732G>T 40 121412 0.00032946 458 | TNNT2 c.832C>T 40 93228 0.00042906 459 | TNNT2 c.856C>T 1 89396 0.00001119 460 | TNNT2 c.857G>A 7 88810 0.00007882 461 | TNNT2 c.860G>A 1 87972 0.00001137 462 | TNNT2 c.853G>A 6 90344 0.00006641 463 | TNNT2 c.745G>A 2 121412 0.00001647 464 | TNNT2 c.583G>A 2 121350 0.00001648 465 | TNNT2 c.408G>T 1 116076 0.00000862 466 | TNNT2 c.178A>G 6 121308 0.00004946 467 | TNNT2 c.10A>C 2 121406 0.00001647 468 | TNNT2 c.833G>A 2 93298 0.00002144 469 | TNNT2 c.421delC 4 115786 0.00003455 470 | TNNT2 c.780+1G>A 2 121412 0.00001647 471 | TNNT2 c.629A>T 1 121386 0.00000824 472 | TNNT2 c.218A>G 2 117138 0.00001707 473 | TNNT2 c.857G>T 1 88810 0.00001126 474 | TNNT2 c.452G>A 1 
110290 0.00000907 475 | TNNT2 c.83C>A 1 121208 0.00000825 476 | TNNT2 c.782T>C 1 52256 0.00001914 477 | TNNT2 c.749T>C 1 121412 0.00000824 478 | TNNT2 c.734C>T 1 121412 0.00000824 479 | TNNT2 c.587G>A 2 121364 0.00001648 480 | TNNT2 c.431G>A 2 114762 0.00001743 481 | TNNT2 c.251dupG 2 117824 0.00001697 482 | TNNT2 c.523A>C 1 121412 0.00000824 483 | TNNT2 c.361G>A 21 121412 0.00017296 484 | TNNT2 c.145G>C 1 121400 0.00000824 485 | TNNT2 c.59C>T 1 120768 0.00000828 486 | TNNT2 c.571-1G>A 2 117154 0.00001707 487 | TNNT2 c.863A>G 1 87244 0.00001146 488 | TNNT2 c.835G>A 1 93274 0.00001072 489 | TNNT2 c.808G>T 1 58244 0.00001717 490 | TNNT2 c.805A>G 1 57730 0.00001732 491 | TNNT2 c.794G>A 1 55152 0.00001813 492 | TNNT2 c.749T>A 2 121412 0.00001647 493 | TNNT2 c.700A>G 1 121408 0.00000824 494 | TNNT2 c.652G>T 4 121382 0.00003295 495 | TNNT2 c.640G>A 1 121388 0.00000824 496 | TNNT2 c.553G>T 1 121412 0.00000824 497 | TNNT2 c.547C>T 1 121412 0.00000824 498 | TNNT2 c.546G>A 1 121412 0.00000824 499 | TNNT2 c.528G>C 1 121412 0.00000824 500 | TNNT2 c.488A>G 1 121412 0.00000824 501 | TNNT2 c.475C>T 2 121402 0.00001647 502 | TNNT2 c.472C>T 1 121402 0.00000824 503 | TNNT2 c.467G>A 1 121408 0.00000824 504 | TNNT2 c.449A>G 1 111530 0.00000897 505 | TNNT2 c.425A>G 1 115582 0.00000865 506 | TNNT2 c.418A>G 1 116056 0.00000862 507 | TNNT2 c.406G>C 1 116090 0.00000861 508 | TNNT2 c.401G>A 1 116042 0.00000862 509 | TNNT2 c.374A>G 1 121412 0.00000824 510 | TNNT2 c.373G>A 1 121412 0.00000824 511 | TNNT2 c.358C>G 1 121412 0.00000824 512 | TNNT2 c.284T>C 1 121396 0.00000824 513 | TNNT2 c.283A>G 2 121394 0.00001648 514 | TNNT2 c.235A>T 2 117982 0.00001695 515 | TNNT2 c.224T>G 2 117540 0.00001702 516 | TNNT2 c.206C>T 1 115146 0.00000868 517 | TNNT2 c.200C>T 2 121364 0.00001648 518 | TNNT2 c.199C>A 1 121360 0.00000824 519 | TNNT2 c.164C>A 1 121400 0.00000824 520 | TNNT2 c.150G>T 1 121400 0.00000824 521 | TNNT2 c.115G>A 2 121346 0.00001648 522 | TNNT2 c.106G>C 6 121340 0.00004945 523 | TNNT2 
c.101C>T 1 121334 0.00000824 524 | TNNT2 c.91G>A 1 121280 0.00000825 525 | TNNT2 c.82G>A 4 121176 0.00003301 526 | TNNT2 c.80C>G 2 121168 0.00001651 527 | TNNT2 c.80C>A 1 121168 0.00000825 528 | TNNT2 c.67G>A 1 121378 0.00000824 529 | TNNT2 c.40G>A 2 121408 0.00001647 530 | TNNT2 c.34G>A 1 121408 0.00000824 531 | TNNT2 c.4T>C 1 121406 0.00000824 532 | TNNT2 c.584_585delAG 1 121354 0.00000824 533 | TNNT2 c.381+1G>A 1 121412 0.00000824 534 | TNNT2 c.203+1G>A 1 121378 0.00000824 535 | TNNT2 c.147_149delAGA 1 121400 0.00000824 536 | TNNI3 c.5C>T 4 120314 0.00003325 537 | TNNI3 c.220C>A 2 101688 0.00001967 538 | TNNI3 c.235C>T 46 106538 0.00043177 539 | TNNI3 c.407G>A 1 120380 0.00000831 540 | TNNI3 c.434G>A 3 120502 0.00002490 541 | TNNI3 c.433C>T 1 120492 0.00000830 542 | TNNI3 c.485G>A 3 120102 0.00002498 543 | TNNI3 c.484C>T 4 120202 0.00003328 544 | TNNI3 c.497C>T 1 119968 0.00000834 545 | TNNI3 c.586G>A 1 120728 0.00000828 546 | TNNI3 c.258delC 1 111848 0.00000894 547 | TNNI3 c.347C>G 2 119928 0.00001668 548 | TNNI3 c.356C>A 4 119620 0.00003344 549 | TNNI3 c.293G>A 1 119734 0.00000835 550 | TNNI3 c.92C>T 1 58032 0.00001723 551 | TNNI3 c.304G>A 3 120058 0.00002499 552 | TNNI3 c.114dupA 3 24306 0.00012343 553 | TNNI3 c.428C>A 3 120482 0.00002490 554 | TNNI3 c.167T>C 2 81522 0.00002453 555 | TNNI3 c.34C>A 1 70152 0.00001425 556 | TNNI3 c.611G>T 1 120734 0.00000828 557 | TNNI3 c.603G>C 1 120736 0.00000828 558 | TNNI3 c.581A>C 1 120720 0.00000828 559 | TNNI3 c.561G>T 1 120606 0.00000829 560 | TNNI3 c.556C>T 1 120478 0.00000830 561 | TNNI3 c.541A>G 1 118904 0.00000841 562 | TNNI3 c.482C>T 2 120216 0.00001664 563 | TNNI3 c.463A>G 1 120480 0.00000830 564 | TNNI3 c.439G>C 1 120522 0.00000830 565 | TNNI3 c.421C>T 1 120444 0.00000830 566 | TNNI3 c.370G>C 1 118454 0.00000844 567 | TNNI3 c.368C>T 3 118756 0.00002526 568 | TNNI3 c.341T>C 1 120064 0.00000833 569 | TNNI3 c.340A>G 1 120090 0.00000833 570 | TNNI3 c.331A>G 4 120174 0.00003329 571 | TNNI3 c.322G>A 1 120166 0.00000832 
572 | TNNI3 c.318G>C 5 120182 0.00004160 573 | TNNI3 c.308G>A 2 120106 0.00001665 574 | TNNI3 c.298C>T 1 119904 0.00000834 575 | TNNI3 c.292C>G 1 119730 0.00000835 576 | TNNI3 c.292C>T 11 119730 0.00009187 577 | TNNI3 c.278T>C 1 113262 0.00000883 578 | TNNI3 c.272C>T 1 112992 0.00000885 579 | TNNI3 c.263T>G 1 112340 0.00000890 580 | TNNI3 c.259G>A 1 111848 0.00000894 581 | TNNI3 c.250G>C 1 110322 0.00000906 582 | TNNI3 c.245C>G 1 108616 0.00000921 583 | TNNI3 c.239G>A 1 107584 0.00000930 584 | TNNI3 c.236G>T 5 106742 0.00004684 585 | TNNI3 c.220C>G 1 101688 0.00000983 586 | TNNI3 c.200A>T 3 92314 0.00003250 587 | TNNI3 c.178G>C 4 84752 0.00004720 588 | TNNI3 c.166A>G 1 81204 0.00001231 589 | TNNI3 c.158T>G 1 78466 0.00001274 590 | TNNI3 c.157C>G 1 76272 0.00001311 591 | TNNI3 c.143A>C 3 23950 0.00012526 592 | TNNI3 c.57C>G 1 69126 0.00001447 593 | TNNI3 c.55A>G 3 69320 0.00004328 594 | TNNI3 c.52C>A 2 69530 0.00002876 595 | TNNI3 c.34C>T 3 70152 0.00004276 596 | TNNI3 c.23C>T 1 107406 0.00000931 597 | TNNI3 c.632G>T 1 120718 0.00000828 598 | TNNI3 c.373-1G>T 1 120172 0.00000832 599 | TNNI3 c.204delG 6 94672 0.00006338 600 | TNNI3 c.95_99dupAGCCG 1 51804 0.00001930 601 | TNNI3 c.24+2T>A 1 107624 0.00000929 602 | MYBPC3 c.13G>C 36 119710 0.00030073 603 | MYBPC3 c.103C>T 5 88878 0.00005626 604 | MYBPC3 c.184A>C 14 53782 0.00026031 605 | MYBPC3 c.223G>A 3 32862 0.00009129 606 | MYBPC3 c.440C>T 9 23770 0.00037863 607 | MYBPC3 c.461T>C 5 24758 0.00020195 608 | MYBPC3 c.495G>C 8 23792 0.00033625 609 | MYBPC3 c.529C>T 6 95690 0.00006270 610 | MYBPC3 c.532G>A 2 97810 0.00002045 611 | MYBPC3 c.557C>T 5 104952 0.00004764 612 | MYBPC3 c.604A>C 5 111214 0.00004496 613 | MYBPC3 c.624G>C 18 110910 0.00016229 614 | MYBPC3 c.646G>A 40 109458 0.00036544 615 | MYBPC3 c.655G>T 1 94000 0.00001064 616 | MYBPC3 c.682G>A 6 105368 0.00005694 617 | MYBPC3 c.772G>A 3 76868 0.00003903 618 | MYBPC3 c.787G>A 6 25596 0.00023441 619 | MYBPC3 c.814C>T 2 23870 0.00008379 620 | MYBPC3 c.818G>A 1 
23428 0.00004268 621 | MYBPC3 c.842G>A 2 28624 0.00006987 622 | MYBPC3 c.859C>T 2 48056 0.00004162 623 | MYBPC3 c.1000G>A 36 106554 0.00033786 624 | MYBPC3 c.1147C>G 9 101614 0.00008857 625 | MYBPC3 c.1246G>A 3 103534 0.00002898 626 | MYBPC3 c.1321G>A 18 110970 0.00016221 627 | MYBPC3 c.1370C>T 22 112792 0.00019505 628 | MYBPC3 c.1373G>A 5 113016 0.00004424 629 | MYBPC3 c.1409G>A 1 116034 0.00000862 630 | MYBPC3 c.1468G>A 25 120382 0.00020767 631 | MYBPC3 c.1484G>A 1 120574 0.00000829 632 | MYBPC3 c.1504C>T 3 120674 0.00002486 633 | MYBPC3 c.1519G>A 82 120688 0.00067944 634 | MYBPC3 c.1564G>A 47 120296 0.00039070 635 | MYBPC3 c.1591G>C 2 115446 0.00001732 636 | MYBPC3 c.1624G>C 2 80474 0.00002485 637 | MYBPC3 c.1633C>A 4 118074 0.00003388 638 | MYBPC3 c.1664T>C 3 118610 0.00002529 639 | MYBPC3 c.1814A>G 7 47806 0.00014643 640 | MYBPC3 c.1813G>A 9 47656 0.00018885 641 | MYBPC3 c.1822C>T 7 49798 0.00014057 642 | MYBPC3 c.1828G>C 3 51486 0.00005827 643 | MYBPC3 c.1855G>A 43 54808 0.00078456 644 | MYBPC3 c.2002C>T 2 119048 0.00001680 645 | MYBPC3 c.2003G>A 32 118698 0.00026959 646 | MYBPC3 c.2063C>A 1 53416 0.00001872 647 | MYBPC3 c.2077G>T 4 120730 0.00003313 648 | MYBPC3 c.2176C>T 4 110088 0.00003633 649 | MYBPC3 c.2197C>T 10 116866 0.00008557 650 | MYBPC3 c.2198G>A 4 117500 0.00003404 651 | MYBPC3 c.2269G>A 8 120616 0.00006633 652 | MYBPC3 c.2308G>A 1 120498 0.00000830 653 | MYBPC3 c.2311G>A 1 17666 0.00005661 654 | MYBPC3 c.2429G>A 4 120518 0.00003319 655 | MYBPC3 c.2459G>A 2 120694 0.00001657 656 | MYBPC3 c.2458C>T 1 120696 0.00000829 657 | MYBPC3 c.2500C>T 2 120634 0.00001658 658 | MYBPC3 c.2504G>T 9 120604 0.00007462 659 | MYBPC3 c.2552C>T 8 118158 0.00006771 660 | MYBPC3 c.2618C>A 13 64128 0.00020272 661 | MYBPC3 c.2654C>T 1 45096 0.00002217 662 | MYBPC3 c.2728C>A 6 15362 0.00039057 663 | MYBPC3 c.2783C>T 1 119088 0.00000840 664 | MYBPC3 c.2807C>T 7 118830 0.00005891 665 | MYBPC3 c.2827C>T 2 117632 0.00001700 666 | MYBPC3 c.2873C>T 7 107600 0.00006506 667 | 
MYBPC3 c.2882C>T 5 103876 0.00004813 668 | MYBPC3 c.3005G>A 5 107436 0.00004654 669 | MYBPC3 c.3004C>T 72 107228 0.00067147 670 | MYBPC3 c.3049G>A 10 117428 0.00008516 671 | MYBPC3 c.3065G>C 3 119054 0.00002520 672 | MYBPC3 c.3106C>T 62 120392 0.00051498 673 | MYBPC3 c.3137C>T 7 120394 0.00005814 674 | MYBPC3 c.3142C>T 2 120360 0.00001662 675 | MYBPC3 c.3181C>T 2 119398 0.00001675 676 | MYBPC3 c.3372C>A 2 87900 0.00002275 677 | MYBPC3 c.3373G>A 2 87880 0.00002276 678 | MYBPC3 c.3569G>A 7 119246 0.00005870 679 | MYBPC3 c.3580G>A 1 119816 0.00000835 680 | MYBPC3 c.3697C>T 1 120590 0.00000829 681 | MYBPC3 c.3742G>A 4 120334 0.00003324 682 | MYBPC3 c.3763G>A 9 119936 0.00007504 683 | MYBPC3 c.3811C>T 3 116692 0.00002571 684 | MYBPC3 c.1038_1042dupCGGCA 1 118814 0.00000842 685 | MYBPC3 c.2441_2443delAGA 39 120542 0.00032354 686 | MYBPC3 c.82G>A 18 88604 0.00020315 687 | MYBPC3 c.251G>A 1 26098 0.00003832 688 | MYBPC3 c.713G>A 8 107664 0.00007431 689 | MYBPC3 c.961G>A 37 80000 0.00046250 690 | MYBPC3 c.1153G>A 1 98880 0.00001011 691 | MYBPC3 c.1286C>T 22 106526 0.00020652 692 | MYBPC3 c.1309G>A 1 109754 0.00000911 693 | MYBPC3 c.1591G>A 2 115446 0.00001732 694 | MYBPC3 c.1960C>T 1 120584 0.00000829 695 | MYBPC3 c.1985T>C 1 120402 0.00000831 696 | MYBPC3 c.2618C>T 2 64128 0.00003119 697 | MYBPC3 c.2815C>T 4 118364 0.00003379 698 | MYBPC3 c.1543_1545delAAC 1 120616 0.00000829 699 | MYBPC3 c.3286G>A 2 52352 0.00003820 700 | MYBPC3 c.1319G>A 1 110836 0.00000902 701 | MYBPC3 c.2539T>C 1 119904 0.00000834 702 | MYBPC3 c.2761C>G 21 118152 0.00017774 703 | MYBPC3 c.3326C>T 11 66078 0.00016647 704 | MYBPC3 c.2614G>A 38 64766 0.00058673 705 | MYBPC3 c.1544A>G 17 120630 0.00014093 706 | MYBPC3 c.407-1G>A 1 21306 0.00004694 707 | MYBPC3 c.2210C>T 6 119120 0.00005037 708 | MYBPC3 c.3682C>T 26 120584 0.00021562 709 | MYBPC3 c.2914C>T 13 30098 0.00043192 710 | MYBPC3 c.3535G>A 49 106716 0.00045916 711 | MYBPC3 c.26-2A>G 4 78210 0.00005114 712 | MYBPC3 c.2909G>A 1 30760 0.00003251 713 | 
MYBPC3 c.2113dupA 1 120770 0.00000828 714 | MYBPC3 c.2771C>T 11 118760 0.00009262 715 | MYBPC3 c.2170C>T 2 105286 0.00001900 716 | MYBPC3 c.1789C>T 1 26174 0.00003821 717 | MYBPC3 c.1786G>A 7 26698 0.00026219 718 | MYBPC3 c.1070G>A 2 119700 0.00001671 719 | MYBPC3 c.530G>A 92 96910 0.00094933 720 | MYBPC3 c.2414-2A>G 1 120266 0.00000831 721 | MYBPC3 c.821+1G>A 1 23204 0.00004310 722 | MYBPC3 c.906-1G>C 1 113088 0.00000884 723 | MYBPC3 c.3787C>T 4 118976 0.00003362 724 | MYBPC3 c.3668A>G 1 120564 0.00000829 725 | MYBPC3 c.3642G>A 1 120420 0.00000830 726 | MYBPC3 c.3470C>T 7 74646 0.00009378 727 | MYBPC3 c.3463A>G 1 75580 0.00001323 728 | MYBPC3 c.3442A>G 1 77750 0.00001286 729 | MYBPC3 c.3415G>A 7 79966 0.00008754 730 | MYBPC3 c.3412C>T 3 80666 0.00003719 731 | MYBPC3 c.3384G>C 11 86598 0.00012702 732 | MYBPC3 c.3358C>T 3 88590 0.00003386 733 | MYBPC3 c.3346T>G 1 88020 0.00001136 734 | MYBPC3 c.3217C>T 2 40766 0.00004906 735 | MYBPC3 c.3112G>A 2 120448 0.00001660 736 | MYBPC3 c.3107G>A 7 120414 0.00005813 737 | MYBPC3 c.3052G>C 2 117696 0.00001699 738 | MYBPC3 c.2980C>T 6 28908 0.00020756 739 | MYBPC3 c.2684G>A 4 24258 0.00016489 740 | MYBPC3 c.2683C>T 1 24788 0.00004034 741 | MYBPC3 c.2518G>A 2 120484 0.00001660 742 | MYBPC3 c.2056G>C 1 61086 0.00001637 743 | MYBPC3 c.1820C>T 1 49580 0.00002017 744 | MYBPC3 c.1816G>A 3 48650 0.00006166 745 | MYBPC3 c.1601C>T 5 109380 0.00004571 746 | MYBPC3 c.1565C>T 3 120274 0.00002494 747 | MYBPC3 c.1554G>A 2 120554 0.00001659 748 | MYBPC3 c.1445C>T 5 118108 0.00004233 749 | MYBPC3 c.1372C>T 6 112904 0.00005314 750 | MYBPC3 c.1294G>A 4 106504 0.00003756 751 | MYBPC3 c.917G>A 4 108298 0.00003694 752 | MYBPC3 c.852-1G>T 1 49084 0.00002037 753 | MYBPC3 c.799C>G 2 24914 0.00008028 754 | MYBPC3 c.752A>T 5 96128 0.00005201 755 | MYBPC3 c.598A>G 4 111112 0.00003600 756 | MYBPC3 c.553A>C 2 105016 0.00001904 757 | MYBPC3 c.512G>A 1 77614 0.00001288 758 | MYBPC3 c.187C>T 4 51686 0.00007739 759 | MYBPC3 c.148A>G 3 77184 0.00003887 760 | 
MYBPC3 c.131G>A 3 82584 0.00003633 761 | MYBPC3 c.107C>A 3 88396 0.00003394 762 | MYBPC3 c.74G>A 5 87496 0.00005715 763 | MYBPC3 c.50G>A 12 83926 0.00014298 764 | MYBPC3 c.206G>A 1 40094 0.00002494 765 | MYBPC3 c.3569G>T 1 119246 0.00000839 766 | MYBPC3 c.3362G>A 11 87916 0.00012512 767 | MYBPC3 c.2833C>T 1 116746 0.00000857 768 | MYBPC3 c.1334C>T 1 112372 0.00000890 769 | MYBPC3 c.2828G>A 3 117440 0.00002554 770 | MYBPC3 c.305C>T 2 19596 0.00010206 771 | MYBPC3 c.3097C>T 1 120276 0.00000831 772 | MYBPC3 c.643C>T 17 109848 0.00015476 773 | MYBPC3 c.956A>C 6 76038 0.00007891 774 | MYBPC3 c.2381C>T 6 27740 0.00021629 775 | MYBPC3 c.1813_1814insCG 1 47806 0.00002092 776 | MYBPC3 c.926+1G>A 2 108696 0.00001840 777 | MYBPC3 c.2543C>T 1 119588 0.00000836 778 | MYBPC3 c.3190+2T>G 2 118438 0.00001689 779 | MYBPC3 c.442G>A 1 23806 0.00004201 780 | MYBPC3 c.994G>A 1 103078 0.00000970 781 | MYBPC3 c.121C>T 3 85890 0.00003493 782 | MYBPC3 c.3715G>A 2 120538 0.00001659 783 | MYBPC3 c.3316G>A 4 65418 0.00006115 784 | MYBPC3 c.481C>A 1 24106 0.00004148 785 | MYBPC3 c.3683G>A 2 120580 0.00001659 786 | MYBPC3 c.3472G>A 11 73936 0.00014878 787 | MYBPC3 c.3442A>C 1 77750 0.00001286 788 | MYBPC3 c.3103G>A 8 120378 0.00006646 789 | MYBPC3 c.2822T>C 1 117988 0.00000848 790 | MYBPC3 c.2512G>C 3 120548 0.00002489 791 | MYBPC3 c.2501G>A 3 120618 0.00002487 792 | MYBPC3 c.1721G>A 11 90118 0.00012206 793 | MYBPC3 c.1112C>T 3 106048 0.00002829 794 | MYBPC3 c.502G>A 11 23574 0.00046662 795 | MYBPC3 c.362C>T 4 27086 0.00014768 796 | MYBPC3 c.188G>A 2 50846 0.00003933 797 | MYBPC3 c.151G>A 1 75080 0.00001332 798 | MYBPC3 c.130C>T 2 82878 0.00002413 799 | MYBPC3 c.104G>A 7 88510 0.00007909 800 | MYBPC3 c.91G>A 1 89950 0.00001112 801 | MYBPC3 c.3812G>A 1 116500 0.00000858 802 | MYBPC3 c.3800G>A 5 117940 0.00004239 803 | MYBPC3 c.3799C>T 1 118184 0.00000846 804 | MYBPC3 c.3791G>A 1 118738 0.00000842 805 | MYBPC3 c.3788G>A 1 118892 0.00000841 806 | MYBPC3 c.3781G>A 2 119302 0.00001676 807 | MYBPC3 
c.3777G>C 1 119600 0.00000836 808 | MYBPC3 c.3767C>T 1 119838 0.00000834 809 | MYBPC3 c.3750C>G 2 120234 0.00001663 810 | MYBPC3 c.3676C>T 7 120582 0.00005805 811 | MYBPC3 c.3670G>A 1 120576 0.00000829 812 | MYBPC3 c.3667G>A 1 120562 0.00000829 813 | MYBPC3 c.3620G>T 1 119002 0.00000840 814 | MYBPC3 c.3614G>A 2 119460 0.00001674 815 | MYBPC3 c.3613C>T 2 119530 0.00001673 816 | MYBPC3 c.3581C>T 10 119870 0.00008342 817 | MYBPC3 c.3572C>T 2 119422 0.00001675 818 | MYBPC3 c.3568C>T 1 118976 0.00000841 819 | MYBPC3 c.3523C>G 1 99064 0.00001009 820 | MYBPC3 c.3512A>G 2 93394 0.00002141 821 | MYBPC3 c.3461C>T 1 75698 0.00001321 822 | MYBPC3 c.3457A>T 1 76058 0.00001315 823 | MYBPC3 c.3454G>T 1 75892 0.00001318 824 | MYBPC3 c.3452C>T 6 76054 0.00007889 825 | MYBPC3 c.3449G>A 1 77038 0.00001298 826 | MYBPC3 c.3383A>C 1 86916 0.00001151 827 | MYBPC3 c.3373G>T 1 87880 0.00001138 828 | MYBPC3 c.3371G>A 1 88272 0.00001133 829 | MYBPC3 c.3361C>T 2 88358 0.00002264 830 | MYBPC3 c.3324G>C 1 66120 0.00001512 831 | MYBPC3 c.3323A>C 3 66208 0.00004531 832 | MYBPC3 c.3299A>T 1 63872 0.00001566 833 | MYBPC3 c.3284C>T 3 51566 0.00005818 834 | MYBPC3 c.3277G>T 1 48700 0.00002053 835 | MYBPC3 c.3274G>A 4 48032 0.00008328 836 | MYBPC3 c.3257G>A 1 45884 0.00002179 837 | MYBPC3 c.3242A>C 1 45040 0.00002220 838 | MYBPC3 c.3233G>A 1 44048 0.00002270 839 | MYBPC3 c.3232T>C 1 43934 0.00002276 840 | MYBPC3 c.3229G>T 1 43344 0.00002307 841 | MYBPC3 c.3223A>G 2 42634 0.00004691 842 | MYBPC3 c.3175G>C 1 119696 0.00000835 843 | MYBPC3 c.3170C>T 1 119884 0.00000834 844 | MYBPC3 c.3154A>G 4 120236 0.00003327 845 | MYBPC3 c.3152A>G 1 120254 0.00000832 846 | MYBPC3 c.3148G>A 10 120306 0.00008312 847 | MYBPC3 c.3143G>A 2 120356 0.00001662 848 | MYBPC3 c.3127T>A 1 120458 0.00000830 849 | MYBPC3 c.3127T>C 1 120458 0.00000830 850 | MYBPC3 c.3122G>A 1 120476 0.00000830 851 | MYBPC3 c.3113T>A 2 120464 0.00001660 852 | MYBPC3 c.3110G>A 1 120436 0.00000830 853 | MYBPC3 c.3109C>T 2 120444 0.00001661 854 | MYBPC3 
c.3109C>G 1 120444 0.00000830 855 | MYBPC3 c.3079G>C 1 119878 0.00000834 856 | MYBPC3 c.3072C>A 3 119572 0.00002509 857 | MYBPC3 c.3067A>G 1 119268 0.00000838 858 | MYBPC3 c.3064C>T 1 119072 0.00000840 859 | MYBPC3 c.3043G>A 1 116418 0.00000859 860 | MYBPC3 c.3031G>A 1 114586 0.00000873 861 | MYBPC3 c.3017C>A 1 112246 0.00000891 862 | MYBPC3 c.2984T>A 1 28512 0.00003507 863 | MYBPC3 c.2962G>A 1 30636 0.00003264 864 | MYBPC3 c.2938C>T 2 32028 0.00006245 865 | MYBPC3 c.2915G>A 2 30462 0.00006566 866 | MYBPC3 c.2908C>T 2 30154 0.00006633 867 | MYBPC3 c.2894A>G 1 98472 0.00001016 868 | MYBPC3 c.2891T>C 1 99928 0.00001001 869 | MYBPC3 c.2884G>C 2 102414 0.00001953 870 | MYBPC3 c.2876C>T 1 106520 0.00000939 871 | MYBPC3 c.2841C>A 1 115562 0.00000865 872 | MYBPC3 c.2840A>G 1 115776 0.00000864 873 | MYBPC3 c.2812G>T 1 118556 0.00000843 874 | MYBPC3 c.2800C>G 1 119058 0.00000840 875 | MYBPC3 c.2765G>A 3 118412 0.00002534 876 | MYBPC3 c.2723A>G 1 15932 0.00006277 877 | MYBPC3 c.2716G>A 2 16780 0.00011919 878 | MYBPC3 c.2702T>C 1 19020 0.00005258 879 | MYBPC3 c.2693C>T 1 20304 0.00004925 880 | MYBPC3 c.2672G>A 3 31078 0.00009653 881 | MYBPC3 c.2671C>T 1 31864 0.00003138 882 | MYBPC3 c.2668T>G 1 35190 0.00002842 883 | MYBPC3 c.2641G>A 1 53676 0.00001863 884 | MYBPC3 c.2624A>T 1 63064 0.00001586 885 | MYBPC3 c.2621C>T 2 64110 0.00003120 886 | MYBPC3 c.2602G>A 4 108806 0.00003676 887 | MYBPC3 c.2599A>G 1 109526 0.00000913 888 | MYBPC3 c.2576C>T 1 113726 0.00000879 889 | MYBPC3 c.2570C>T 1 114954 0.00000870 890 | MYBPC3 c.2568G>T 1 115162 0.00000868 891 | MYBPC3 c.2562G>A 4 116352 0.00003438 892 | MYBPC3 c.2560A>T 1 116762 0.00000856 893 | MYBPC3 c.2557G>A 1 117276 0.00000853 894 | MYBPC3 c.2557G>T 1 117276 0.00000853 895 | MYBPC3 c.2542G>A 1 119648 0.00000836 896 | MYBPC3 c.2536G>A 1 120026 0.00000833 897 | MYBPC3 c.2504G>A 1 120604 0.00000829 898 | MYBPC3 c.2503C>T 3 120608 0.00002487 899 | MYBPC3 c.2503C>A 1 120608 0.00000829 900 | MYBPC3 c.2491C>T 1 120680 0.00000829 901 | 
MYBPC3 c.2489G>A 1 120684 0.00000829 902 | MYBPC3 c.2482G>C 1 120692 0.00000829 903 | MYBPC3 c.2470G>A 1 120692 0.00000829 904 | MYBPC3 c.2450G>C 1 120688 0.00000829 905 | MYBPC3 c.2450G>A 2 120688 0.00001657 906 | MYBPC3 c.2439G>C 2 120620 0.00001658 907 | MYBPC3 c.2426A>G 1 120492 0.00000830 908 | MYBPC3 c.2398G>A 2 37364 0.00005353 909 | MYBPC3 c.2345A>G 1 21092 0.00004741 910 | MYBPC3 c.2339T>G 1 20576 0.00004860 911 | MYBPC3 c.2300A>G 2 120570 0.00001659 912 | MYBPC3 c.2278G>C 4 120618 0.00003316 913 | MYBPC3 c.2275G>A 3 120620 0.00002487 914 | MYBPC3 c.2249C>T 3 120524 0.00002489 915 | MYBPC3 c.2242G>A 3 120472 0.00002490 916 | MYBPC3 c.2200A>G 3 117750 0.00002548 917 | MYBPC3 c.2198G>T 3 117500 0.00002553 918 | MYBPC3 c.2189C>T 1 115458 0.00000866 919 | MYBPC3 c.2188A>C 1 114698 0.00000872 920 | MYBPC3 c.2179G>A 4 110952 0.00003605 921 | MYBPC3 c.2177G>A 4 110276 0.00003627 922 | MYBPC3 c.2164G>A 3 102842 0.00002917 923 | MYBPC3 c.2164G>C 1 102842 0.00000972 924 | MYBPC3 c.2161A>T 1 101754 0.00000983 925 | MYBPC3 c.2135T>C 1 120776 0.00000828 926 | MYBPC3 c.2134G>T 2 120776 0.00001656 927 | MYBPC3 c.2119G>A 2 120772 0.00001656 928 | MYBPC3 c.2099A>T 2 120758 0.00001656 929 | MYBPC3 c.2056G>A 1 61086 0.00001637 930 | MYBPC3 c.2041G>A 1 80126 0.00001248 931 | MYBPC3 c.2033C>A 2 90338 0.00002214 932 | MYBPC3 c.2018T>C 1 110022 0.00000909 933 | MYBPC3 c.1999C>A 1 119594 0.00000836 934 | MYBPC3 c.1978G>T 1 120528 0.00000830 935 | MYBPC3 c.1973C>T 2 120584 0.00001659 936 | MYBPC3 c.1955C>G 1 120578 0.00000829 937 | MYBPC3 c.1942C>T 1 120524 0.00000830 938 | MYBPC3 c.1938G>C 2 120490 0.00001660 939 | MYBPC3 c.1919C>T 2 36860 0.00005426 940 | MYBPC3 c.1915G>A 4 37290 0.00010727 941 | MYBPC3 c.1889A>T 2 50468 0.00003963 942 | MYBPC3 c.1871A>G 2 53822 0.00003716 943 | MYBPC3 c.1867T>A 1 54482 0.00001835 944 | MYBPC3 c.1826C>T 2 50972 0.00003924 945 | MYBPC3 c.1823C>G 3 50520 0.00005938 946 | MYBPC3 c.1809T>G 2 46874 0.00004267 947 | MYBPC3 c.1778C>T 1 28932 0.00003456 
948 | MYBPC3 c.1773G>T 1 30306 0.00003300 949 | MYBPC3 c.1726G>A 1 81820 0.00001222 950 | MYBPC3 c.1720C>T 5 92398 0.00005411 951 | MYBPC3 c.1685C>T 1 116300 0.00000860 952 | MYBPC3 c.1678G>C 1 117362 0.00000852 953 | MYBPC3 c.1672G>A 1 117892 0.00000848 954 | MYBPC3 c.1639G>A 1 118306 0.00000845 955 | MYBPC3 c.1623G>C 1 81928 0.00001221 956 | MYBPC3 c.1615A>G 6 92042 0.00006519 957 | MYBPC3 c.1599G>C 1 111074 0.00000900 958 | MYBPC3 c.1595G>A 1 113374 0.00000882 959 | MYBPC3 c.1589G>A 1 117054 0.00000854 960 | MYBPC3 c.1580T>C 1 119166 0.00000839 961 | MYBPC3 c.1546G>A 5 120616 0.00004145 962 | MYBPC3 c.1540A>G 1 120642 0.00000829 963 | MYBPC3 c.1503C>G 1 120676 0.00000829 964 | MYBPC3 c.1499A>C 1 120674 0.00000829 965 | MYBPC3 c.1493C>A 1 120644 0.00000829 966 | MYBPC3 c.1487A>G 1 120616 0.00000829 967 | MYBPC3 c.1472T>G 2 120462 0.00001660 968 | MYBPC3 c.1471G>A 7 120424 0.00005813 969 | MYBPC3 c.1469G>A 1 120410 0.00000830 970 | MYBPC3 c.1459C>A 1 120300 0.00000831 971 | MYBPC3 c.1442G>A 1 117996 0.00000847 972 | MYBPC3 c.1433C>T 2 117534 0.00001702 973 | MYBPC3 c.1423T>C 1 117150 0.00000854 974 | MYBPC3 c.1405C>G 1 115820 0.00000863 975 | MYBPC3 c.1397T>A 1 115342 0.00000867 976 | MYBPC3 c.1375C>T 1 113410 0.00000882 977 | MYBPC3 c.1363C>T 4 112128 0.00003567 978 | MYBPC3 c.1361T>C 2 112346 0.00001780 979 | MYBPC3 c.1358C>T 1 111738 0.00000895 980 | MYBPC3 c.1357C>G 1 111762 0.00000895 981 | MYBPC3 c.1316G>A 2 110650 0.00001808 982 | MYBPC3 c.1309G>T 2 109754 0.00001822 983 | MYBPC3 c.1291G>A 3 106710 0.00002811 984 | MYBPC3 c.1256G>A 2 104254 0.00001918 985 | MYBPC3 c.1256G>C 1 104254 0.00000959 986 | MYBPC3 c.1255C>T 1 104266 0.00000959 987 | MYBPC3 c.1211A>C 1 49204 0.00002032 988 | MYBPC3 c.1180G>A 1 77034 0.00001298 989 | MYBPC3 c.1171G>T 1 86828 0.00001152 990 | MYBPC3 c.1145G>A 1 102512 0.00000975 991 | MYBPC3 c.1133G>C 1 105316 0.00000950 992 | MYBPC3 c.1123G>A 1 106450 0.00000939 993 | MYBPC3 c.1120C>A 1 106582 0.00000938 994 | MYBPC3 c.1101G>T 1 
104750 0.00000955 995 | MYBPC3 c.1100A>G 1 104622 0.00000956 996 | MYBPC3 c.1091C>T 7 103158 0.00006786 997 | MYBPC3 c.1084A>G 1 119592 0.00000836 998 | MYBPC3 c.1074T>A 2 119670 0.00001671 999 | MYBPC3 c.1072G>A 1 119694 0.00000835 1000 | MYBPC3 c.1039G>A 2 118514 0.00001688 1001 | MYBPC3 c.1021G>A 3 116106 0.00002584 1002 | MYBPC3 c.1009G>A 1 111296 0.00000899 1003 | MYBPC3 c.1006A>T 1 110474 0.00000905 1004 | MYBPC3 c.1003C>T 2 108234 0.00001848 1005 | MYBPC3 c.976C>T 2 91656 0.00002182 1006 | MYBPC3 c.970A>T 1 86712 0.00001153 1007 | MYBPC3 c.961G>C 1 80000 0.00001250 1008 | MYBPC3 c.960C>G 1 78254 0.00001278 1009 | MYBPC3 c.949G>A 1 70368 0.00001421 1010 | MYBPC3 c.916C>T 1 108134 0.00000925 1011 | MYBPC3 c.886A>G 1 42726 0.00002340 1012 | MYBPC3 c.845G>A 1 28712 0.00003483 1013 | MYBPC3 c.821C>T 2 23316 0.00008578 1014 | MYBPC3 c.815G>A 1 23752 0.00004210 1015 | MYBPC3 c.805T>C 1 24534 0.00004076 1016 | MYBPC3 c.746G>A 1 99080 0.00001009 1017 | MYBPC3 c.745T>C 1 99748 0.00001003 1018 | MYBPC3 c.712C>T 1 108096 0.00000925 1019 | MYBPC3 c.700A>G 2 108824 0.00001838 1020 | MYBPC3 c.684T>G 8 106064 0.00007543 1021 | MYBPC3 c.667G>A 12 99722 0.00012033 1022 | MYBPC3 c.659A>G 4 95742 0.00004178 1023 | MYBPC3 c.644G>A 2 109776 0.00001822 1024 | MYBPC3 c.640G>A 4 110080 0.00003634 1025 | MYBPC3 c.634A>G 1 110768 0.00000903 1026 | MYBPC3 c.631G>A 1 110802 0.00000903 1027 | MYBPC3 c.573G>T 1 108312 0.00000923 1028 | MYBPC3 c.568A>G 1 107756 0.00000928 1029 | MYBPC3 c.554A>G 2 104998 0.00001905 1030 | MYBPC3 c.541G>A 2 100940 0.00001981 1031 | MYBPC3 c.538G>A 1 100372 0.00000996 1032 | MYBPC3 c.527C>G 1 95772 0.00001044 1033 | MYBPC3 c.527C>T 1 95772 0.00001044 1034 | MYBPC3 c.503T>C 2 23754 0.00008420 1035 | MYBPC3 c.451G>A 1 24354 0.00004106 1036 | MYBPC3 c.446C>A 1 24026 0.00004162 1037 | MYBPC3 c.437C>T 2 23688 0.00008443 1038 | MYBPC3 c.434C>A 1 23386 0.00004276 1039 | MYBPC3 c.364G>A 1 27028 0.00003700 1040 | MYBPC3 c.323C>T 3 23248 0.00012904 1041 | MYBPC3 
c.287A>G 1 17726 0.00005641 1042 | MYBPC3 c.238G>A 1 29510 0.00003389 1043 | MYBPC3 c.194C>T 2 46730 0.00004280 1044 | MYBPC3 c.166G>A 1 66652 0.00001500 1045 | MYBPC3 c.152C>T 1 74804 0.00001337 1046 | MYBPC3 c.151G>T 2 75080 0.00002664 1047 | MYBPC3 c.145A>T 2 78388 0.00002551 1048 | MYBPC3 c.133G>A 2 81912 0.00002442 1049 | MYBPC3 c.122G>A 3 85626 0.00003504 1050 | MYBPC3 c.118G>A 1 86926 0.00001150 1051 | MYBPC3 c.117G>T 1 87142 0.00001148 1052 | MYBPC3 c.94G>A 7 89538 0.00007818 1053 | MYBPC3 c.94G>C 1 89538 0.00001117 1054 | MYBPC3 c.88G>A 1 89854 0.00001113 1055 | MYBPC3 c.73A>T 2 87770 0.00002279 1056 | MYBPC3 c.67G>A 2 86564 0.00002310 1057 | MYBPC3 c.50G>T 2 83926 0.00002383 1058 | MYBPC3 c.49C>T 2 83364 0.00002399 1059 | MYBPC3 c.46C>T 2 83050 0.00002408 1060 | MYBPC3 c.41A>G 1 82480 0.00001212 1061 | MYBPC3 c.11C>T 4 119646 0.00003343 1062 | MYBPC3 c.3G>A 1 119362 0.00000838 1063 | MYBPC3 c.3518_3520delAGG 1 98522 0.00001015 1064 | MYBPC3 c.3335_3337dupGGT 2 87032 0.00002298 1065 | MYBPC3 c.3323_3325delAGA 1 66078 0.00001513 1066 | MYBPC3 c.3168delC 1 119940 0.00000834 1067 | MYBPC3 c.2555_2556insT 1 117708 0.00000850 1068 | MYBPC3 c.2490_2491insT 3 120680 0.00002486 1069 | MYBPC3 c.2443_2443insCGA 1 120542 0.00000830 1070 | MYBPC3 c.2373_2374insG 1 26444 0.00003782 1071 | MYBPC3 c.1639delG 1 118374 0.00000845 1072 | MYBPC3 c.1227-1_1228dupGGT 1 102352 0.00000977 1073 | MYBPC3 c.1144delC 1 102512 0.00000975 1074 | MYBPC3 c.1143_1144insTC 1 103118 0.00000970 1075 | MYBPC3 c.1082_1084delAGA 1 119552 0.00000836 1076 | MYBPC3 c.146_148delTCA 3 76496 0.00003922 1077 | MYBPC3 c.98_99delCA 1 89464 0.00001118 1078 | TPM1 c.515T>C 1 121332 0.00000824 1079 | TPM1 c.644C>T 1 121278 0.00000825 1080 | TPM1 c.829G>A 4 115578 0.00003461 1081 | TPM1 c.1A>G 1 112046 0.00000892 1082 | TPM1 c.532C>T 1 121286 0.00000824 1083 | TPM1 c.797A>G 5 119058 0.00004200 1084 | TPM1 c.845C>G 38 111126 0.00034195 1085 | TPM1 c.117delG 1 112530 0.00000889 1086 | TPM1 c.841A>G 3 111910 
0.00002681 1087 | TPM1 c.62G>T 1 99636 0.00001004 1088 | TPM1 c.92C>A 2 69332 0.00002885 1089 | TPM1 c.155G>T 1 120032 0.00000833 1090 | TPM1 c.252C>G 1 121410 0.00000824 1091 | TPM1 c.253G>A 1 121410 0.00000824 1092 | TPM1 c.257C>G 1 121410 0.00000824 1093 | TPM1 c.301C>G 1 121408 0.00000824 1094 | TPM1 c.301C>T 1 121408 0.00000824 1095 | TPM1 c.302G>A 2 121408 0.00001647 1096 | TPM1 c.314G>A 2 121408 0.00001647 1097 | TPM1 c.323C>T 1 121408 0.00000824 1098 | TPM1 c.354G>T 1 121394 0.00000824 1099 | TPM1 c.355G>A 1 121394 0.00000824 1100 | TPM1 c.410A>G 1 121400 0.00000824 1101 | TPM1 c.459C>A 1 121250 0.00000825 1102 | TPM1 c.511A>G 1 121344 0.00000824 1103 | TPM1 c.514A>C 1 121328 0.00000824 1104 | TPM1 c.572C>T 1 121352 0.00000824 1105 | TPM1 c.629A>C 1 120894 0.00000827 1106 | TPM1 c.635A>T 1 120670 0.00000829 1107 | TPM1 c.659G>A 1 121332 0.00000824 1108 | TPM1 c.740C>A 1 121380 0.00000824 1109 | TPM1 c.775G>A 1 119116 0.00000840 1110 | TPM1 c.784G>A 2 119196 0.00001678 1111 | TPM1 c.814G>A 2 117954 0.00001696 1112 | TPM1 c.838G>A 2 113428 0.00001763 1113 | TPM1 c.851T>C 1 108270 0.00000924 1114 | TPM1 c.493-2A>G 1 121366 0.00000824 1115 | ACTC1 c.301G>A 1 121402 0.00000824 1116 | ACTC1 c.268C>T 1 121402 0.00000824 1117 | ACTC1 c.289C>T 1 121402 0.00000824 1118 | ACTC1 c.623G>A 5 121388 0.00004119 1119 | ACTC1 c.536G>T 1 121410 0.00000824 1120 | ACTC1 c.28C>A 3 119184 0.00002517 1121 | ACTC1 c.1085A>C 1 121412 0.00000824 1122 | ACTC1 c.1010G>A 1 121296 0.00000824 1123 | ACTC1 c.994A>G 2 120936 0.00001654 1124 | ACTC1 c.967G>A 2 116632 0.00001715 1125 | ACTC1 c.903A>C 1 121374 0.00000824 1126 | ACTC1 c.847A>G 1 121412 0.00000824 1127 | ACTC1 c.713T>C 1 121394 0.00000824 1128 | ACTC1 c.688G>T 1 121396 0.00000824 1129 | ACTC1 c.678G>T 1 121396 0.00000824 1130 | ACTC1 c.614C>G 1 121402 0.00000824 1131 | ACTC1 c.281A>G 1 121402 0.00000824 1132 | ACTC1 c.260T>G 1 121402 0.00000824 1133 | ACTC1 c.229A>G 3 121398 0.00002471 1134 | ACTC1 c.217A>G 2 121396 0.00001648 
1135 | ACTC1 c.106A>G 1 114756 0.00000871 1136 | ACTC1 c.23C>G 2 119300 0.00001676 1137 | ACTC1 c.19A>T 2 119326 0.00001676 1138 | ACTC1 c.2T>C 1 119198 0.00000839 1139 | ACTC1 c.56_57insCA 1 118556 0.00000843 1140 | ACTC1 c.-22-1G>A 1 118744 0.00000842 1141 | MYL2 c.37G>A 37 120592 0.00030682 1142 | MYL2 c.64G>A 1 120944 0.00000827 1143 | MYL2 c.141C>A 22 121392 0.00018123 1144 | MYL2 c.173G>A 1 120720 0.00000828 1145 | MYL2 c.310A>G 1 121410 0.00000824 1146 | MYL2 c.401A>C 19 121396 0.00015651 1147 | MYL2 c.403-1G>C 3 120852 0.00002482 1148 | MYL2 c.275-2A>G 1 121368 0.00000824 1149 | MYL2 c.459G>C 3 121354 0.00002472 1150 | MYL2 c.380C>T 3 121410 0.00002471 1151 | MYL2 c.359G>A 7 121404 0.00005766 1152 | MYL2 c.433G>A 1 121258 0.00000825 1153 | MYL2 c.431delC 2 121238 0.00001650 1154 | MYL2 c.436G>A 5 121280 0.00004123 1155 | MYL2 c.374C>T 5 121412 0.00004118 1156 | MYL2 c.366G>T 1 121410 0.00000824 1157 | MYL2 c.304G>A 2 121406 0.00001647 1158 | MYL2 c.229A>G 1 121404 0.00000824 1159 | MYL2 c.11_15delAGAAA 2 119144 0.00001679 1160 | MYL2 c.298C>G 5 121406 0.00004118 1161 | MYL2 c.475A>G 1 121368 0.00000824 1162 | MYL2 c.469C>T 2 121368 0.00001648 1163 | MYL2 c.466G>T 2 121366 0.00001648 1164 | MYL2 c.454T>C 2 121346 0.00001648 1165 | MYL2 c.437T>C 1 121280 0.00000825 1166 | MYL2 c.431C>G 2 121224 0.00001650 1167 | MYL2 c.430C>G 8 121202 0.00006601 1168 | MYL2 c.430C>A 1 121202 0.00000825 1169 | MYL2 c.428C>T 3 121200 0.00002475 1170 | MYL2 c.424T>G 1 121162 0.00000825 1171 | MYL2 c.413T>A 1 121024 0.00000826 1172 | MYL2 c.403G>T 2 120876 0.00001655 1173 | MYL2 c.358C>G 1 121410 0.00000824 1174 | MYL2 c.355G>A 5 121410 0.00004118 1175 | MYL2 c.337G>T 1 121408 0.00000824 1176 | MYL2 c.321C>A 1 121412 0.00000824 1177 | MYL2 c.308T>G 15 121408 0.00012355 1178 | MYL2 c.301A>G 1 121406 0.00000824 1179 | MYL2 c.278C>T 1 121384 0.00000824 1180 | MYL2 c.257T>C 1 121400 0.00000824 1181 | MYL2 c.256T>C 4 121400 0.00003295 1182 | MYL2 c.241G>A 2 121402 0.00001647 1183 | 
MYL2 c.203A>G 1 121370 0.00000824 1184 | MYL2 c.172C>T 1 120642 0.00000829 1185 | MYL2 c.124G>T 1 121388 0.00000824 1186 | MYL2 c.97T>C 1 121360 0.00000824 1187 | MYL2 c.64G>C 1 120944 0.00000827 1188 | MYL2 c.64G>T 1 120944 0.00000827 1189 | MYL2 c.59T>A 1 120956 0.00000827 1190 | MYL2 c.58A>G 1 120954 0.00000827 1191 | MYL2 c.49G>A 2 120878 0.00001655 1192 | MYL2 c.34G>T 1 120482 0.00000830 1193 | MYL2 c.31G>A 2 120352 0.00001662 1194 | MYL2 c.31G>T 1 120352 0.00000831 1195 | MYL2 c.28G>A 1 120234 0.00000832 1196 | MYL2 c.7C>A 1 117246 0.00000853 1197 | MYL2 c.353_354insG 1 121410 0.00000824 1198 | MYL2 c.170-6_170-2dupCCCTA 1 120350 0.00000831 1199 | MYL2 c.92_93+1delAGG 1 120402 0.00000831 1200 | MYL2 c.47delA 1 120856 0.00000827 1201 | MYL3 c.170C>G 11 121338 0.00009066 1202 | MYL3 c.461G>A 3 121398 0.00002471 1203 | MYL3 c.466G>A 2 121398 0.00001647 1204 | MYL3 c.532G>A 32 121336 0.00026373 1205 | MYL3 c.460C>T 2 121400 0.00001647 1206 | MYL3 c.235G>A 4 121318 0.00003297 1207 | MYL3 c.188G>A 3 121342 0.00002472 1208 | MYL3 c.55G>T 3 121328 0.00002473 1209 | MYL3 c.4G>C 5 120230 0.00004159 1210 | MYL3 c.170C>A 14 121338 0.00011538 1211 | MYL3 c.530A>G 6 121330 0.00004945 1212 | MYL3 c.446T>C 1 121408 0.00000824 1213 | MYL3 c.92G>A 13 121392 0.00010709 1214 | MYL3 c.557A>C 1 121368 0.00000824 1215 | MYL3 c.484G>A 1 120994 0.00000826 1216 | MYL3 c.374A>G 1 121410 0.00000824 1217 | MYL3 c.227G>A 1 121320 0.00000824 1218 | MYL3 c.91C>T 2 121386 0.00001648 1219 | MYL3 c.73C>T 2 121374 0.00001648 1220 | MYL3 c.8C>A 1 120248 0.00000832 1221 | MYL3 c.521C>T 1 121320 0.00000824 1222 | MYL3 c.493A>G 1 121142 0.00000825 1223 | MYL3 c.488G>C 1 121088 0.00000826 1224 | MYL3 c.485A>G 1 121028 0.00000826 1225 | MYL3 c.476C>T 3 121396 0.00002471 1226 | MYL3 c.461G>T 1 121398 0.00000824 1227 | MYL3 c.456G>T 1 121404 0.00000824 1228 | MYL3 c.439A>G 1 121408 0.00000824 1229 | MYL3 c.400G>T 1 121412 0.00000824 1230 | MYL3 c.389A>T 1 121412 0.00000824 1231 | MYL3 c.347C>T 2 121408 
0.00001647 1232 | MYL3 c.317C>A 1 121410 0.00000824 1233 | MYL3 c.311T>C 1 121406 0.00000824 1234 | MYL3 c.301C>G 1 120910 0.00000827 1235 | MYL3 c.294G>T 1 120974 0.00000827 1236 | MYL3 c.280C>T 1 121120 0.00000826 1237 | MYL3 c.274G>A 1 121166 0.00000825 1238 | MYL3 c.245C>T 4 121284 0.00003298 1239 | MYL3 c.241C>T 1 121296 0.00000824 1240 | MYL3 c.220G>A 7 121328 0.00005769 1241 | MYL3 c.202G>C 1 121336 0.00000824 1242 | MYL3 c.194C>G 1 121340 0.00000824 1243 | MYL3 c.187C>T 1 121336 0.00000824 1244 | MYL3 c.184G>A 1 121330 0.00000824 1245 | MYL3 c.176T>C 1 121334 0.00000824 1246 | MYL3 c.160T>G 1 121340 0.00000824 1247 | MYL3 c.152T>C 2 121100 0.00001652 1248 | MYL3 c.152T>G 1 121100 0.00000826 1249 | MYL3 c.140C>T 3 120954 0.00002480 1250 | MYL3 c.136T>C 1 120856 0.00000827 1251 | MYL3 c.106G>A 1 121398 0.00000824 1252 | MYL3 c.94C>T 2 121392 0.00001648 1253 | MYL3 c.91C>G 1 121386 0.00000824 1254 | MYL3 c.85C>G 1 121386 0.00000824 1255 | MYL3 c.82G>A 1 121382 0.00000824 1256 | MYL3 c.61C>T 1 121340 0.00000824 1257 | MYL3 c.36T>G 1 121202 0.00000825 1258 | MYL3 c.30G>C 1 121080 0.00000826 1259 | MYL3 c.17C>G 1 120734 0.00000828 1260 | MYL3 c.1A>G 2 120050 0.00001666 1261 | MYL3 c.410_411insT 1 121410 0.00000824 1262 | MYL3 c.209_211delAGA 1 121332 0.00000824 1263 | MYL3 c.28_30delAAG 1 121094 0.00000826 1264 | MYL3 c.2delT 1 120220 0.00000832 1265 | -------------------------------------------------------------------------------- /data-raw/HCM_BRU_Variants_and_freqs.txt: -------------------------------------------------------------------------------- 1 | Gene CodingVar ProteinVar VarType CaseCount ExACFreq 2 | MYH7 c.1063G>A p.A355T missense 4 0.00000000 3 | MYH7 c.1075T>G p.F359V missense 1 0.00000000 4 | MYH7 c.1357C>T p.R453C missense 1 0.00000000 5 | MYH7 c.1358G>A p.R453H missense 1 0.00000000 6 | MYH7 c.1405G>A p.D469N missense 1 0.00000824 7 | MYH7 c.1447G>A p.E483K missense 1 0.00000824 8 | MYH7 c.1464C>A p.F488L missense 1 0.00000000 9 | MYH7 c.1753A>T 
p.I585F missense 1 0.00000000 10 | MYH7 c.1988G>A p.R663H missense 1 0.00001648 11 | MYH7 c.2011C>T p.R671C missense 1 0.00000000 12 | MYH7 c.2221G>T p.G741W missense 1 0.00000000 13 | MYH7 c.2242T>A p.S748T missense 1 0.00000000 14 | MYH7 c.2243C>T p.S748F missense 1 0.00000000 15 | MYH7 c.2302G>A p.G768R missense 2 0.00000000 16 | MYH7 c.2360G>A p.R787H missense 1 0.00022244 17 | MYH7 c.2389G>A p.A797T missense 2 0.00003296 18 | MYH7 c.2539A>G p.K847E missense 2 0.00000000 19 | MYH7 c.2555T>C p.M852T missense 1 0.00000000 20 | MYH7 c.2606G>A p.R869H missense 1 0.00003295 21 | MYH7 c.2606G>C p.R869P missense 1 0.00000000 22 | MYH7 c.2717A>G p.D906G missense 3 0.00000000 23 | MYH7 c.2770G>A p.E924K missense 1 0.00000000 24 | MYH7 c.2782G>A p.D928N missense 1 0.00000000 25 | MYH7 c.2890G>C p.V964L missense 1 0.00042006 26 | MYH7 c.2945T>C p.M982T missense 3 0.00090601 27 | MYH7 c.3134G>T p.R1045L missense 1 0.00001648 28 | MYH7 c.3981C>A p.N1327K missense 1 0.00010347 29 | MYH7 c.4066G>A p.E1356K missense 1 0.00000000 30 | MYH7 c.4124A>G p.Y1375C missense 1 0.00000000 31 | MYH7 c.428G>A p.R143Q missense 1 0.00000824 32 | MYH7 c.4317_4319dupTGC inframe 1 0.00000000 33 | MYH7 c.4377G>T p.K1459N missense 1 0.00030546 34 | MYH7 c.4645-2A>C essential splice site 1 0.00000000 35 | MYH7 c.4954G>T p.D1652Y missense 1 0.00002475 36 | MYH7 c.5005G>T p.E1669X nonsense 1 0.00000000 37 | MYH7 c.5075T>A p.V1692E missense 1 0.00000000 38 | MYH7 c.508G>A p.E170K missense 2 0.00000000 39 | MYH7 c.5134C>T p.R1712W missense 1 0.00000000 40 | MYH7 c.5326A>G p.S1776G missense 1 0.00003295 41 | MYH7 c.5334C>A p.H1778Q missense 1 0.00000000 42 | MYH7 c.5341C>T p.R1781C missense 1 0.00000000 43 | MYH7 c.5380C>A p.Q1794K missense 1 0.00000000 44 | MYH7 c.5470A>G p.N1824D missense 1 0.00000000 45 | MYH7 c.5471A>G p.N1824S missense 1 0.00000000 46 | MYH7 c.5507C>T p.S1836L missense 1 0.00004949 47 | MYH7 c.5749G>T p.V1917F missense 1 0.00000000 48 | MYH7 c.5779A>T p.I1927F missense 1 
0.00003297 49 | MYBPC3 c.1000G>A p.E334K missense 1 0.00033786 50 | MYBPC3 c.1084A>C p.S362R missense 1 0.00000000 51 | MYBPC3 c.1201C>T p.Q401X nonsense 1 0.00000000 52 | MYBPC3 c.1217G>A p.S406N missense 1 0.00000000 53 | MYBPC3 c.1218C>A p.S406R missense 1 0.00000000 54 | MYBPC3 c.1321G>A p.E441K missense 1 0.00016221 55 | MYBPC3 c.1351+1G>A essential splice site 1 0.00000000 56 | MYBPC3 c.1359delT frameshift 1 0.00000000 57 | MYBPC3 c.1405C>T p.Q469X nonsense 1 0.00000000 58 | MYBPC3 c.1504C>T p.R502W missense 4 0.00002486 59 | MYBPC3 c.1505G>A p.R502Q missense 1 0.00000000 60 | MYBPC3 c.1518C>A p.D506E missense 1 0.00000000 61 | MYBPC3 c.1519G>A p.G507R missense 3 0.00067944 62 | MYBPC3 c.1544A>G p.N515S missense 1 0.00014093 63 | MYBPC3 c.1624G>C p.E542Q missense 1 0.00002485 64 | MYBPC3 c.1765C>T p.R589C missense 1 0.00000000 65 | MYBPC3 c.1822C>T p.P608S missense 1 0.00014057 66 | MYBPC3 c.1828G>A p.D610N missense 1 0.00000000 67 | MYBPC3 c.188G>A p.R63Q missense 1 0.00003933 68 | MYBPC3 c.2011G>A p.V671I missense 1 0.00000000 69 | MYBPC3 c.2030C>T p.P677L missense 1 0.00000000 70 | MYBPC3 c.223G>A p.D75N missense 1 0.00009129 71 | MYBPC3 c.2267delC frameshift 1 0.00000000 72 | MYBPC3 c.2308G>A p.D770N missense 1 0.00000830 73 | MYBPC3 c.2373dupG frameshift 1 0.00000000 74 | MYBPC3 c.2376G>A p.W792X nonsense 1 0.00000000 75 | MYBPC3 c.2429G>A p.R810H missense 2 0.00003319 76 | MYBPC3 c.2441_2443delAGA inframe 1 0.00032354 77 | MYBPC3 c.2459G>A p.R820Q missense 1 0.00001657 78 | MYBPC3 c.2558delG frameshift 1 0.00000000 79 | MYBPC3 c.2737+2_2737+3delTG essential splice site 1 0.00000000 80 | MYBPC3 c.2827C>T p.R943X nonsense 1 0.00001700 81 | MYBPC3 c.2864_2865delCT frameshift 1 0.00000000 82 | MYBPC3 c.2905+1G>A essential splice site 1 0.00000000 83 | MYBPC3 c.2905C>T p.Q969X nonsense 1 0.00000000 84 | MYBPC3 c.3004C>T p.R1002W missense 2 0.00067147 85 | MYBPC3 c.3137C>T p.T1046M missense 1 0.00005814 86 | MYBPC3 c.3163A>T p.K1055X nonsense 1 0.00000000 87 
| MYBPC3 c.3190+1G>A essential splice site 1 0.00000000 88 | MYBPC3 c.3226_3227insT frameshift 1 0.00000000 89 | MYBPC3 c.3257G>A p.W1086X nonsense 1 0.00002179 90 | MYBPC3 c.327_332delCCCTGC inframe 1 0.00000000 91 | MYBPC3 c.3286G>T p.E1096X nonsense 1 0.00000000 92 | MYBPC3 c.3326C>T p.T1109I missense 1 0.00016647 93 | MYBPC3 c.3408C>A p.Y1136X nonsense 1 0.00000000 94 | MYBPC3 c.3535G>A p.E1179K missense 1 0.00045916 95 | MYBPC3 c.3613C>T p.R1205W missense 1 0.00001673 96 | MYBPC3 c.3742G>A p.G1248R missense 1 0.00003324 97 | MYBPC3 c.3763G>A p.A1255T missense 1 0.00007504 98 | MYBPC3 c.3763G>C p.A1255P missense 1 0.00000000 99 | MYBPC3 c.3771C>A p.N1257K missense 1 0.00000000 100 | MYBPC3 c.3787C>T p.R1263W missense 1 0.00003362 101 | MYBPC3 c.442G>A p.G148R missense 1 0.00004201 102 | MYBPC3 c.451G>A p.D151N missense 1 0.00004106 103 | MYBPC3 c.640G>A p.D214N missense 1 0.00003634 104 | MYBPC3 c.655G>C p.V219L missense 1 0.00000000 105 | MYBPC3 c.710A>C p.Y237S missense 1 0.00000000 106 | MYBPC3 c.772G>A p.E258K missense 1 0.00003903 107 | MYBPC3 c.806C>A p.S269X nonsense 1 0.00000000 108 | MYBPC3 c.821+2T>C essential splice site 1 0.00000000 109 | TNNT2 c.236T>A p.I79N missense 1 0.00000000 110 | TNNT2 c.662T>C p.I221T missense 1 0.00012358 111 | TNNT2 c.785A>G p.N262S missense 1 0.00000000 112 | TNNT2 c.832C>T p.R278C missense 2 0.00042906 113 | TNNT2 c.857G>A p.R286H missense 1 0.00007882 114 | TNNI3 c.407G>A p.R136Q missense 1 0.00000831 115 | TNNI3 c.433C>T p.R145W missense 1 0.00000830 116 | TNNI3 c.485G>A p.R162Q missense 1 0.00002498 117 | TNNI3 c.557G>A p.R186Q missense 1 0.00000000 118 | TNNI3 c.561G>T p.E187D missense 1 0.00000829 119 | TNNI3 c.587A>G p.D196G missense 1 0.00000000 120 | TNNI3 c.608G>T p.G203V missense 1 0.00000000 121 | TPM1 c.842T>C p.M281T missense 1 0.00000000 122 | MYL2 c.221C>T p.P74L missense 1 0.00000000 123 | MYL2 c.401A>C p.E134A missense 1 0.00015651 124 | MYL2 c.428C>T p.P143L missense 1 0.00002475 125 | MYL3 c.170C>G 
p.A57G missense 1 0.00009066 126 | MYL3 c.460C>T p.R154C missense 1 0.00001647 127 | MYL3 c.532G>A p.D178N missense 1 0.00026373 128 | ACTC1 c.76G>A p.D26N missense 1 0.00000000 129 | ACTC1 c.787C>G p.L263V missense 1 0.00000000 130 | ACTC1 c.850A>T p.I284F missense 1 0.00000000 131 | -------------------------------------------------------------------------------- /data-raw/SupplementaryTable1.txt: -------------------------------------------------------------------------------- 1 | measureset_id symbol hgvs_c ExACCount Class Notes 2 | 43475* MYL2 c.401A>C 19 VUS 8/8 missense prediction algorithms predict deleterious. Identified in 2 individuals with a second 'likely pathogenic' MYL2 variant (ClinVar SCV000060059.4). An in vitro functional study suggests that this variant may reduce cardiac muscle contraction efficiency (Burghardt 2013). Second hit causing more severe phenotype suggested. [BS1 + PP3] 3 | 43121* MYL3 c.170C>A 14 Likely Benign 8/8 missense algorithms predict deleterious. Mutations at the same residue have been reported for HCM (Lee 2001). Identified along with pathogenic and likely pathogenic variants (ClinVar SCV000059671.4, SCV000207109.1). 2 affected siblings reported both with variant as homozygote (no support for recessive MYL3 inheritance). [BS1 + BP5 + PP3] 4 | 180694 MYOM1 c.1900+3A>C 466 Likely Benign Originally identified as a homozygote in 3 yr old (ClinVar SCV000195854.1). No other data to support. 7 homozygotes in ExAC. Splicing tools predict no effect. [BS1 + BP4] 5 | 41926 CRYAB c.460G>A 93 Likely Benign Identified in a HCM case with an alternative variant sufficient to cause disease (ClinVar SCV000060874.4). Previous pathogenic assertion in ClinVar is from 'literature only' for DCM (no segregation, only 200 controls used for reference). 
All others say 'VUS' [BS1 + BP5] 6 | 12411 TNNT2 c.853C>T 40 VUS Reported in >10 individuals with HCM and segregated in 5 affected individuals from multiple families (Watkins 1995, Elliott 1999, Garcia-Castro 2003, Torricelli 2003, Van Driest 2003, Theopistou 2004, Ingles 2005, Zeller 2006, Garcia-Castro 2009, Kaski 2009, Gimeno 2009, Millat 2010). Up to half of individuals carried a second variant (ClinVar SCV000060277.4). Residue not evolutionarily conserved. A milder affect with late age of onset has been suggested. In vitro studies suggest may have an affect on muscle contraction (Morimoto 1999, Yanaga 1999, Szczesna 2000, Hernandez 2005) however these might not represent acurately protein function. [BS1] 7 | 177700 MYBPC3 c.2441_2443delAGA 39 VUS In-frame deletion. Experimental studies have shown that this deletion does not lead to decreased MYBPC3 stability in vitro (Bahrudin 2008). Has been identified in 4 adult unaffected relatives (ClinVar SCV000203960.3). [BS1 + PM4] 8 | 14064 MYL2 c.37G>A 37 Benign ClinVar 'Pathogenic' assertion from literature only (ClinVar SCV000035365.1). Not found in an affected family member (Ball 2012). Other weak non-segregation data. Found in a 56 year old healthy individual (Ball 2012). Found in Ashkenazi Jewish individuals. Reported to lead to increased Ca2+ affinity (Szczesna 2001). [BS1 + BS4] 9 | 161310 MYBPC3 c.961G>A 37 Likely Benign Evidence based on presence in HCM individuals (1 with a MYH7 variant (Maron 2012)) and absence in ESP and nearby residues reported as pathogenic (Ser311Leu and Arg326Gln). No segregation or functional data. Reported multiple times in ClinVar with other variants that could explain phenotype (SCV000208258.4, SCV000261724.2). [BS1 + BP5] 10 | 177902 MYBPC3 c.1000G>A 36 VUS Very high Asian freqeuncy (0.45% in ExAC). Reduced stability of MYBPC3, higher Ca2+ transients and action potential durations in HL-1 cells and rat cardiac myocytes (Bahrudin 2011). 
[BS1] 11 | 161305 MYBPC3 c.13G>C 36 Likely Benign Reported in two families; one individual but not unaffected mother and another individual but not unaffected sibling. 3 individuals with variant also carry a pathogenic mutation in the same gene (ClinVar SCV000198995.3). Amino acid not conserved in mammals. [BS1 + BP5] 12 | 191577 ANKRD1 c.368C>T 28 Likely Benign 8/8 missense algorithms predict benign. Mutant protein correctly incorporates into the sarcomere (Crocini 2013). Amino acid change is not conserved. [BS1 + BP4] 13 | 14149 MYH6 c.3195G>C 28 VUS 6/7 missense algorithms predict deleterious. Originally identified as in an affected (died of congestive heart failure at 45 yrs) and not his 2 unnafected offspring or 150 controls (Carniel 2005). Amino acid change seen in birds. [BS1 + PP3] 14 | 12445 TNNC1 c.435C>A 28 VUS Functional studies using mutant porcine cardiac skinned fibers showed significantly increased force development and force recovery compared to wildtype (Landstrom 2008) with reduced Ca2+ binding [only in vitro studies]. No segregation data. Has also been reported in a patient with idiopathic DCM who also harbored a missense variant in MYBPC3 gene (Pinto 2011) [BS1] 15 | 42900 MYH7 c.2360G>A 27 Likely Benign 11 reports in total (7 literature, 4 LMM in ClinVar (SCV000059440.4)), majority South Asian decent - very high South Asian frequency (0.1%). 3 probands had another variant sufficient to cause disease. Only moderately conserved amino acid. Possible non-segregation (not in the father of an individual who died of sudden cardiac death; ClinVar SCV000059440.4). [BS1 + BP5] 16 | 43911 ACTN2 c.1484C>T 26 Benign 4 individuals in the literature (Chiu 2010, Theis 2006) and 2 reported on ClinVar (SCV000060535.4). Non segregation with disease also reported (2 affecteds; ClinVar SCV000060535.4). No supporting segregation data. [BS1 + BS4] 17 | 42536 MYBPC3 c.1468G>A 25 Likely Benign 7/7 missense algorithms predict deleterious. 
Identified in individuals with HCM, DCM and LVNC. Conserved residue. Multiple reports of cooccurance with an alternative variant that is sufficient to cause disease (ClinVar SCV000059050.4). [BS1 + BP5 + PP3] 18 | 31766 MYL2 c.141C>A 22 Likely Benign 7/8 missense algorithms predict benign. Co-occurance with likely pathogenic (tested) MYH7:c.5134C>T mutation (segregation data) in literature (Andersen 2001, Hougs 2005). Other reports of a second variant (ClinVar SCV000060036.4). In vitro studies show reduced force development and power output (Szczesna-Cordary 2004, Abraham 2009, Greenberg 2009, Greenberg 2010). [BS1 + BP5 + BP4] 19 | 44710 TCAP c.458G>A 20 Likely Benign 7/8 missense algorithms predict benign. Only pathogenic in ClinVar by 'literature only'. Identified in siblings but no solid segregation data (Hayashi 2004). Functional studies in yeast show the variant leads to an increase in the ability of TCAP to bind with titin and CS-1 (Hayashi 2004)- in vitro assay may not accurately represent biological function. Amino acid change is not conserved - 7 mammals carry the variant amino acid. [BS1 + BP4] 20 | 36601 MYBPC3 c.1321G>A 18 Likely Benign Multiple reports of co-occurence with alternative pathogenic variants (ClinVar SCV000059030.4 and SCV000208011.4). Some computational modelling data suggest a confirmational effect (Gajendrarao 2015). [BS1 + BP5] 21 | 42775 MYBPC3 c.624G>C 18 VUS Reports of possible non-segregation (ClinVar SCV000284248.1). [BS1] 22 | 14111 MYH7 c.2183C>T 18 Likely Benign Pathogenic in CliNVar from 'literature only' - VUS by 3 other submitters. Identified originally in cis with another MYH7 mutation in 2 siblings with HCM (Blair 2001) - other variant (c.1816G>A) is absent in ExAC and has been shown to segregate in multiple families (24 literature reports) [BS1 + BP2] 23 | 4243 MYLK2 c.260C>T 16 VUS Pathogenic in ClinVar from 'literature only', no other submissions. 
Found as a compound het in a 13yr old and with an MYH7 variant (also in mildy affected parents; Davis (2001). Other compound het variant is also common. [BS1] 24 | 12197 VCL c.2923C>T 16 VUS Pathogenic in ClinVar from 'literature only' .Reduced levels of vinculin in the patient with this variant, but other studies found that this was a general feature of HCM and not related to specific genotypes (Vasile 2006). [BS1] 25 | 14122 MYH7 c.1322C>T 16 VUS Pathogenic in ClinVar from 'literature only', in Laing distal myopathy. No change in mRNA levels. Amino acid is not conserved in mammals. [BS1] 26 | 30143 MYBPC3 c.2618C>A 13 VUS Identified as a compound het and in homozygous state (consaguinous family; Nanni 2003, Ingles 2005). No segregation data in HCM. 1 homozygote seen in ExAC. [BS1] 27 | 31780 MYL3 c.170C>G 11 VUS 7/8 missense algorithms predict damaging. Functional study showing lowercomplex affinity not significant for this variant after multiple testing (Lossie 2012). Segregation in 5 affected members of 2 families (ClinVar SCV000199362.3). All reports in East Asians (highest ExAC frequency at 0.05%). [BS1 + PP3 + PP1] 28 | 45533 LDB3 c.1823C>T 11 VUS Pathogenic in ClinVar from 'literature only'. No segregation or functional data. [BS1] 29 | 42679 MYBPC3 c.3049G>A 10 Likely Benign Not conserved amino acid - 7/7 missense algorithms predict benign. No functional or segregation data. [BS1 + BP4] 30 | 9380 SCN5A c.4534C>T 9 VUS 8/8 missense algorithms predict damaging. Reported in association with Brugada syndrome (Rook 1999, Deschnes 2000, Meregalli 2009, Crotti 2012). In vitro functional studies show a potential impact (Rook 1999, Deschnes 2000), however these may not be representative of in vivo protein function. [BS1 + PP3] 31 | 6055 KCNE2 c.79C>T 9 VUS 7/8 missense algorithms predict damaging. Identified in 2 Chinese families for Atrial Fibrillation (Yang 2004). Only parent and child tested for mutation (50% chance of segregation). 
An in vitro study on rat cardiomyocytes saw supression of L-type Calcium current (Liu 2014). [BS1 + PP3] 32 | 164023 MYBPC3 c.3763G>A 9 VUS 1/5 individuals with variant also had a likely pathogenic variant in another gene (ClinVar SCV000198796.2). No other data available. [BS1] 33 | 161309 MYBPC3 c.2269G>A 8 VUS Conservative amino acid change. No functional or segregation data found. [BS1] 34 | 42664 MYBPC3 c.2873C>T 7 Likely Benign 2 documented individuals also carry a MYBPC3 variant sufficient to explain disease (ClinVar SCV000059185.4). Amino acid is not well conserved 7/7 missense algorithms predict benign. [BS1 + BP4 + BP2] 35 | 48097 LMNA c.976T>A 7 Likely Benign Position not conserved 7/8 missense algorithms predict benign, simulation studies suggest no effect on coil B dimerisation. [BS1 + BP4] 36 | 42566 MYBPC3 c.1786G>A 7 VUS 6/7 missense algorithms predict damaging. No clear segregation data (one suggestion the variant doesn't segregate (ClinVar SCV000259413.1)). [BS1 + PP3] 37 | 36613 MYBPC3 c.529C>T 6 Likely Benign Different missense at the same position is common. Identified with another pathogenic MYBPC3 variant (ClinVar SCV000198969.3). Otherwise found in DCM. Position not well conserved. [BS1 + BP5] 38 | 180968 MYBPC3 c.2381C>T 6 VUS No evidence towards a pathogenic classification found. [BS1] 39 | 42756 MYBPC3 c.461T>C 5 Likely Benign Position not conserved, 6/7 missense algorithms predict benign.Identified in a child with a further homozygoes MYBPC3 variant (ClinVar SCV000059282.4). [BS1 + BP4] 40 | 177629 MYH7 c.5326A>G 4 Likely Benign Large majority of identified cases are of Asian ancestry. 2 individuals at GeneDx have an alternative pathogenic variant (ClinVar SCV000208629.3). Only segregation data is in 2 affected siblings and not in the unaffected sibling (Blair 2002). [BS1 + BP5] 41 | 191705 MYBPC3 c.640G>A 4 VUS No evidence towards a pathogenic classification found. 
[BS1] 42 | 14147 MYH6 c.2384G>A 4 VUS Identified in a 75 year old patient and not found in 170 controls (Niimura 2002). No other data. [BS1] 43 | 161313 MYBPC3 c.223G>A 3 VUS No evidence towards a pathogenic classification found. [BS1] 44 | 42706 MYBPC3 c.3330+5G>C 2 Pathogenic Prevalence in affecteds stastically increased over controls (http://biorxiv.org/content/early/2016/02/24/041111). Shown to produce a truncated protein product and to segregate with disease (Watkins 1995). [BS1 + PVS1 + PS4 + PP1] 45 | -------------------------------------------------------------------------------- /data-raw/Table1.txt: -------------------------------------------------------------------------------- 1 | Disease Prevalence Commonest causative variant Case count Case frequency (95% CI) Penetrance* Expected population frequency Model predicted maximum ExAC AC Observed ExAC AC 2 | HCM 1/500 MYBPC3:c.1504C>T 104/6179 1.7% (1.4-2.0%) 0.5 3.4x10-5 (2.7-4.0x10-5) 9 3 3 | DCM 1/250 TNNT2:c.629_631delAGA 18/1254 1.4% (0.78-2.1%) 0.5 5.6x10-5 (3.1-8.4x10-5) 16 0 4 | ARVC 1/1000 PKP2:c.2146-1G>C 24/361 6.7% (4.1-9.2%) 0.5 6.7x10-5 (4.1-9.2x10-5) 17 6 5 | LQTS 1/2000 KCNQ1:c.797T>C 30/2500 1.2% (0.77-1.6%) 0.5 6.0x10-6 (3.9-8.2x10-6) 3 0 6 | Brugada 1/1000 SCN5A:c.5350G>A 14/2111 0.66% (0.32-1.0%) 0.5 6.6x10-6 (0.32-1.0x10-5) 3 0 -------------------------------------------------------------------------------- /data-raw/Table2.txt: -------------------------------------------------------------------------------- 1 | Disease Maximum allelic contribution Prevalence Penetrance* Maximum population frequency Maximum tolerated ExAC allele count 2 | Marfan 0.015 1/3000 0.5 5.0x10-6 2 3 | Noonan 0.10 1/1000 0.5 1.0x10-4 18 4 | CPVT 0.10 1/10,000 0.5 1.0x10-5 3 5 | Classic Ehlers-Danlos 0.40 1/20,000 0.5 2.0x10-5 5 6 | -------------------------------------------------------------------------------- /data-raw/authors.txt: 
-------------------------------------------------------------------------------- 1 | Name Affiliation coFirst coLast Corresponding 2 | Nicola Whiffin "National Heart & Lung Institute, Imperial College London" T 3 | Nicola Whiffin "NIHR Royal Brompton Cardiovascular Biomedical Research Unit, Royal Brompton & Harefield Hospitals & Imperial College London" 4 | Eric Minikel "Analytic & Translational Genetics Unit, Massachusetts General Hospital, Boston MA" T 5 | Eric Minikel "Program in Medical and Population Genetics, Broad Institute of MIT & Harvard, Cambridge MA" 6 | Roddy Walsh "National Heart & Lung Institute, Imperial College London" 7 | Roddy Walsh "NIHR Royal Brompton Cardiovascular Biomedical Research Unit, Royal Brompton & Harefield Hospitals & Imperial College London" 8 | Anne O'Donnell-Luria "Analytic & Translational Genetics Unit, Massachusetts General Hospital, Boston MA" 9 | Anne O'Donnell-Luria "Program in Medical and Population Genetics, Broad Institute of MIT & Harvard, Cambridge MA" 10 | Konrad Karczewski "Analytic & Translational Genetics Unit, Massachusetts General Hospital, Boston MA" 11 | Konrad Karczewski "Program in Medical and Population Genetics, Broad Institute of MIT & Harvard, Cambridge MA" 12 | Alexander Y Ing "Laboratory for Molecular Medicine, Partners HealthCare Personalized Medicine, Cambridge, MA" 13 | Alexander Y Ing "Department of Pathology, Massachusetts General Hospital and Harvard Medical School, Boston MA" 14 | Paul JR Barton "National Heart & Lung Institute, Imperial College London" 15 | Paul JR Barton "NIHR Royal Brompton Cardiovascular Biomedical Research Unit, Royal Brompton & Harefield Hospitals & Imperial College London" 16 | Birgit Funke "Laboratory for Molecular Medicine, Partners HealthCare Personalized Medicine, Cambridge MA" 17 | Birgit Funke "Department of Pathology, Massachusetts General Hospital and Harvard Medical School, Boston MA" 18 | Stuart A Cook "National Heart & Lung Institute, Imperial College London" T 
19 | Stuart A Cook "NIHR Royal Brompton Cardiovascular Biomedical Research Unit, Royal Brompton & Harefield Hospitals & Imperial College London" 20 | Stuart A Cook "National Heart Centre Singapore, Singapore" 21 | Stuart A Cook "Duke-National University of Singapore, Singapore" 22 | Daniel MacArthur "Analytic & Translational Genetics Unit, Massachusetts General Hospital, Boston MA" T 23 | Daniel MacArthur "Program in Medical and Population Genetics, Broad Institute of MIT & Harvard, Cambridge MA" 24 | Daniel MacArthur "Department of Medicine, Harvard Medical School, Boston MA" 25 | James S Ware "National Heart & Lung Institute, Imperial College London" T T 26 | James S Ware "NIHR Royal Brompton Cardiovascular Biomedical Research Unit, Royal Brompton & Harefield Hospitals & Imperial College London" 27 | James S Ware "MRC Clinical Sciences Centre, Imperial College London" 28 | James S Ware "Program in Medical and Population Genetics, Broad Institute of MIT & Harvard, Cambridge MA" 29 | -------------------------------------------------------------------------------- /data/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/data/.keep -------------------------------------------------------------------------------- /data/filtering_stats.tsv: -------------------------------------------------------------------------------- 1 | ancestry filter af_limit total_vars filtered_vars proportion_removed 2 | afr af_filter 1e-06 13616.64 73.65 0.994591176677947 3 | amr af_filter 1e-06 12235.66 53.1 0.995660225929782 4 | eas af_filter 1e-06 12450.28 74.54 0.994012986053326 5 | nfe af_filter 1e-06 11953.86 44.85 0.99624807384393 6 | sas af_filter 1e-06 12230.67 67.09 0.994514609583939 7 | afr af_filter 3.16227766016838e-06 13616.64 73.65 0.994591176677947 8 | amr af_filter 3.16227766016838e-06 12235.66 53.1 0.995660225929782 9 | eas 
af_filter 3.16227766016838e-06 12450.28 74.54 0.994012986053326 10 | nfe af_filter 3.16227766016838e-06 11953.86 44.85 0.99624807384393 11 | sas af_filter 3.16227766016838e-06 12230.67 67.09 0.994514609583939 12 | afr af_filter 1e-05 13616.64 77.56 0.994304028012784 13 | amr af_filter 1e-05 12235.66 56.3 0.995398695289016 14 | eas af_filter 1e-05 12450.28 77.69 0.993759979695236 15 | nfe af_filter 1e-05 11953.86 53.3 0.99554118920583 16 | sas af_filter 1e-05 12230.67 69.99 0.994277500741987 17 | afr af_filter 3.16227766016838e-05 13616.64 83.98 0.99383254606129 18 | amr af_filter 3.16227766016838e-05 12235.66 70.16 0.994265940701196 19 | eas af_filter 3.16227766016838e-05 12450.28 82.42 0.993380068560707 20 | nfe af_filter 3.16227766016838e-05 11953.86 68.5 0.994269633407117 21 | sas af_filter 3.16227766016838e-05 12230.67 84.82 0.993064975181245 22 | afr af_filter 1e-04 13616.64 117.04 0.991404634329761 23 | amr af_filter 1e-04 12235.66 87.44 0.992853675241058 24 | eas af_filter 1e-04 12450.28 113.99 0.990844382616295 25 | nfe af_filter 1e-04 11953.86 85.94 0.992810690438068 26 | sas af_filter 1e-04 12230.67 108.26 0.991148481644914 27 | afr af_filter 0.000316227766016838 13616.64 148.22 0.989114788964091 28 | amr af_filter 0.000316227766016838 12235.66 115.9 0.990527687104741 29 | eas af_filter 0.000316227766016838 12450.28 145.17 0.988340021268598 30 | nfe af_filter 0.000316227766016838 11953.86 109.44 0.990844798249268 31 | sas af_filter 0.000316227766016838 12230.67 141.93 0.988395566228179 32 | afr af_filter 0.001 13616.64 205.9 0.98487879535627 33 | amr af_filter 0.001 12235.66 151.9 0.987585467396119 34 | eas af_filter 0.001 12450.28 189.21 0.984802751424064 35 | nfe af_filter 0.001 11953.86 142.34 0.988092549184949 36 | sas af_filter 0.001 12230.67 190.95 0.98438760918249 37 | afr af_filter 0.00316227766016838 13616.64 310.82 0.977173517108479 38 | amr af_filter 0.00316227766016838 12235.66 204.09 0.98332006610187 39 | eas af_filter 0.00316227766016838 
12450.28 258.31 0.979252675441837 40 | nfe af_filter 0.00316227766016838 11953.86 198.54 0.983391138929183 41 | sas af_filter 0.00316227766016838 12230.67 264.28 0.978392025947883 42 | afr af_filter 0.01 13616.64 535.44 0.960677523970671 43 | amr af_filter 0.01 12235.66 274.81 0.977540238940932 44 | eas af_filter 0.01 12450.28 345.48 0.972251226478441 45 | nfe af_filter 0.01 11953.86 298.89 0.974996361008076 46 | sas af_filter 0.01 12230.67 379.18 0.968997610106397 47 | afr af_filter 0.05 13616.64 1377.35 0.898848027119759 48 | amr af_filter 0.05 12235.66 503.26 0.958869403039967 49 | eas af_filter 0.05 12450.28 555.22 0.955405019003589 50 | nfe af_filter 0.05 11953.86 591.47 0.950520584982591 51 | sas af_filter 0.05 12230.67 664.79 0.945645659640886 52 | -------------------------------------------------------------------------------- /figures/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/figures/.keep -------------------------------------------------------------------------------- /figures/Figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/figures/Figure1.png -------------------------------------------------------------------------------- /frequencyFilter.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /manuscript/.Rprofile: 
-------------------------------------------------------------------------------- 1 | source("../.Rprofile", chdir = TRUE) 2 | -------------------------------------------------------------------------------- /manuscript/SupplementaryMaterial.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Supplementary Material - Using high-resolution variant frequencies to empower clinical genome interpretation" 3 | output: 4 | word_document: 5 | reference_docx: template2.docx 6 | 7 | bibliography: "suppReferences.bib" 8 | csl: "nature.csl" 9 | --- 10 | 11 | ```{r, echo=FALSE, include=FALSE} 12 | #read all scripts in the R directory, to load functions 13 | sourceDir <- function(path, trace = TRUE, ...) { 14 | for (nm in list.files(path, pattern = "\\.[RrSsQq]$")) { 15 | if(trace) cat(nm,":") 16 | source(file.path(path, nm), ...) 17 | if(trace) cat("\n") 18 | } 19 | } 20 | 21 | #load functions 22 | #sourceDir(file.path("..","R")) 23 | source("../R/setup.R") 24 | ``` 25 | 26 | ### Supplementary table 1 27 | 28 | Variants previously reported as causative of HCM either in ClinVar, or in a clinical series of 6179 HCM cases, but that were observed in ExAC above the maximum tolerated allele count for HCM (AC>9 globally) were manually curated according to the ACMG guidelines for interpretation and classification on sequence variants. *These variants were previously categorised as Likely Pathogenic in the LMM and Oxford HCM cohorts. 29 | 30 | ```{r} 31 | read.delim("../data-raw/SupplementaryTable1.txt") %>% kable 32 | ``` 33 | 34 | ### Supplementary note 1 - Curation of a high frequency PCD variant 35 | 36 | _NME8_ NM\_016616.4:c.271-27C>T which is reported as Pathogenic in ClinVar is found in 2306/120984 ExAC individuals. 
This variant was initially reported as pathogenic on the basis of two compound heterozygous cases when specifically searching for NME8 variants in a set of patients, and was found in 2/196 control chromosomes `r citep("10.1073/pnas.0611405104")`. We further note that _NME8_ has not otherwise been associated with PCD and shows no evidence of missense or loss-of-function constraint and that this splice variant affects a non-canonical transcript. During our curation exercise we found that this variant meets none of the ACMG criteria for assertions of pathogenicity, and therefore we reclassified it as a VUS. 37 | 38 | ### Supplementary note 2 - Dealing with penetrance 39 | 40 | It is often difficult to obtain accurate penetrance information for reported variants, and it is also difficult to know what degree of penetrance to expect or assume in newly discovered pathogenic variants. In this work we uniformly apply a value of 50% penetrance equivalent to that reported for our HCM example variant, and the lowest we found reported for any of our examples. We recognise several other approaches that can be used to deal with the issues of penetrance; these include: setting a penetrance level equivalent to the minimum that is 'clinically actionable' for a disorder; lowering the penetrance if reduced penetrance is expected in a family; or using a two-tiered approach, initially searching for a high-moderate penetrance variant but allowing for a lower-penetrance variant in a second pass. We believe that the ease of re-calculating our "maximum credible population allele frequency" lends itself to any of these approaches. We provide an online calculator to facilitate the exploration of these parameters ( *temporary URL for review - username/password = frequency/filter*). 
41 | 42 | ##Supplemental references 43 | 44 | ```{r, echo=FALSE, message=FALSE} 45 | write.bibtex(file="suppReferences.bib") 46 | ``` 47 | -------------------------------------------------------------------------------- /manuscript/manuscript.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using high-resolution variant frequencies to empower clinical genome interpretation" 3 | output: 4 | # html_document 5 | word_document: 6 | reference_docx: newTemplate.docx 7 | 8 | bibliography: "references.bib" 9 | csl: "nature.csl" 10 | --- 11 | 12 | ```{r, echo=FALSE, include=FALSE} 13 | #load functions 14 | source(file.path("..","R","setup.R")) 15 | source(file.path("..","R","sumariseCaseSeriesVariants.R")) 16 | #create figures 17 | source(file.path("..","R","Figures.R")) 18 | ``` 19 | 20 | ```{r authors, results='asis'} 21 | read.delim("../data-raw/authors.txt") %>% 22 | formatAuthors 23 | ``` 24 | 25 | ## Abstract 100-150 words 26 | 27 | Whole exome and genome sequencing have transformed the discovery of genetic variants that cause human Mendelian disease, but discriminating pathogenic from benign variants remains a daunting challenge. Rarity is recognised as a necessary, although not sufficient, criterion for pathogenicity, but frequency cutoffs used in Mendelian analysis are often arbitrary and overly lenient. Recent very large reference datasets, such as the Exome Aggregation Consortium (ExAC), provide an unprecedented opportunity to obtain robust frequency estimates even for very rare variants. Here we present a statistical framework for the frequency-based filtering of candidate disease-causing variants, accounting for disease prevalence, genetic and allelic heterogeneity, inheritance mode, penetrance, and sampling variance in reference datasets. 
Using the example of cardiomyopathy, we show that our approach reduces by two-thirds the number of candidate variants under consideration in the average exome, and identifies 43 variants previously reported as pathogenic that can now be reclassified. We present precomputed allele frequency cutoffs for all variants in the ExAC dataset. 28 | 29 | ## Introduction 30 | 31 | Whole exome and whole genome sequencing have been instrumental in identifying causal variants in Mendelian disease patients `r citep("10.1016/j.ajhg.2015.06.009")`. As every individual harbors ~12,000-14,000 predicted protein-altering variants `r citep("10.1038/nature19057")`, distinguishing disease-causing variants from benign bystanders is perhaps the principal challenge in contemporary clinical genetics. A variant's low frequency in, or absence from, reference databases is now recognised as a necessary, but not sufficient, criterion for variant pathogenicity `r citep(c("10.1038/gim.2015.30","10.1038/nature13127"))`. The recent availability of very large reference databases, such as the Exome Aggregation Consortium (ExAC) `r citep("10.1038/nature19057")` dataset, which has characterised the population allele frequencies of 10 million genomic variants through the analysis of exome sequencing data from over 60,000 humans, provides an opportunity to obtain robust frequency estimates even for rare variants, improving the theoretical power for allele frequency filtering in Mendelian variant discovery efforts. 32 | 33 | In practice, there exists considerable ambiguity around what allele frequency should be considered "too common", with the lenient values of 1% and 0.1% often invoked as conservative frequency cutoffs for recessive and dominant diseases respectively `r citep("10.1038/nrg3031")`. 
Population genetics, however, dictates that severe disease-causing variants must be much rarer than these cutoffs, except in cases of bottlenecked populations, balancing selection, or other special circumstances `r citep(c("10.1093/molbev/msp190","10.1073/pnas.1322563111"))`. 34 | 35 | It is intuitive that when assessing a variant for a causative role in a dominant Mendelian disease, the frequency of a variant in a reference sample, not selected for the condition, should not exceed the prevalence of the condition `r citep(c("10.1016/j.ajhg.2016.03.024","10.1126/scitranslmed.aad5169"))`. This rule must, however, be refined to account for different inheritance modes, genetic and allelic heterogeneity, and reduced penetrance. In addition, for rare variants, estimation of true population allele frequency is clouded by considerable sampling variance, even in the largest samples currently available. These limitations have encouraged the adoption of very lenient approaches when filtering variants by allele frequency `r citep(c("10.1016/S0140-6736(14)61705-0","10.1038/ng.3304"))`, and recognition that more stringent approaches that account for disease-specific genetic architecture are urgently needed`r citep("10.1016/j.ajhg.2016.03.024")`. 36 | 37 | Here we present a statistical framework for assessing whether rare variants are sufficiently rare to cause penetrant Mendelian disease, while accounting for both architecture and sampling variance in observed allele counts. We demonstrate that allele frequency cutoffs well below 0.1% are justified for a variety of human disease phenotypes and that such filters can remove an additional two-thirds of variants from consideration when compared to traditionally lenient frequency cutoffs. We present pre-computed allele frequency filtering values for all variants in the Exome Aggregation Consortium database, which are now available through the ExAC data browser and for download, to assist others in applying our framework. 
38 | 39 | ## Results 40 | 41 | #### *Defining the statistical framework* 42 | 43 | For a penetrant dominant Mendelian allele to be disease causing, it cannot be present in the general population more frequently than the disease it causes. Furthermore, if the disease is genetically heterogeneous, it must not be more frequent than the proportion of cases attributable to that gene, or indeed to any single variant. We can therefore define the maximum credible population allele frequency (for a pathogenic allele) as: 44 | 45 | *maximum credible population AF = prevalence x maximum allelic contribution x 1/penetrance* 46 | 47 | where _maximum allelic contribution_ is the maximum proportion of cases potentially attributable to a single allele, a measure of heterogeneity. 48 | 49 | We do not know the true population allele frequency of any variant, having only an observed allele frequency in a finite population sample. Moreover, confidence intervals around this observed frequency are problematic to estimate given our incomplete knowledge of the frequency spectrum of rare variants, which appears to be skewed towards very rare variants. For instance, a variant observed only once in a sample of 10,000 chromosomes is much more likely to have a frequency < 1:10,000 than a frequency >1:10,000. `r citep("10.1038/nature19057")` 50 | 51 | If we turn the problem around, and begin instead from allele frequency, specifying a maximum _true_ allele frequency value we are willing to consider in the population (using the equation above), then we can estimate the probability distribution for allele counts in a given sample size. This follows a binomial distribution, and can be satisfactorily approximated with a Poisson distribution (see **Online Methods**). This allows us to set an upper limit on the number of alleles in a sample that is consistent with a given population frequency. 
52 | 53 | Taking a range of cardiac disorders as exemplars, we use this framework to define the maximum credible allele frequency for disease-causing variants in each condition, and define and validate a set of maximum tolerated allele counts in the ExAC reference population sample. Figure 1 shows the general outline of our approach. 54 | 55 | ### Figure 1 56 | 57 | > A flow diagram of our approach, applied to a dominant condition. First, a disease-level maximum credible population allele frequency is calculated, based on disease prevalence, heterogeneity and penetrance. This is then used to calculate the maximum tolerated allele count in a reference dataset, taking into account the size of this dataset. 58 | 59 | ![figure1](../figures/Figure1.png) 60 | 61 | #### *Application and validation in hypertrophic cardiomyopathy* 62 | 63 | We illustrate our general approach using the dominant cardiac disorder hypertrophic cardiomyopathy (HCM), which has an estimated prevalence of 1 in 500 in the general population `r citep("10.1038/gim.2014.205")`. As there have been previous large-scale genetic studies of HCM, with series of up to 6,179 individuals `r citep(c("10.1038/gim.2014.205","10.1038/gim.2016.90"))`, we can make the assumption that no newly identified variant will be more frequent in cases than those identified to date (at least for well-studied ancestries). This allows us to define the maximum allelic contribution of any single variant to the disorder. In these large case series, the largest proportion of cases is attributable to the missense variant _MYBPC3_ c.1504C>T (p.Arg502Trp), found in 104/6179 HCM cases (1.7%; 95%CI 1.4-2.0%) `r citep(c("10.1038/gim.2014.205","10.1038/gim.2016.90"))`. We therefore take the upper bound of this proportion (0.02) as an estimate of the maximum allelic contribution in HCM (Table 1). 
Our maximum expected population allele frequency for this allele, assuming 50% penetrance as previously reported `r citep("10.1161/CIRCRESAHA.109.216291")`, is 1/500 x 1/2 (dividing prevalence per individual by the number of chromosomes per individual) x 0.02 x 1/0.5 = 4.0x10^-5^, which we take as the maximum credible population AF for any causative variant for HCM (Table 1). 64 | 65 | ### Table 1 66 | > Details of the most prevalent pathogenic variants in case cohorts for five cardiac conditions. Shown along with the frequency in cases is the estimated population allele frequency (calculated as: case frequency x disease prevalence x 1/2 x 1/variant penetrance) and the observed frequency in the ExAC dataset. ^\*^As penetrance estimates for individual variants are not widely available, we have applied an estimate of 0.5 across all disorders (see **Supplementary information**). HCM - hypertrophic cardiomyopathy; DCM - dilated cardiomyopathy; ARVC - arrhythmogenic right ventricular cardiomyopathy; LQTS - long QT syndrome. Case cohorts and prevalence estimates were obtained from: HCM`r citep(c("10.1038/gim.2014.205","10.1038/gim.2016.90"))`, DCM`r citep(c("10.1038/gim.2014.205","10.1038/gim.2016.90","10.1038/nrcardio.2013.105"))`, ARVC`r citep(c("10.1038/gim.2016.90","10.1016/j.ijcard.2005.12.015"))`, LQTS`r citep(c("10.1016/j.hrthm.2009.05.021","10.1016/j.cjca.2012.07.847"))` and Brugada`r citep(c("10.1016/j.hrthm.2009.09.069","10.1016/j.hlc.2015.07.020"))`. 67 | 68 | ```{r} 69 | read.delim("../data-raw/Table1.txt",check.names=F) %>% pander 70 | ``` 71 | 72 | To apply this threshold while remaining robust to chance variation in observed allele counts, we ask how many times a variant with population allele frequency of 4.0x10^-5^ can be observed in a random population sample of a given size. 
For a 5% error rate we take the 95th percentile of a poisson distribution with λ = expected allele count, which is given by: sample size (chromosomes) × expected population allele frequency (**Online Methods**). For HCM this gives us a maximum tolerated allele count of 9, assuming 50% penetrance (or 5 for fully penetrant alleles), for variants genotyped in the full ExAC cohort (sample size=121,412 chromosomes). The _MYBPC3_:c.1504C>T variant is observed 3 times in ExAC (freq=2.49x10^-5^; Table 1). 73 | 74 | To facilitate these calculations, we have produced an online calculator () that will compute maximum credible population allele frequency and maximum sample allele count for a user-specified genetic architecture, and conversely allow users to dynamically explore what genetic architecture(s) might be most compatible with an observed variant having a causal role in disease. 75 | 76 | ```{r} 77 | summariseCaseSeriesVariants() 78 | ``` 79 | 80 | To assess these thresholds empirically, we explored the ExAC allele frequency spectrum of `r myVariantsN` distinct autosomal variants identified in 6179 recently published HCM cases referred for diagnostic sequencing, and individually assessed and reported according to international guidelines`r citep(c("10.1038/gim.2014.205","10.1038/gim.2016.90"))`. 81 | `r myPassPath`/`r myAllPath` (`r signif(100*myPassPath/myAllPath,3)`%) variants reported as ‘Pathogenic’ or ‘Likely Pathogenic’ fell below our threshold (Figure 2), including all variants with a clear excess in cases. `r myAbsentPath` of these variants are absent from ExAC. The `r myFailPath` variants historically classified as ‘Likely Pathogenic’, but prevalent in ExAC in this analysis, were reassessed using contemporary ACMG criteria: there was no strong evidence in support of pathogenicity, and they were reclassified in light of these findings (Supplementary Table 1). 
82 | This analysis identifies `r myFailVus`/`r myAllVus` (`r signif(100*myFailVus/myAllVus,3)`%) VUS that are unlikely to be truly causative for HCM. 83 | 84 | ### Figure 2 85 | > Plot of ExAC allele count (all populations) against case allele count for variants classified as VUS, Likely Pathogenic or Pathogenic in 6179 HCM cases. The dotted lines represent the maximum tolerated ExAC allele counts in HCM for 50% (dark blue) and 100% penetrance (light blue). Variants are colour coded according to reported pathogenicity. Where classifications from contributing laboratories were discordant the more conservative classification is plotted. The inset panel shows the full dataset, while the main panel expands the region of primary interest. 86 | 87 | ![figure2](../figures/Figure2.png) 88 | 89 | The above analysis applied a single global allele count limit of 9 for HCM, however, as allele frequencies differ between populations, filtering based on frequencies in individual populations may provide greater power `r citep("10.1038/nature19057")`. For example, a variant relatively common in any one population is unlikely pathogenic, even if rare in other populations, provided the disease prevalence and architecture is consistent across populations. We therefore compute a maximum tolerated AC for each distinct sub-population of our reference sample, and filter based on the highest allele frequency observed in any major continental population (see **Online Methods**). 90 | 91 | To further validate this approach, we examined all 601 variants identified in ClinVar `r citep("10.1093/nar/gkt1113")` as "Pathogenic" or "Likely Pathogenic" and non-conflicted for HCM. 558 (93%) were sufficiently rare when assessed as described. 43 variants were insufficiently rare in at least one ExAC population, and were therefore re-curated. 42 of these had no segregation or functional data sufficient to demonstrate pathogenicity in the heterozygous state, and we would classify as VUS at most. 
The remaining variant (MYBPC3:c.3330+5G>C) had convincing evidence of pathogenicity, though with uncertain penetrance (see **Supplementary information**), and was observed twice in the African/African American ExAC population. This fell outside the 95% confidence interval for an underlying population frequency <4x10^-5^, but within the 99% confidence threshold: a single outlier due to stochastic variation is unsurprising given that these nominal probabilities are not corrected for multiple testing across 601 variants. In light of our updated assessment, 20 variants were reclassified as Benign/Likely Benign and 22 as VUS according to the American College of Medical Genetics and Genomics (ACMG) guidelines for variant interpretation `r citep("10.1038/gim.2015.30")` (Supplementary Table 1). 92 | 93 | #### *Extending this approach to other disorders* 94 | 95 | This approach can be readily applied in diseases where large case series are available to assess the genetic and allelic architecture, such as the inherited cardiac conditions displayed in Table 1. In the absence of large case series, we must estimate the genetic architecture parameters by extrapolating from similar disorders and/or variant databases. 96 | 97 | Where disease-specific variant databases exist, we can use these to help estimate the maximum allelic contribution in lieu of individual case series. For example, Marfan syndrome is a rare connective tissue disorder caused by variants in the _FBN1_ gene. The UMD-FBN1 database `r citep("10.1002/humu.10249")` contains 3077 variants in _FBN1_ from 280 references (last updated 28/08/14). The most common variant is in 30/3006 records (1.00%; 95%CI 0.53-1.46%), which likely overestimates its contribution to disease if related individuals are not systematically excluded. Taking the upper bound of this frequency as our maximum allelic contribution, we derive a maximum tolerated allele count of 2 (Table 2). 
None of the five most common variants in the database are present in ExAC. 98 | 99 | Where no mutation database exists, we can use what is known about similar disorders to estimate the maximum allelic contribution. For the cardiac conditions with large case series in Table 1, the maximum proportion of cases attributable to any one variant is 6.7% (95%CI 4.1-9.2%; PKP2:c.2146-1G>C found in 24/361 ARVC cases `r citep("10.1038/gim.2016.90")`). We therefore take the upper bound of this confidence interval (rounded up to 0.1) as an estimate of the maximum allelic contribution for other genetically heterogeneous cardiac conditions, unless we can find disease-specific evidence to alter it. For Noonan syndrome and Catecholaminergic Polymorphic Ventricular Tachycardia (CPVT - an inherited cardiac arrhythmia syndrome) with prevalences of 1 in 1000 `r citep("10.1016/S0140-6736(12)61023-X")` and 1 in 10,000 `r citep("10.1038/ejhg.2013.55")` respectively, this translates to maximum population frequencies of 5x10^-5^ and 5x10^-6^ and maximum tolerated ExAC allele counts of 10 and 2 (Table 2). 100 | 101 | Finally, if the allelic heterogeneity of a disorder is not well characterised, it is conservative to assume minimal heterogeneity, so that the contribution of each gene is modelled as attributable to one allele, and the maximum allelic contribution is substituted by the maximum genetic contribution (i.e. the maximum proportion of the disease attributable to a single gene). For classic Ehlers-Danlos syndrome, up to 40% of the disease is caused by variation in the _COL5A1_ gene `r citep("10.1097/GIM.0b013e3181eed412")`. Taking 0.4 as our maximum allelic contribution, and a population prevalence of 1/20,000 `r citep("10.1097/GIM.0b013e3181eed412")` we derive a maximum tolerated ExAC AC of 5 (Table 2). 
102 | 103 | ### Table 2 104 | > Maximum credible population frequencies and maximum tolerated ExAC allele counts for variants causative of exemplar inherited cardiac conditions, assuming a penetrance of 0.5 throughout. CPVT - catecholaminergic polymorphic ventricular tachycardia; FH - familial hypercholesterolaemia. Prevalence estimates were obtained from: Marfan`r citep("10.1016/S0140-6736(05)67789-6")`, Noonan`r citep("10.1016/S0140-6736(12)61023-X")`, CPVT`r citep("10.1038/ejhg.2013.55")` and classical Ehlers-Danlos`r citep("10.1097/GIM.0b013e3181eed412")`. 105 | 106 | ```{r} 107 | read.delim("../data-raw/Table2.txt",check.names=F) %>% pander 108 | ``` 109 | 110 | Here we have illustrated frequencies analysed at the level of the disease. In some cases this may be further refined by calculating distinct thresholds for individual genes, or even variants. For example, if there is one common founder mutation but no other variants that are recurrent across cases, then it would make sense to have the founder mutation as an exception to the calculated threshold. 111 | 112 | #### *Application to recessive diseases* 113 | 114 | So far we have considered diseases with a dominant inheritance model. Our framework is readily modified for application in recessive disease, and to illustrate this we consider the example of Primary Ciliary Dyskinesia (PCD), which has a prevalence of up to 1 in 10,000 individuals in the general population `r citep("10.1136/archdischild-2013-304831")`. 115 | 116 | Intuitively, if one penetrant recessive variant were to be responsible for all PCD cases, it could have a maximum population frequency of $\sqrt{1/10000}$. 
The maximum frequency of a recessive disease-causing variant in the population can be more completely defined as: 117 | *max credible allele frequency* = $\sqrt{\text{prevalence}} \times$ *maximum allelic contribution* $\times \sqrt{\text{maximum genetic contribution}} \times 1/\sqrt{\text{penetrance}}$ 118 | 119 | where *maximum genetic contribution* represents the proportion of all cases that are attributable to the gene under evaluation, and *maximum allelic contribution* represents the proportion of cases attributable to that gene that are attributable to an individual variant (full derivation can be found in **Online Methods**). 120 | 121 | We can refine our evaluation of PCD by estimating the maximum genetic and allelic contribution. Across previously published cohorts of PCD cases `r citep(c("10.1164/rccm.200603-370OC","10.1164/rccm.200601-084OC","10.1038/ng.2277"))`, *DNAI1* IVS1+2\_3insT was the most common variant with a total of 17/358 alleles (4.7%; 95%CI 2.5-7.0%). Given that ~9% of all patients with PCD have disease-causing variants in _DNAI1_ and the IVS1+2\_3insT variant is estimated to account for ~57% of variant alleles in _DNAI1_ `r citep("10.1164/rccm.200603-370OC")`, we can take these values as estimates of the maximum genetic and allelic contribution for PCD, yielding a maximum expected population AF of $\sqrt{1/10000} \times 0.57 \times \sqrt{0.09} \times 1/\sqrt{0.5} = 2.42 \times 10^{-3}$. This translates to a maximum tolerated ExAC AC of 322. *DNAI1* IVS1+2\_3insT is itself present at 56/121108 ExAC alleles (45/66636 non-Finnish European alleles). A single variant reported to cause PCD in ClinVar occurs in ExAC with AC > 322 (_NME8_ NM\_016616.4:c.271-27C>T; AC=2306/120984): our model therefore indicates that this variant frequency is too common to be disease-causing, and consistent with this we note that it meets none of the current ACMG criteria for assertions of pathogenicity, and have reclassified it as VUS (see **Supplementary information**). 
122 | 123 | #### *Pre-computing threshold values for the ExAC populations* 124 | 125 | For each ExAC variant, we defined a "filtering allele frequency" that represents the threshold disease-specific "maximum credible allele frequency" at or below which the disease could not plausibly be caused by that variant. A variant with a filtering allele frequency ≥ the maximum credible allele frequency for the disease under consideration should be filtered, while a variant with a filtering allele frequency below the maximum credible remains a candidate. This value has been pre-computed for all variants in ExAC (see **Online Methods**), and is available via the ExAC VCF and browser (http://exac.broadinstitute.org). 126 | 127 | To assess the efficiency of our approach, we calculated the filtering allele frequency based on 60,206 exomes from ExAC and applied these filters to a simulated dominant Mendelian variant discovery analysis on the remaining 500 exomes (see **Online Methods**). Filtering at allele frequencies lower than 0.1% can substantially reduce the number of predicted protein-altering variants in consideration, with the mean number of variants per exome falling from 176 at a cutoff of 0.1% to 63 at a cutoff of 0.0001% (Figure 3a). 128 | Additionally, we compared the prevalence of variants in HCM genes in cases and controls across the allele frequency spectrum, and computed disease odds ratios for different frequency bins. The odds ratio for disease-association increases markedly at very low allele frequencies (Figure 3b) demonstrating that increasing the stringency of a frequency filter improves the information content of a genetic result. 129 | 130 | ### Figure 3 131 | 132 | > The clinical utility of stringent allele frequency thresholds. (a) The number of predicted protein-altering variants (definition in Online Methods) per exome as a function of the frequency filter applied. 
A one-tailed 95% confidence interval is used, meaning that variants were removed from consideration if their AC would fall within the top 5% of the Poisson probability distribution for the user's maximum credible AF (x axis). (b) The odds ratio for HCM disease-association against allele frequency. The prevalence of variants in HCM-associated genes (_MYH7_, _MYBPC3_ and other sarcomeric genes (_TNNT2_, _TNNI3_, _MYL2_, _MYL3_, _TPM1_ and _ACTC1_, analysed collectively)) in 322 HCM cases and 60,706 ExAC controls was compared for a range of allele frequency bins, and an odds ratio computed (see **Online Methods**). Data for each bin is plotted at the upper allele frequency cutoff. Error bars represent 95% confidence intervals. The probability that a variant is pathogenic is much greater at very low allele frequencies. 133 | 134 | ![figure3](../figures/Figure3.png) 135 | 136 | ## Discussion 137 | 138 | We have outlined a statistically robust framework for assessing whether a variant is 'too common' to be causative for a Mendelian disorder of interest. To our knowledge, there is currently no equivalent guidance on the use of variant frequency information, resulting in inconsistent thresholds across both clinical and research settings. Furthermore, though disease-specific thresholds are recommended`r citep("10.1016/j.ajhg.2016.03.024")`, in practice the same thresholds may be used across all diseases, even where they have widely differing genetic architectures and prevalences. We have shown the importance of applying stringent AF thresholds, in that many more variants can be removed from consideration, and the remaining variants have a much higher likelihood of being relevant. We also show, using HCM as an example, how lowering this threshold does not remove true dominant pathogenic variants. 139 | 140 | In order to assist others in applying our framework, we have precomputed a 'filtering allele frequency' for all variants across the ExAC dataset. 
This is defined such that if the filtering allele frequency of a variant is at or above the "maximum credible population allele frequency" for the disease in question, then that variant is not a credible candidate (in other words, for any population allele frequency below the threshold value, the probability of the observed allele count in the ExAC sample is <0.05). Once a user has determined their "maximum credible population allele frequency", they may remove from consideration ExAC variants for which the filtering allele frequency is greater than or equal to the chosen value. 141 | 142 | We recognize several limitations of our approach. 143 | First, the approach is limited by our understanding of the prevalence and genetic architecture of the disease in question: this characterisation will vary for different diseases and in different populations, though we illustrate approaches to estimation and extrapolation of parameters. In particular, we must be wary of extrapolating to or from less-well characterised populations that could harbour population-specific founder mutations. It is critical to define the genetic architecture in the population under study. Secondly, it is often difficult to obtain accurate penetrance information for reported variants, and it is also difficult to know what degree of penetrance to expect or assume for newly discovered pathogenic variants (see **Supplementary information** for alternative approaches). 144 | 145 | Thirdly, while we believe that ExAC is depleted of severe childhood inherited conditions, and not enriched for cardiomyopathies, it could be enriched relative to the general population for some Mendelian conditions, including Mendelian forms of common diseases such as diabetes or coronary disease that have been studied in contributing cohorts. 
Where this is possible, the maximum credible population allele frequency can be simply computed based on the estimated disease prevalence in the ExAC cohort, rather than the population prevalence. Finally, although the resulting allele frequency thresholds are more stringent than those previously used, they are likely to still be very lenient for many applications. For instance, we base our calculation on the most prevalent known pathogenic variant from a disease cohort. For HCM, for which more than 6,000 people have been sequenced, it is unlikely that any single newly identified variant, not previously catalogued in this large cohort, will explain a similarly large proportion of the disease as the most common causal variant, at least in well-studied populations. Future work may therefore involve modeling the frequency distribution of all known variants for a disorder, to further refine these thresholds. 146 | 147 | The power of our approach is limited by currently available datasets. Increases in both the ancestral diversity and size of reference datasets will bring additional power to our method over time. We have avoided filtering on variants observed only once, because a single observation provides little information about true allele frequency. A ten-fold increase in sample size, resulting from projects such as the US Precision Medicine Initiative, will separate vanishingly rare variants from those whose frequency really is ~1 in 100,000. Increased phenotypic information linked to reference datasets will also reduce limitations due to uncertain disease status, and improve prevalence estimates, adding further power to our approach. 
148 | 149 | ## Acknowledgements 150 | 151 | This work was supported by the Wellcome Trust (107469/Z/15/Z), the Medical Research Council (UK), the NIHR Biomedical Research Unit in Cardiovascular Disease at Royal Brompton & Harefield NHS Foundation Trust and Imperial College London, the Fondation Leducq (11 CVD-01), a Health Innovation Challenge Fund (HICF-R6-373) award from the Wellcome Trust and Department of Health, UK, and by the National Institute of Diabetes and Digestive and Kidney Diseases and the National Institute of General Medical Sciences of the NIH (awards U54DK105566 and R01GM104371). EVM is supported by the National Institutes of Health under a Ruth L. Kirschstein National Research Service Award (NRSA) NIH Individual Predoctoral Fellowship (F31) (award AI122592-01A1). AHO-L is supported by National Institutes of Health under Ruth L. Kirschstein National Research Service Award 4T32GM007748. 152 | 153 | This publication includes independent research commissioned by the Health Innovation Challenge Fund (HICF), a parallel funding partnership between the Department of Health and Wellcome Trust. The views expressed in this work are those of the authors and not necessarily those of the Department of Health or Wellcome Trust. 154 | 155 | ## Data availability 156 | 157 | All data required to reproduce these analyses is available at . The manuscript was compiled in R, and source code for the analysis, figures and manuscript, are available at the same location. Curated variant interpretations are deposited in ClinVar *Accession & DOI to be added*. ExAC annotations are available at . Our allele frequency calculator app is located at , and the source code available at . 
158 | 159 | ## Methods 160 | 161 | ### Calculating maximum tolerated allele counts 162 | 163 | The maximum frequency of a dominant disease-causing variant in the population was defined as: 164 | *maximum credible population AF = prevalence* $\times$ *maximum allelic contribution* $\times$ *1/penetrance* 165 | 166 | Estimates of disease prevalence were obtained from the literature. Where multiple different values were reported, the highest was used in the calculation, which leads to lenient filtering. A variant penetrance of 0.5 was used for all analyses, as penetrance estimates for individual variants are not widely available. This corresponds to the reported penetrance of the HCM variant used to illustrate our approach `r citep("10.1161/CIRCRESAHA.109.216291")` and is the minimum found when researching other variants/disorders. 167 | 168 | Determination of the maximum allelic contribution (a measure of heterogeneity) is described in the text. Where a large cohort exists for a disorder, the upper confidence interval of the frequency of the most common variant in this cohort, was used as the maximum allelic contribution. 169 | 170 | Having established a maximum credible allele frequency (AF), the maximum tolerated allele count (AC) was computed as the AC occurring at the upper bound of the one-tailed 95% confidence interval (95%CI AC) for that allele frequency, given the observed allele number (AN). Since the population is drawn without replacement, this would strictly be a hypergeometric distribution, but this can be modeled as binomial as the sample is much smaller than the population from which it is drawn. For ease of computation, we approximate this with a Poisson distribution. In R, this is implemented as `max_ac = qpois(quantile_limit,an*af)`, where `max_ac` is the 95%CI AC, `quantile_limit` is 0.95 (for a one-sided 95%CI), `an` is the observed allele number, and `af` is the maximum credible population allele frequency. 
171 | 172 | ### Application to recessive diseases 173 | 174 | The prevalence of a recessive condition can be related to the allele frequency of causative variants by: 175 | *Prevalence =* $\sum$ *(allele frequency of causative alleles in each contributing gene)^2^ x penetrance* 176 | approximating to: 177 | *Prevalence = (combined frequency of causative alleles in gene)^2^ x (number of similar genes) x penetrance* 178 | and expanding to: 179 | *Prevalence = (max individual allele frequency x 1/maximum allelic contribution)^2^ x 1/maximum genetic contribution x penetrance* 180 | 181 | where *maximum genetic contribution* represents the proportion of all cases that are attributable to the gene under evaluation, and *maximum allelic contribution* represents the proportion of cases attributable to that gene that are attributable to an individual variant. The maximum frequency of a recessive disease-causing variant in the population was therefore defined as: 182 | *max credible allele frequency* = $\sqrt{\text{prevalence}} \times$ *maximum allelic contribution* $\times \sqrt{\text{maximum genetic contribution}} \times 1/\sqrt{\text{penetrance}}$ 183 | 184 | ### Pre-computing filtering allele frequency values for ExAC 185 | 186 | We define the "filtering allele frequency" for a variant, or `af_filter`, as the highest true population allele frequency for which the upper bound of the 95% confidence interval of allele count under a Poisson distribution is still less than the variant's observed allele count in the reference sample. It functions as equivalent to a lower bound estimate for the true allele frequency of an observed variant: if the filtering allele frequency of a variant is at or above the maximum credible allele frequency for a disease, then the variant is considered too common to be causative of the disease. 187 | 188 | Consider, for example, a variant with an observed AC=3 and AN=100,000. 
If a user's maximum credible allele frequency for their disease is 1 in 100,000, then this variant should be kept in consideration as potentially pathogenic, because the upper bound of the Poisson 95%CI is AC=3. On the other hand, if the user's maximum credible allele frequency is 1 in 200,000 then this variant should be filtered out, as the 95%CI upper bound is only AC=2. We define `af_filter` as the highest AF value for which a variant should be filtered out. 189 | 190 | In the example, the highest allele frequency that gives a 95%CI AC of 2 when AN=100,000 is approximately 8.17e-6. Instead of solving exactly for such values, which would require solving the inverse cumulative distribution function of the Poisson distribution, we derive a numerical approximation in two steps: 191 | 192 | 1. For each variant in consideration, we use R's `uniroot` function to find an AF value (though not necessarily the highest AF value) for which the 95%CI AC is one less than the observed AC. 193 | 2. We then loop, incrementing by units of millionths, and return the highest AF value that still gives a 95%CI AC less than the observed AC. 194 | 195 | In order to pre-compute `af_filter` values for all of ExAC (version 0.3.1), we apply this procedure to the AC and AN values for each of the five major continental populations in ExAC, and take the highest result from any population. Usually, this is from the population with the highest nominal allele frequency. However, because the tightness of a 95% confidence interval in the Poisson distribution depends upon sample size, the stringency of the filter depends upon the allele number (AN). The stringency of the filter therefore varies appropriately according to the size of the sub-population in which the variant is observed, and sequencing coverage at that site, and `af_filter` is occasionally derived from a population other than the one with the highest nominal allele frequency. 
196 | 197 | For this analysis, we used adjusted AC and AN, meaning variant calls with GQ≥20 and DP≥10. 198 | 199 | #### Treatment of singletons and other populations 200 | 201 | It is worth considering whether a single observation in a reference sample should ever be treated as incompatible with disease. Using the approach outlined above, it can be inferred that an ExAC AC=1 would be considered incompatible with a true population allele frequency <2.9x10^-6^ (with 95% confidence). For a penetrant disease with a prevalence of 1:1,000,000, the probability of observing a specific causative allele in ExAC is <0.01, even if the disease is genetically homogeneous with just one causative variant. In practice however, we feel that there are few, if any, diseases that are extremely rare yet have sufficiently well-characterized genetic architecture to discard singleton variants from a reference sample. Therefore, for singletons (variants observed exactly once in ExAC), we set the filtering allele frequency to zero. 202 | 203 | We also note that occasionally a variant is seen in individuals falling under the Finnish or "Other" population categories in ExAC, and is a singleton or absent in all five continental populations. For these variants, the filtering allele frequency is set to zero. Because the Finnish are a bottlenecked population, disease-causing alleles may reach frequencies that would be impossible in large outbred populations. Similarly, because we have not assigned ancestry for the "Other" individuals, it is difficult to assess the population frequency of variants seen only in this set of individuals. Users are left to judge whether variants that would not be filtered on the basis of frequency in the five continental populations, but that are recurrent in Finnish or "Other" populations, should be removed from consideration according to the specific circumstances. 
204 | 205 | #### Simulated Mendelian variant discovery analysis 206 | 207 | To simulate Mendelian variant discovery, we randomly selected 100 individuals from each of five major continental populations and filtered their exomes against filtering allele frequencies derived from the remaining 60,206 ExAC individuals. The subset of individuals was the same as that previously reported `r citep("10.1038/nature19057")`. 208 | Predicted protein-altering variants are defined as missense and equivalent (including in-frame indels, start lost, stop lost, and mature miRNA-altering), and protein-truncating variants (nonsense, essential splice site, and frameshift). 209 | 210 | ### Variant curation 211 | 212 | We utilized the July 9, 2015 release of ClinVar, extracting variants from XML and TXT releases into a single tab-delimited file through use of a Python implementation of vt normalize `r citep("10.1093/bioinformatics/btv112")`, as described previously `r citep("10.1038/nature19057")`. Only variants annotated as pathogenic and non-conflicted were investigated. ExAC counts were determined by matching on chromosome, position, reference, and alternate alleles. For all variants above the proposed maximum tolerated allele count for HCM, all HGMD annotated literature was reviewed and the level of evidence supporting disease pathogenicity was curated according to ACMG criteria `r citep("10.1038/gim.2015.30")`. 213 | 214 | ### Calculating odds ratios for HCM variant burden 215 | 216 | We used a cohort of 322 patients recruited to the Royal Brompton Hospital cardiac Biomedical Research Unit with a diagnosis of HCM confirmed by cardiac MRI. These samples were sequenced using the Illumina TruSight Cardio Sequencing Kit `r citep("10.1007/s12265-016-9673-5")` on the Illumina MiSeq and NextSeq platforms. This study was subject to ethical approval (REC: 09/H0504/104+5) and informed consent was obtained for all subjects. 
The number of rare variants in _MYBPC3_, _MYH7_ and the six other sarcomeric genes associated with HCM (_TNNT2_, _TNNI3_, _MYL2_, _MYL3_, _TPM1_ and _ACTC1_) were calculated for this HCM cohort, and for reference population samples from ExAC. Case/control variant frequencies were calculated for all protein altering variants (frameshift, nonsense, splice donor/acceptor, missense and in-frame insertions/deletions), with frequencies and case/control odds ratios calculated separately for non-overlapping ExAC allele frequency bins with the following breakpoints: 1x10^-5^, 5x10^-5^, 1x10^-4^, 5x10^-4^ and 1x10^-3^. Odds Ratios were calculated as OR = (cases with variant / cases without variant) / (ExAC samples with variant / ExAC samples without variant) along with 95% confidence intervals. In the absence of sample-level genotype data for ExAC, the number of samples with a variant was approximated by the total number of variant alleles - i.e. assuming that each rare variant was found in a distinct sample. 217 | 218 | ## Code availability 219 | 220 | The manuscript was compiled in R, and source code for the analysis, figures and manuscript, are available at . The source code for our allele frequency calculator app is located at . 
221 | 222 | ## Supplementary information 223 | 224 | Supplementary note 1 - Curation of a high frequency PCD variant 225 | 226 | Supplementary note 2 - Dealing with penetrance 227 | 228 | Supplementary table 1 229 | 230 | ## Bibliography 231 | 232 | ```{r, echo=FALSE, message=FALSE} 233 | write.bibtex(file="references.bib") 234 | ``` 235 | -------------------------------------------------------------------------------- /manuscript/nature.csl: -------------------------------------------------------------------------------- 1 | 2 | 118 | -------------------------------------------------------------------------------- /manuscript/newTemplate.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/manuscript/newTemplate.docx -------------------------------------------------------------------------------- /manuscript/template2.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/manuscript/template2.docx -------------------------------------------------------------------------------- /packrat/init.R: -------------------------------------------------------------------------------- 1 | local({ 2 | 3 | ## Helper function to get the path to the library directory for a 4 | ## given packrat project. 
5 | getPackratLibDir <- function(projDir = NULL) { 6 | path <- file.path("packrat", "lib", R.version$platform, getRversion()) 7 | 8 | if (!is.null(projDir)) { 9 | 10 | ## Strip trailing slashes if necessary 11 | projDir <- sub("/+$", "", projDir) 12 | 13 | ## Only prepend path if different from current working dir 14 | if (!identical(normalizePath(projDir), normalizePath(getwd()))) 15 | path <- file.path(projDir, path) 16 | } 17 | 18 | path 19 | } 20 | 21 | ## Ensure that we set the packrat library directory relative to the 22 | ## project directory. Normally, this should be the working directory, 23 | ## but we also use '.rs.getProjectDirectory()' if necessary (e.g. we're 24 | ## rebuilding a project while within a separate directory) 25 | libDir <- if (exists(".rs.getProjectDirectory")) 26 | getPackratLibDir(.rs.getProjectDirectory()) 27 | else 28 | getPackratLibDir() 29 | 30 | ## Unload packrat in case it's loaded -- this ensures packrat _must_ be 31 | ## loaded from the private library. Note that `requireNamespace` will 32 | ## succeed if the package is already loaded, regardless of lib.loc! 33 | if ("packrat" %in% loadedNamespaces()) 34 | try(unloadNamespace("packrat"), silent = TRUE) 35 | 36 | if (suppressWarnings(requireNamespace("packrat", quietly = TRUE, lib.loc = libDir))) { 37 | 38 | # Check 'print.banner.on.startup' -- when NA and RStudio, don't print 39 | print.banner <- packrat::get_opts("print.banner.on.startup") 40 | if (print.banner == "auto" && is.na(Sys.getenv("RSTUDIO", unset = NA))) { 41 | print.banner <- TRUE 42 | } else { 43 | print.banner <- FALSE 44 | } 45 | return(packrat::on(print.banner = print.banner)) 46 | } 47 | 48 | ## Escape hatch to allow RStudio to handle bootstrapping. This 49 | ## enables RStudio to provide print output when automagically 50 | ## restoring a project from a bundle on load. 
51 | if (!is.na(Sys.getenv("RSTUDIO", unset = NA)) && 52 | is.na(Sys.getenv("RSTUDIO_PACKRAT_BOOTSTRAP", unset = NA))) { 53 | Sys.setenv("RSTUDIO_PACKRAT_BOOTSTRAP" = "1") 54 | setHook("rstudio.sessionInit", function(...) { 55 | # Ensure that, on sourcing 'packrat/init.R', we are 56 | # within the project root directory 57 | if (exists(".rs.getProjectDirectory")) { 58 | owd <- getwd() 59 | setwd(.rs.getProjectDirectory()) 60 | on.exit(setwd(owd), add = TRUE) 61 | } 62 | source("packrat/init.R") 63 | }) 64 | return(invisible(NULL)) 65 | } 66 | 67 | ## Bootstrapping -- only performed in interactive contexts, 68 | ## or when explicitly asked for on the command line 69 | if (interactive() || "--bootstrap-packrat" %in% commandArgs(TRUE)) { 70 | 71 | message("Packrat is not installed in the local library -- ", 72 | "attempting to bootstrap an installation...") 73 | 74 | ## We need utils for the following to succeed -- there are calls to functions 75 | ## in 'restore' that are contained within utils. 
utils gets loaded at the 76 | ## end of start-up anyhow, so this should be fine 77 | library("utils", character.only = TRUE) 78 | 79 | ## Install packrat into local project library 80 | packratSrcPath <- list.files(full.names = TRUE, 81 | file.path("packrat", "src", "packrat") 82 | ) 83 | 84 | ## No packrat tarballs available locally -- try some other means of installation 85 | if (!length(packratSrcPath)) { 86 | 87 | message("> No source tarball of packrat available locally") 88 | 89 | ## There are no packrat sources available -- try using a version of 90 | ## packrat installed in the user library to bootstrap 91 | if (requireNamespace("packrat", quietly = TRUE) && packageVersion("packrat") >= "0.2.0.99") { 92 | message("> Using user-library packrat (", 93 | packageVersion("packrat"), 94 | ") to bootstrap this project") 95 | } 96 | 97 | ## Couldn't find a user-local packrat -- try finding and using devtools 98 | ## to install 99 | else if (requireNamespace("devtools", quietly = TRUE)) { 100 | message("> Attempting to use devtools::install_github to install ", 101 | "a temporary version of packrat") 102 | library(stats) ## for setNames 103 | devtools::install_github("rstudio/packrat") 104 | } 105 | 106 | ## Try downloading packrat from CRAN if available 107 | else if ("packrat" %in% rownames(available.packages())) { 108 | message("> Installing packrat from CRAN") 109 | install.packages("packrat") 110 | } 111 | 112 | ## Fail -- couldn't find an appropriate means of installing packrat 113 | else { 114 | stop("Could not automatically bootstrap packrat -- try running ", 115 | "\"'install.packages('devtools'); devtools::install_github('rstudio/packrat')\"", 116 | "and restarting R to bootstrap packrat.") 117 | } 118 | 119 | # Restore the project, unload the temporary packrat, and load the private packrat 120 | packrat::restore(prompt = FALSE, restart = TRUE) 121 | 122 | ## This code path only reached if we didn't restart earlier 123 | unloadNamespace("packrat") 124 | 
requireNamespace("packrat", lib.loc = libDir, quietly = TRUE) 125 | return(packrat::on()) 126 | 127 | } 128 | 129 | ## Multiple packrat tarballs available locally -- try to choose one 130 | ## TODO: read lock file and infer most appropriate from there; low priority because 131 | ## after bootstrapping packrat a restore should do the right thing 132 | if (length(packratSrcPath) > 1) { 133 | warning("Multiple versions of packrat available in the source directory;", 134 | "using packrat source:\n- ", shQuote(packratSrcPath)) 135 | packratSrcPath <- packratSrcPath[[1]] 136 | } 137 | 138 | 139 | lib <- file.path("packrat", "lib", R.version$platform, getRversion()) 140 | if (!file.exists(lib)) { 141 | dir.create(lib, recursive = TRUE) 142 | } 143 | lib <- normalizePath(lib, winslash = "/") 144 | 145 | message("> Installing packrat into project private library:") 146 | message("- ", shQuote(lib)) 147 | 148 | surround <- function(x, with) { 149 | if (!length(x)) return(character()) 150 | paste0(with, x, with) 151 | } 152 | 153 | ## The following is performed because a regular install.packages call can fail 154 | peq <- function(x, y) paste(x, y, sep = " = ") 155 | installArgs <- c( 156 | peq("pkgs", surround(packratSrcPath, with = "'")), 157 | peq("lib", surround(lib, with = "'")), 158 | peq("repos", "NULL"), 159 | peq("type", surround("source", with = "'")) 160 | ) 161 | installCmd <- paste(sep = "", 162 | "utils::install.packages(", 163 | paste(installArgs, collapse = ", "), 164 | ")") 165 | 166 | fullCmd <- paste( 167 | surround(file.path(R.home("bin"), "R"), with = "\""), 168 | "--vanilla", 169 | "--slave", 170 | "-e", 171 | surround(installCmd, with = "\"") 172 | ) 173 | system(fullCmd) 174 | 175 | ## Tag the installed packrat so we know it's managed by packrat 176 | ## TODO: should this be taking information from the lockfile? 
this is a bit awkward 177 | ## because we're taking an un-annotated packrat source tarball and simply assuming it's now 178 | ## an 'installed from source' version 179 | 180 | ## -- InstallAgent -- ## 181 | installAgent <- 'InstallAgent: packrat 0.4.7-1' 182 | 183 | ## -- InstallSource -- ## 184 | installSource <- 'InstallSource: source' 185 | 186 | packratDescPath <- file.path(lib, "packrat", "DESCRIPTION") 187 | DESCRIPTION <- readLines(packratDescPath) 188 | DESCRIPTION <- c(DESCRIPTION, installAgent, installSource) 189 | cat(DESCRIPTION, file = packratDescPath, sep = "\n") 190 | 191 | # Otherwise, continue on as normal 192 | message("> Attaching packrat") 193 | library("packrat", character.only = TRUE, lib.loc = lib) 194 | 195 | message("> Restoring library") 196 | restore(restart = FALSE) 197 | 198 | # If the environment allows us to restart, do so with a call to restore 199 | restart <- getOption("restart") 200 | if (!is.null(restart)) { 201 | message("> Packrat bootstrap successfully completed. ", 202 | "Restarting R and entering packrat mode...") 203 | return(restart()) 204 | } 205 | 206 | # Callers (source-erers) can define this hidden variable to make sure we don't enter packrat mode 207 | # Primarily useful for testing 208 | if (!exists(".__DONT_ENTER_PACKRAT_MODE__.") && interactive()) { 209 | message("> Packrat bootstrap successfully completed. 
Entering packrat mode...") 210 | packrat::on() 211 | } 212 | 213 | Sys.unsetenv("RSTUDIO_PACKRAT_BOOTSTRAP") 214 | 215 | } 216 | 217 | }) 218 | -------------------------------------------------------------------------------- /packrat/packrat.lock: -------------------------------------------------------------------------------- 1 | PackratFormat: 1.4 2 | PackratVersion: 0.4.7.1 3 | RVersion: 3.3.0 4 | Repos: BioCsoft=https://bioconductor.org/packages/3.3/bioc, 5 | BioCann=https://bioconductor.org/packages/3.3/data/annotation, 6 | BioCexp=https://bioconductor.org/packages/3.3/data/experiment, 7 | BioCextra=https://bioconductor.org/packages/3.3/extra, 8 | CRAN=https://cran.rstudio.com/ 9 | 10 | Package: BH 11 | Source: CRAN 12 | Version: 1.60.0-1 13 | Hash: 889445e87a2acd4cc58440957f3b0d1a 14 | 15 | Package: DBI 16 | Source: CRAN 17 | Version: 0.4-1 18 | Hash: 6524d5c5d30ff74084ac028eeb19eb1b 19 | 20 | Package: R6 21 | Source: CRAN 22 | Version: 2.1.2 23 | Hash: c515e41c73294952cbb33ee1c1be5a2d 24 | 25 | Package: RColorBrewer 26 | Source: CRAN 27 | Version: 1.1-2 28 | Hash: c0d56cd15034f395874c870141870c25 29 | 30 | Package: RCurl 31 | Source: CRAN 32 | Version: 1.95-4.7 33 | Hash: 7756ed9df5d79ca87bc9e93f85d89b87 34 | Requires: bitops 35 | 36 | Package: RJSONIO 37 | Source: CRAN 38 | Version: 1.3-0 39 | Hash: fb672e20eb6f3010a3639f855d8ef6de 40 | 41 | Package: Rcpp 42 | Source: CRAN 43 | Version: 0.12.6 44 | Hash: 79f89fabd832de0a2e60aa05d9027f32 45 | 46 | Package: RefManageR 47 | Source: CRAN 48 | Version: 0.10.5 49 | Hash: 987c0eb72c07d2e8b568f53cd22ef706 50 | Requires: RCurl, RJSONIO, XML, bibtex, lubridate, plyr, stringr 51 | 52 | Package: XML 53 | Source: CRAN 54 | Version: 3.98-1.3 55 | Hash: 689166755239c3300769ca2d600381c6 56 | 57 | Package: assertthat 58 | Source: CRAN 59 | Version: 0.1 60 | Hash: 0afb92b59b02593c70ff8046700ba9d3 61 | 62 | Package: base64enc 63 | Source: CRAN 64 | Version: 0.1-3 65 | Hash: c590d29e555926af053055e23ee79efb 66 | 67 | Package: 
bibtex 68 | Source: CRAN 69 | Version: 0.4.0 70 | Hash: 1884fc6fc10f47e9c6a77bfd9370b28a 71 | 72 | Package: bitops 73 | Source: CRAN 74 | Version: 1.0-6 75 | Hash: 67d0775189fd0041d95abca618c5c07e 76 | 77 | Package: caTools 78 | Source: CRAN 79 | Version: 1.17.1 80 | Hash: 97cb6f6293cd18d17df77a6383cc6763 81 | Requires: bitops 82 | 83 | Package: colorspace 84 | Source: CRAN 85 | Version: 1.2-6 86 | Hash: 00bb12245cd975c450cc4a960884fa15 87 | 88 | Package: curl 89 | Source: CRAN 90 | Version: 0.9.5 91 | Hash: 03562aad7537914f516bd67e9bba28b1 92 | 93 | Package: dichromat 94 | Source: CRAN 95 | Version: 2.0-0 96 | Hash: 08eed0c80510af29bb15f840ccfe37ce 97 | 98 | Package: digest 99 | Source: CRAN 100 | Version: 0.6.8 101 | Hash: de202f956596a476d09304e3a2ebf1e5 102 | 103 | Package: dplyr 104 | Source: CRAN 105 | Version: 0.5.0 106 | Hash: fc7f06a5772a1b2845ef6e74b8aff740 107 | Requires: BH, DBI, R6, Rcpp, assertthat, lazyeval, magrittr, tibble 108 | 109 | Package: evaluate 110 | Source: CRAN 111 | Version: 0.9 112 | Hash: cf338d73ba4ee3a4fefec612cdfd38db 113 | Requires: stringr 114 | 115 | Package: formatR 116 | Source: CRAN 117 | Version: 1.2 118 | Hash: cce53206179a99ae5a8bcf154ab5abde 119 | 120 | Package: ggplot2 121 | Source: CRAN 122 | Version: 2.1.0 123 | Hash: 50e297b0191179c39c7dcae0eff72b51 124 | Requires: digest, gtable, plyr, reshape2, scales 125 | 126 | Package: gtable 127 | Source: CRAN 128 | Version: 0.1.2 129 | Hash: 38a0f066373a60824cd3ade2362af363 130 | 131 | Package: highr 132 | Source: CRAN 133 | Version: 0.5 134 | Hash: 25f9c02030b94c8154273699a7df9cf8 135 | 136 | Package: htmltools 137 | Source: CRAN 138 | Version: 0.3.5 139 | Hash: f81546c00234653ca3a7dcd0b7e1a757 140 | Requires: Rcpp, digest 141 | 142 | Package: httr 143 | Source: CRAN 144 | Version: 1.1.0 145 | Hash: 27ffb4b4dbb8ac4019126070d66910fc 146 | Requires: R6, curl, jsonlite, mime, openssl 147 | 148 | Package: jsonlite 149 | Source: CRAN 150 | Version: 0.9.19 151 | Hash: 
4a983f753b6e88ae0f5a6ac152d8cc32 152 | 153 | Package: knitauthors 154 | Source: github 155 | Version: 0.0.1 156 | Hash: 9bdff792c9601b965853e78177d208ec 157 | Requires: dplyr 158 | GithubRepo: knitauthors 159 | GithubUsername: jamesware 160 | GithubRef: master 161 | GithubSha1: 15f87cf98f561cc097d47f463905b2ca5b4ab35b 162 | 163 | Package: knitcitations 164 | Source: CRAN 165 | Version: 1.0.7 166 | Hash: 8ae09e4d9390a166ac95ca75c91767a8 167 | Requires: RefManageR, digest, httr 168 | 169 | Package: knitr 170 | Source: CRAN 171 | Version: 1.12 172 | Hash: bbb197833ff9b57c328c0fc9f924cb92 173 | Requires: digest, formatR, highr, yaml, markdown, stringr, evaluate 174 | 175 | Package: labeling 176 | Source: CRAN 177 | Version: 0.3 178 | Hash: ecf589b42cd284b03a4beb9665482d3e 179 | 180 | Package: lazyeval 181 | Source: CRAN 182 | Version: 0.1.10 183 | Hash: 9679f1ac7f6bc07bc79755f34cd15e1f 184 | 185 | Package: lubridate 186 | Source: CRAN 187 | Version: 1.5.0 188 | Hash: 038715dfce23c748aef45c22f46e4b75 189 | Requires: stringr 190 | 191 | Package: magrittr 192 | Source: CRAN 193 | Version: 1.5 194 | Hash: bdc4d48c3135e8f3b399536ddf160df4 195 | 196 | Package: markdown 197 | Source: CRAN 198 | Version: 0.7.7 199 | Hash: fea2343a1119d61b0cc5c0a950d103a3 200 | Requires: mime 201 | 202 | Package: mime 203 | Source: CRAN 204 | Version: 0.3 205 | Hash: 629133973445ad75e5438a2bb6b05c17 206 | 207 | Package: munsell 208 | Source: CRAN 209 | Version: 0.4.2 210 | Hash: 72d343a6664778029f49a7ad36de1cab 211 | Requires: colorspace 212 | 213 | Package: openssl 214 | Source: CRAN 215 | Version: 0.9.4 216 | Hash: 5ceb5d83bba44e8e5a764ac6ad32e9fb 217 | 218 | Package: packrat 219 | Source: CRAN 220 | Version: 0.4.7-1 221 | Hash: 16ca16d36eb18d1d16113606ecc5c400 222 | 223 | Package: pander 224 | Source: CRAN 225 | Version: 0.6.0 226 | Hash: c7dc3e820aad53039a123e2242e5d566 227 | Requires: Rcpp, digest 228 | 229 | Package: plyr 230 | Source: CRAN 231 | Version: 1.8.2 232 | Hash: 
46fd0a3c03da5fc861f77b79400fd311 233 | Requires: Rcpp 234 | 235 | Package: reshape2 236 | Source: CRAN 237 | Version: 1.4.1 238 | Hash: 55a31e20f8a216855e138ca83756a4b0 239 | Requires: Rcpp, plyr, stringr 240 | 241 | Package: rmarkdown 242 | Source: CRAN 243 | Version: 1.0 244 | Hash: 7c60cd30594b34874a6a7e729f0a6fad 245 | Requires: base64enc, caTools, evaluate, htmltools, jsonlite, knitr, 246 | yaml 247 | 248 | Package: scales 249 | Source: CRAN 250 | Version: 0.3.0 251 | Hash: 3ed76b93ca7adf79721d3533b1a1ac95 252 | Requires: RColorBrewer, Rcpp, dichromat, labeling, munsell, plyr 253 | 254 | Package: stringi 255 | Source: CRAN 256 | Version: 0.4-1 257 | Hash: 5f5be3e0eaf1ce0228196e80c47b0ab3 258 | 259 | Package: stringr 260 | Source: CRAN 261 | Version: 1.0.0 262 | Hash: 2676dd5f88890910962b733b0f9540e1 263 | Requires: magrittr, stringi 264 | 265 | Package: tibble 266 | Source: CRAN 267 | Version: 1.1 268 | Hash: 5467b4aa158b2d984538f7ffde17171c 269 | Requires: Rcpp, assertthat, lazyeval 270 | 271 | Package: tidyr 272 | Source: CRAN 273 | Version: 0.5.1 274 | Hash: 56af57d037a1d229dfa50601debf3f4b 275 | Requires: Rcpp, dplyr, lazyeval, magrittr, stringi, tibble 276 | 277 | Package: yaml 278 | Source: CRAN 279 | Version: 2.1.13 280 | Hash: 4854ccabebc225e8a7309fb4a74980de 281 | -------------------------------------------------------------------------------- /packrat/packrat.opts: -------------------------------------------------------------------------------- 1 | auto.snapshot: TRUE 2 | use.cache: FALSE 3 | print.banner.on.startup: auto 4 | vcs.ignore.lib: TRUE 5 | vcs.ignore.src: TRUE 6 | external.packages: 7 | knitr 8 | devtools 9 | local.repos: 10 | load.external.packages.on.startup: TRUE 11 | ignored.packages: 12 | quiet.package.installation: TRUE 13 | snapshot.recommended.packages: FALSE 14 | -------------------------------------------------------------------------------- /packrat/src/packrat/packrat_0.4.7-1.tar.gz: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImperialCardioGenetics/frequencyFilter/00b08ccf41bbbd927ac3d021d302e80a4ad4ca5d/packrat/src/packrat/packrat_0.4.7-1.tar.gz
--------------------------------------------------------------------------------
/src/hcm_curation.R:
--------------------------------------------------------------------------------
# Exploratory curation script: annotates ClinVar variants with ExAC allele counts and
# tabulates pathogenic HCM / PCD / NCCM variants above disease-specific allele-count cutoffs.
# The numbers in trailing comments are the counts recorded when the script was last run.
# NOTE(review): depends on the author's local directory layout (setwd below) and on
# load_exac_data(), which is not defined in this file - presumably it comes from
# exac_constants.R; confirm.
setwd('~/d/sci/src/freq_filter')
options(stringsAsFactors=FALSE)
source('../exac_2015/exac_constants.R')

# load ExAC dataset
if (!("exac" %in% ls(globalenv()))) {
  exac = load_exac_data(reload=T)
}

clinvar = read.table('../clinvar/output/clinvar.tsv',sep='\t',header=T,quote='',comment.char='')
# build a chrom_pos_ref_alt key (position zero-padded to 9 characters) for the ClinVar table
clinvar$pos_id = paste(clinvar$chrom, formatC(clinvar$pos,width=9,flag='0'), clinvar$ref, clinvar$alt, sep='_')
# look up the ExAC adjusted allele count by key; variants absent from ExAC get a count of 0
clinvar$exac_ac_adj = exac$ac_adj[match(clinvar$pos_id, exac$pos_id)]
clinvar$exac_ac_adj[is.na(clinvar$exac_ac_adj)] = 0
# "Literature only" refers to ClinVar submissions that have no submitter except OMIM and/or GeneReviews
clinvar$lit_only = nchar(gsub('OMIM','',gsub('GeneReviews','',gsub(';','',clinvar$all_submitters)))) == 0

# HCM = trait string mentions both "cardiomyopathy" and "hypertrophic" (case-insensitive)
hcm = grepl("cardiomyopathy",clinvar$all_traits,ignore.case=TRUE) & grepl("hypertrophic",clinvar$all_traits,ignore.case=TRUE)

# NOTE(review): 9 is presumably the proposed maximum tolerated allele count for HCM - confirm.
# The "> 9" and "< 9" comparisons below leave variants with exac_ac_adj == 9 in neither group
# (recorded counts: 631 - 50 - 577 = 4 such variants); confirm whether "<= 9" was intended.
sum(hcm) # 1063
sum(hcm & clinvar$pathogenic==1) # 631
sum(hcm & clinvar$exac_ac_adj > 9 & clinvar$pathogenic==1) # 50
sum(hcm & clinvar$exac_ac_adj > 9 & clinvar$pathogenic==1 & clinvar$all_pmids != '') # 50
sum(hcm & clinvar$exac_ac_adj > 9 & clinvar$pathogenic==1 & clinvar$lit_only) # 2
sum(hcm & clinvar$exac_ac_adj > 9 & clinvar$pathogenic==1 & clinvar$conflicted==0) # 28
sum(hcm & clinvar$exac_ac_adj < 9 & clinvar$pathogenic==1) # 577
sum(hcm & clinvar$exac_ac_adj < 9 & clinvar$pathogenic==1 & clinvar$conflicted==0) # 573

# create a table of these variants to browse in Rstudio
hcm_prev = clinvar[hcm & clinvar$exac_ac_adj > 9 & clinvar$pathogenic==1 & clinvar$conflicted==0,]

# write out a table of HCM variants per Nicky's request
write.table(clinvar[hcm,],'data/clinvar_hcm.tsv',sep='\t',row.names=F,col.names=T,quote=F)

# PCD = trait string mentions "primary", "ciliary" and "dyskinesia"
# NOTE(review): 154 is presumably the maximum tolerated allele count for PCD - confirm.
pcd = grepl("primary",clinvar$all_traits,ignore.case=TRUE) & grepl("ciliary",clinvar$all_traits,ignore.case=TRUE) & grepl("dyskinesia",clinvar$all_traits,ignore.case=TRUE)
sum(pcd) # 246
sum(pcd & clinvar$pathogenic==1 & clinvar$conflicted==0) # 128
sum(pcd & clinvar$pathogenic==1 & clinvar$conflicted==0 & clinvar$exac_ac_adj > 154) # 1

# NCCM = trait string mentions "cardiomyopathy" and "compaction"
# NOTE(review): 18 is presumably the maximum tolerated allele count for NCCM - confirm.
nccm = grepl("cardiomyopathy",clinvar$all_traits,ignore.case=TRUE) & grepl("compaction",clinvar$all_traits,ignore.case=TRUE)
sum(nccm) # 85
sum(nccm & clinvar$pathogenic==1 & clinvar$conflicted==0) # 42
sum(nccm & clinvar$pathogenic==1 & clinvar$conflicted==0 & clinvar$exac_ac_adj > 18) # 3

# export the non-conflicted pathogenic PCD / NCCM variants that exceed their cutoffs
to_look_at = clinvar$pathogenic==1 & clinvar$conflicted==0 & ((nccm & clinvar$exac_ac_adj > 18) | (pcd & clinvar$exac_ac_adj > 154))
write.table(clinvar[to_look_at,],'data/clinvar_pcd_nccm_high_af.tsv',sep='\t',row.names=F,col.names=T,quote=F)

--------------------------------------------------------------------------------
/src/precompute_exac_af_filter.R:
--------------------------------------------------------------------------------
setwd('~/d/sci/src/freq_filter')
options(stringsAsFactors=FALSE)
source('../exac_2015/exac_constants.R')

# load ExAC dataset
if (!("exac" %in% ls(globalenv()))) {
  exac = load_exac_data(reload=F)
}

# forward method by James Ware:
# given a desired AF to filter on for Mendelian analyses, what is the max AC that would be expected in a dataset?
find_max_ac = function(af,an,ci=.95) {
  # Returns the `ci` quantile of a Poisson distribution with mean an*af, i.e. the largest
  # allele count expected (one-sided, at confidence `ci`) among `an` sampled alleles for a
  # variant whose true allele frequency is `af`. An `af` of exactly 0 always yields 0.
  if (af == 0) {
    return (0)
  } else {
    quantile_limit = ci # ci for one-sided, 1-(1-ci)/2 for two-sided
    max_ac = qpois(quantile_limit,an*af)
    return (max_ac)
  }
}


# backward method: given an observed AC, what is the highest AF filter for which one should remove this variant?
# This function will accept an AC_Adj and an AN_Adj and will return the highest AF filter for which you would want to reject
# this variant as a potential Mendelian causal allele.
# Returns 0 when ac or an is NA or 0, when ac is 1 (singletons are never filtered), or when
# uniroot fails (in which case the failing ac/an pair and the condition are printed).
# Important notes about parameters:
# 1. for uniroot to work, lower has to be rarer than a singleton, and upper has to be higher than fixed (100% AF),
#    hence the AF here runs from 0.1 alleles in ExAC to an impossible AF of 200%
# 2. you need a tight tolerance (tol) to get accurate results from uniroot at low AF, so we use 1e-7 as default.
# 3. we also specify a precision (1e-6) for incrementing to find the upper boundary of the range of AFs for which
#    the 95%CI AC is less than the observed AC
# 4. ci is used both for the root search and for the subsequent boundary refinement, so a non-default ci
#    gives a result that is consistent with find_max_ac(..., ci=ci)
find_af_filter = function(ac, an, ci=.95, lower=(.1/(2*60706)), upper=2, tol=1e-7, precision=1e-6) {
  if (is.na(ac) || is.na(an) || ac == 0 || an == 0 || ac == 1) {
    return (0.0)
  } else {
    quantile_limit = ci # ci for one-sided, 1-(1-ci)/2 for two-sided
    # this has been tested and with the default parameters here, uniroot seems to never fail for an AC, AN values
    # in ExAC. still, it's good to have this tryCatch in here for debugging or in case anyone wants to override
    # the defaults. instead of just giving the error/warning message it also tells you which AC and AN values
    # it failed on.
    # the handlers yield NULL so that a failure is detected below, rather than falling through to an
    # undefined (or stale, global) uniroot_result
    uniroot_result = tryCatch({
      uniroot(f = function(af) { return (ac - 1 - qpois(p=quantile_limit,lambda=an*af)) },lower=lower,upper=upper,tol=tol)
    }, warning = function(w) {
      print(paste("ac= ",as.character(ac),", an= ",as.character(an)," warning = ",as.character(w),sep=''))
      return (NULL)
    }, error = function(e) {
      print(paste("ac= ",as.character(ac),", an= ",as.character(an)," error = ",as.character(e),sep=''))
      return (NULL)
    })
    if (is.null(uniroot_result)) {
      return (0.0)
    }
    max_af = round(uniroot_result$root,-log10(precision)) # round to nearest millionth
    while(find_max_ac(af=max_af,an=an,ci=ci) < ac) {
      max_af = max_af + precision # increase by millionths until you find the upper bound - the highest AF for which 95%CI AC is still less than observed AC
    }
    max_af = max_af - precision # back off one unit from the AF that violated the 95%CI AC < obs AC condition
    return (max_af)
  }
}

# This finds the af_filter for, say, a vector of AC at 10,000 different alleles and a corresponding vector of 10,000 AN values
# it's basically just a wrapper for mapply, which is trivial but makes more readable the code below in find_highest_af_filter.
# Empty input returns an empty numeric vector (mapply itself refuses to mix zero-length and non-zero-length arguments).
find_af_filter_vectorized = function(ac_vector, an_vector, ci=.95, lower=(.1/(2*60706)), upper=2, tol=1e-7, precision=1e-6) {
  if (length(ac_vector) == 0 || length(an_vector) == 0) {
    return (numeric(0))
  }
  return (mapply(find_af_filter, ac_vector, an_vector, ci=ci, lower=lower, upper=upper, tol=tol, precision=precision))
}

# In some cases, the highest af_filter value will _NOT_ come from the popmax population, because
# sometimes the population with higher AN_Adj will give more filtering power even if its AF is slightly lower
# therefore, it is important to be able to take the highest af_filter value from a vector of population AC and AN
# this function will accept a list of vectors of AC and of AN. for instance, ac_list may be a list of 5 vectors, which are
# each 10,000 value of ac_afr, ac_amr, ac_eas, ac_nfe, and ac_sas. an_list is the corresponding list of an_ vectors.
# Returns a list with two elements of one entry per variant:
#   af_filter     - the highest af_filter across the populations
#   af_filter_pop - the population that produced it: the name of the ac_list element with "ac_" removed
#                   (or its position, if ac_list is unnamed), or 'none' when the highest af_filter is 0
find_highest_af_filter = function(ac_list,an_list,ci=.95) {
  len_ac = length(ac_list)
  len_an = length(an_list)
  stopifnot(len_ac >= 1)
  stopifnot(len_ac==len_an) # check that we have the same number of AC and AN columns
  # make a list of af_filter vectors for each population
  # SIMPLIFY=FALSE makes it return a list instead of concatenating the vectors
  af_filter_list = mapply(find_af_filter_vectorized,ac_list,an_list,MoreArgs=list(ci=ci),SIMPLIFY=FALSE)
  pop_labels = names(af_filter_list)
  if (is.null(pop_labels)) {
    pop_labels = as.character(seq_along(af_filter_list)) # unnamed input: label populations by position
  }
  n_variants = length(af_filter_list[[1]])
  highest_af_filter = numeric(n_variants)
  highest_af_filter_pop = character(n_variants)
  for (i in seq_len(n_variants)) { # we need both the max and which population gave it, so just looping
    to_compare = mapply("[[",af_filter_list,i)
    idx = which.max(to_compare) # first population wins ties
    highest_af_filter[i] = to_compare[idx] # get max filter
    highest_af_filter_pop[i] = pop_labels[idx] # get the pop that gave the max filter
  }
  highest_af_filter_pop = gsub("ac_","",highest_af_filter_pop)
  highest_af_filter_pop[highest_af_filter==0] = 'none'
  return (list(af_filter=highest_af_filter, af_filter_pop=highest_af_filter_pop))
}

# QC test #1: do we get the same answer forward and backward?
an = 2*60706
find_af_filter(find_max_ac(af=.001,an=an),an=an)
find_af_filter(find_max_ac(af=.01,an=an),an=an)

# QC test 2. do we get roughly the same answer across a whole range of AC?
an = 2*60706 # for now assume full genotyping
afs = c((1:9)*10^-6,(1:9)*10^-5,(1:9)*10^-4,(1:9)*10^-3,1*10^-2)
# NOTE(review): `percent` (used for axis labels below) is not defined in this file - presumably it
# comes from exac_constants.R or the scales package; confirm.
plot(NA,NA,xlim=range(afs),ylim=c(1,300),xlab='AF',ylab='95%CI max AC',log='xy',axes=FALSE)
forward_ac = numeric(length(afs))
backward_af = numeric(length(afs))
# run each AF forward to a max AC, then run that AC backward to an AF filter
for (i in 1:length(afs)) {
  forward_ac[i] = find_max_ac(afs[i],an=an)
  backward_af[i] = find_af_filter(ac=forward_ac[i],an=an)
}
points(x=afs,y=forward_ac,col='red',type='s',lwd=10)
points(x=backward_af,y=forward_ac,col='blue',type='s',lwd=3)
legend('topleft',c('forward (James)','backward (Eric)'),col=c('red','blue'),lwd=c(10,3))
axis(side=1,at=10^(-6:-2),labels=percent(10^(-6:-2)),lwd=0,lwd.ticks=1)
axis(side=2,at=(0:6)*50,labels=(0:6)*50,lwd=0,lwd.ticks=1,las=1)

# QC test 3. is the backward method sensitive to changes in AC at the lower end - singleton, doubleton etc.
find_af_filter(ac=1,an=2*60706,ci=.95)
find_af_filter(ac=2,an=2*60706,ci=.95)
find_af_filter(ac=3,an=2*60706,ci=.95)
find_af_filter(ac=4,an=2*60706,ci=.95)

# QC test 4. does the backward method compute in reasonable time?
# (note just using ac_popmax and an_popmax for this initial test, though we'll ultimately want to find the
# highest af_filter across all populations)
exac_test = head(exac,n=200000)
start_time = proc.time()
mapply(find_af_filter, exac_test$ac_popmax, exac_test$an_popmax, ci=.95)
proc.time() - start_time
# takes roughly in the range of ~200 seconds for 200K variants

# QC test 5. for an actual set of ExAC variants, does running backwards and then forwards reproduce the actual AC reasonably well?
exac_test$af_filter = mapply(find_af_filter, ac=exac_test$ac_popmax, an=exac_test$an_popmax, ci=.95)
exac_test$ac_forward = mapply(find_max_ac, af=exac_test$af_filter, an=exac_test$an_popmax, ci=.95)
cor.test(exac_test$ac_popmax, exac_test$ac_forward)
# yes, perfectly

# QC test 6. does the AF filter compare to the actual AF popmax in a reasonable way
#png('display/af_popmax_vs_af_filter.png',height=600,width=600)
par(mar=c(5,5,2,2))
plot(exac_test$af_popmax, exac_test$af_filter, pch=20, xlab='', ylab='',
     log='xy', xlim=10^c(-6,-1), ylim=10^c(-6,-1), axes=FALSE)
abline(a=0,b=1,col='red')
axis(side=1,at=10^(-5:-1),labels=percent(10^(-5:-1)),lwd=0,lwd.ticks=1)
axis(side=2,at=10^(-5:-1),labels=percent(10^(-5:-1)),lwd=0,lwd.ticks=1,las=1)
mtext(side=1,line=3,text='AF popmax',font=2)
mtext(side=2,line=4,text='AF filter',font=2)
#dev.off()
# yes, all points are below the diagonal, and the correlation is fairly tight
# do a sanity check that much of the noisiness can be explained by differences in AN:
find_af_filter(ac=100,an=100000)
find_af_filter(ac=10,an=10000)
find_af_filter(ac=1,an=1000)
# ok, good...

# QC test 7. does the find highest AF filter function work correctly and give the same answer
# when called two different ways?
# first, call with pmax and separate mapply calls
exac_test$af_filter_max = pmax(mapply(find_af_filter,ac=exac_test$ac_afr,an=exac_test$an_afr),
                               mapply(find_af_filter,ac=exac_test$ac_amr,an=exac_test$an_amr),
                               mapply(find_af_filter,ac=exac_test$ac_eas,an=exac_test$an_eas),
                               mapply(find_af_filter,ac=exac_test$ac_nfe,an=exac_test$an_nfe),
                               mapply(find_af_filter,ac=exac_test$ac_sas,an=exac_test$an_sas))

# now call with find_highest_af_filter which bundles all of that together
ptm = proc.time()
test_filtering_data = find_highest_af_filter(ac_list=as.list(exac_test[,c("ac_afr","ac_amr","ac_eas","ac_nfe","ac_sas")]),
                                             an_list=as.list(exac_test[,c("an_afr","an_amr","an_eas","an_nfe","an_sas")]))
exac_test$af_filter_max_2 = test_filtering_data[["af_filter"]]
exac_test$af_filter_pop = test_filtering_data[["af_filter_pop"]]
proc.time() - ptm # 310 seconds for 200K variants
# this predicts just over 4 hours to compute af_filter for all of exac

# check that the correlation between these two methods is perfect
cor.test(exac_test$af_filter_max, exac_test$af_filter_max_2)
# it is.

# and do the results still look reasonable w.r.t. af_popmax?
plot(exac_test$af_popmax, exac_test$af_filter_max_2,pch=20)
# yep

# ok, next actually compute af_filter for all of exac (N.B. this block took about 3.5 hours to complete)
filtering_data = find_highest_af_filter(ac_list=as.list(exac[,c("ac_afr","ac_amr","ac_eas","ac_nfe","ac_sas")]),
                                        an_list=as.list(exac[,c("an_afr","an_amr","an_eas","an_nfe","an_sas")]))
exac$af_filter = filtering_data[["af_filter"]]
exac$af_filter_pop = filtering_data[["af_filter_pop"]]

# write out results for public release
# (the relative path means the file is written to the current working directory)
af_filter_data = exac[,c("chrom","pos","ref","alt","af_filter","af_filter_pop")]
write.table(af_filter_data,"af_filter_data.tsv",sep='\t',row.names=F,col.names=T,quote=F)


# a few QC checks.
# most variants should fall on the diagonal where popmax == af_filter_pop
table(exac[,c("popmax","af_filter_pop")])
# af_filter_pop
# popmax afr amr eas nfe none sas
# AFR 786392 202 56 101735 807486 6256
# AMR 1804 387449 181 83244 589595 7134
# EAS 1073 641 488158 56861 648524 7575
# NFE 24 1 9 1273470 2499020 1332
# SAS 139 50 29 61030 1147140 764055
# ok, that looks good.
# And the correlation should be tight:
cor.test(exac$af_filter, exac$af_popmax)
# which it is: cor = 0.9881759

### FIGURE

# variants remaining after filters
# things to loop over in generating summary stats
# allele frequency thresholds
af_limits = c(1e-6, 10^-5.5, 1e-5, 10^-4.5, 1e-4, 10^-3.5, 1e-3, 10^-2.5, 1e-2, 5e-2)
# NOTE(review): `pop_names` is not defined in this file - presumably it comes from exac_constants.R; confirm.
ancestries = c(names(pop_names)[c(1,2,3,5,7)]) # "afr" "amr" "eas" "nfe" "sas"
filters = c("af_filter")
# only doing dominant analysis so don't need this line:
# zygosities = c('any','hom')

# We don't have permission to release individual genotype-level data for the "leave-out 500" for the
# simulated Mendelian analysis, but such data are needed in order to compute the filtering statistics
# for Figure 3A. Thus, this part of the code can only be reproduced in-house. 
# recalculate_stats is therefore set to FALSE by default for public release. We who are running the
# code locally can set to TRUE to recompute the filtering statistics. Note that this involves
# re-computing the af_filter on the 60,206 individuals who aren't left out, plus a bunch of costly
# joins between ExAC and the lv500 dataset. Thus, this block of code takes several hours and should
# be run overnight.
recalculate_stats = FALSE
lv500_path = '../exac_papers/misc_data/lv500.table.gz'
# scalar && short-circuits, so the file system is only touched when recalculation is requested
if (recalculate_stats && file.exists(lv500_path)) {
  lv = read.table(lv500_path, sep='\t', header=TRUE, comment.char='', quote='')
  colnames(lv) = tolower(colnames(lv))
  # drop ac_hemi with a logical mask: lv[,-which(...)] would select ZERO columns
  # if the column were ever absent, because -integer(0) is an empty index
  lv = lv[, colnames(lv) != 'ac_hemi']

  # join relevant cols to ExAC
  lv$pos_id = paste(lv$chrom, formatC(lv$pos,width=9,flag='0'), lv$ref, lv$alt, sep='_')
  exac$pos_id = paste(exac$chrom, formatC(exac$pos,width=9,flag='0'), exac$ref, exac$alt, sep='_')
  # figure out how to match lv500 to ExAC
  match_indices = match(lv$pos_id, exac$pos_id)

  # compute AC and AN for the 60,206 non-left-out individuals
  # NOTE(review): columns 7:26 are selected by position (after ac_hemi is dropped) and are
  # presumably the ac_*/an_* count columns -- confirm against the lv500 table layout
  for (colname in colnames(lv)[7:26]) {
    # compute ACs in ExAC minus the leave-out 500
    lv[,paste("exac",colname,sep="_")] = exac[match_indices,colname] - lv[,colname]
  }

  # re-calculate af_filter without the leave-out individuals in there
  lv$af_filter = find_highest_af_filter(
    ac_list=as.list(lv[,c("exac_ac_afr","exac_ac_amr","exac_ac_eas","exac_ac_nfe","exac_ac_sas")]),
    an_list=as.list(lv[,c("exac_an_afr","exac_an_amr","exac_an_eas","exac_an_nfe","exac_an_sas")])
  )[["af_filter"]]

  # copy `use` and `category` from exac (and only use if lv500 also has ac_adj > 0)
  lv$use = exac$use[match_indices] & lv$ac_adj > 0
  lv$category = exac$category[match_indices]

  # some constants and stats
  # count the number of leave-out individuals in each continental ancestry (max AN / 2).
  # Built as a named numeric vector so that sum() and division below do not depend on how
  # `[[<-` coerces a NULL object.
  n_indiv = sapply(ancestries, function(pop) max(lv[,paste("an",pop,sep="_")])/2)
  # a bare expression inside braces is not auto-printed, so report the total explicitly
  message("leave-out individuals across ancestries: ", sum(n_indiv), " (expected 500)")

  # now calculate summary stats for Figure 3A
  stats = expand.grid(ancestries,filters,af_limits)
  colnames(stats) = c('ancestry','filter','af_limit')
  stats$ancestry = as.character(stats$ancestry)
  stats$total_vars = 0
  stats$filtered_vars = 0
  stats$proportion_removed = 0.0
  # only consider "use" variants that are missense, missense-equivalent, or protein-truncating.
  # This mask does not depend on the threshold, filter or ancestry, so compute it once.
  include_in_total = lv$use & !is.na(lv$category) & lv$category %in% c(mis_like,lof_like)
  for (af_limit in af_limits) {
    for (filter in filters) {
      # note the < logic in next line. because af_filter is an af for which 95%CI AC < obs AC,
      # users should remove variants whose af_filter is _greater than or equal to_ their maximum credible AF
      # conversely, the "filtered" set of variants includes only those with af_filter strictly _less than_
      # the user's maximum credible AF
      include_in_filtered = include_in_total & (lv[,filter] < af_limit)
      for (ancestry in ancestries) {
        # find target row of stats table
        row = stats$ancestry == ancestry & stats$filter == filter & stats$af_limit == af_limit
        exac_colname = paste("ac",ancestry,sep="_") # allele-count column of interest in lv
        # below, these n_indiv terms cancel in the final calculation, but are included as a reminder
        # that conceptually what we are computing is the average number of variants _in each person_
        # of a given ancestry that are removed by an allele frequency filter
        average_variants_without_af_filter = sum(lv[include_in_total,exac_colname]) / n_indiv[[ancestry]]
        average_variants_with_af_filter = sum(lv[include_in_filtered,exac_colname]) / n_indiv[[ancestry]]
        proportion_removed = 1 - average_variants_with_af_filter / average_variants_without_af_filter
        stats$proportion_removed[row] = proportion_removed # store it in the table
        stats$total_vars[row] = average_variants_without_af_filter
        stats$filtered_vars[row] = average_variants_with_af_filter
      }
    }
  }
  # write out results so that others can at least reproduce Figure 3A, given this table of stats
  write.table(stats,'data/filtering_stats.tsv',sep='\t',quote=FALSE,col.names=TRUE,row.names=FALSE)
} else {
  # if not re-calculating stats, read them in and then you can reproduce Figure 3A.
  # stringsAsFactors=FALSE keeps `ancestry` a character column on R < 4.0 too, matching the
  # as.character() conversion done in the recalculation branch above.
  stats = read.table('data/filtering_stats.tsv',sep='\t',quote='',header=TRUE,stringsAsFactors=FALSE)
}

# get colors and x axis values for plotting Figure 3A. if you end up with a Figure 3A that lacks
# the curves themselves and has just the axes, it's probably because you forgot to run this line.
# Index pop_colors by NAME: a factor index would silently be used as integer codes and pick
# the wrong population's colour.
stats$acol = pop_colors[as.character(stats$ancestry)]

# this saves a version of Figure 3A with gridlines. The actual version of Figure 3A for publication
# is produced in analysis/Figures.Rmd as a multi-panel plot with Figure 3B, from a modified version of this code.
# Draw the gridlined version of the figure: one curve per ancestry showing the mean number of
# variants left per exome (stats$filtered_vars) against the log10 allele-frequency threshold.
png('figures/variants_after_filtering.png', width = 800, height = 500, res = 100)
par(mar = c(4, 5, 1, 1))
# empty canvas; the x axis runs from 1% (left) down to 1e-6 (right)
plot(NA, NA, xlim = c(-2, -6), ylim = c(0, 550), ann = FALSE, axes = FALSE, yaxs = 'i')
abline(h = (0:5) * 100, lwd = .5)
for (ancestry in ancestries) {
  # the 5% threshold is excluded from the curves
  rows <- stats$ancestry == ancestry & stats$af_limit < .05
  lines(x = log10(stats$af_limit[rows]),
        y = stats$filtered_vars[rows],
        lwd = 5,
        col = stats$acol[rows])
}
axis(side = 1, at = -2:-6, labels = c("1%", "0.1%", "0.01%", "1e-5", "1e-6"), lwd = 0, lwd.ticks = 1)
axis(side = 2, at = (0:5) * 100, labels = (0:5) * 100, lwd = 0, lwd.ticks = 0, las = 2)
mtext(side = 1, line = 2.5, text = 'your maximum credible population AF')
mtext(side = 2, line = 3.5, text = 'mean predicted protein-altering variants per exome')
legend('topright',
       pop_names[ancestries],
       col = pop_colors[ancestries],
       text.font = 2,
       text.col = pop_colors[ancestries],
       lwd = 5)
dev.off()

# statistics quoted in text -- averages across populations at different filtering thresholds
mean(stats$filtered_vars[stats$af_limit == .001])
mean(stats$filtered_vars[stats$af_limit == 1e-6])



--------------------------------------------------------------------------------
/src/vcf_to_var_indiv_table.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import vcf
import sys
import pysam
import normalize # https://github.com/ericminikel/minimal_representation/blob/master/normalize.py

# suggested use: vcf_to_var_indiv_table.py $lv500genos $b37ref > genotypes.table
# or, bsub -q priority -J lv500 -o lv500.o -e lv500.e ". 
# private_paths.bash; vcf_to_var_indiv_table.py $lv500genos $b37ref > genotypes.table"

def vcf_to_var_indiv_table(genos_vcf, fasta_path):
    """Print a tab-separated table with one row per (ALT allele, carrier sample).

    Reads the VCF at ``genos_vcf`` with PyVCF and, for every ALT allele of
    every record, writes one row to stdout for each sample whose genotype
    contains that allele.  Chromosome, position and alleles are passed
    through ``normalize.normalize`` (together with the reference FASTA)
    before being written.

    Output columns: chrom, pos, ref, alt, filter, sample, zyg, gt, gq, dp, ad.
    ``filter`` is the first FILTER entry of the record, 'PASS' when the
    record has no failed filters, or '.' when the FILTER field is missing.
    ``zyg`` is 'hom' when every allele of the genotype is this ALT allele
    and 'het' otherwise (so a half-call such as ./1 is reported as 'het').

    Args:
        genos_vcf: path of the genotype VCF to read.
        fasta_path: path of the reference FASTA, opened with pysam.FastaFile.
    """
    columns = ['chrom', 'pos', 'ref', 'alt', 'filter', 'sample', 'zyg', 'gt', 'gq', 'dp', 'ad']
    genos = vcf.Reader(filename=genos_vcf, strict_whitespace=True)
    pysam_fasta = pysam.FastaFile(fasta_path)
    write = sys.stdout.write
    try:
        write('\t'.join(columns) + '\n')
        for record in genos:
            # PyVCF gives None for a '.' FILTER, [] for PASS, and a list of
            # filter names otherwise; len(None) used to raise here.
            if record.FILTER:
                filter_val = record.FILTER[0]
            elif record.FILTER is None:
                filter_val = '.'
            else:
                filter_val = 'PASS'
            # enumerate gives the 1-based allele number used in GT fields
            for allele_number, alt_allele in enumerate(record.ALT, start=1):
                if alt_allele is None:
                    # no ALT allele at this site ('.'), nothing to report
                    continue
                chrom, pos, ref, alt = normalize.normalize(
                    pysam_fasta, record.CHROM, record.POS, record.REF, str(alt_allele))
                for sample in record.samples:
                    if sample['GT'] is None:
                        continue
                    # parse the genotype once; missing alleles in half-calls
                    # become None instead of crashing int()
                    gt_alleles = [int(a) if a not in (None, '.') else None
                                  for a in sample.gt_alleles]
                    if allele_number not in gt_alleles:
                        continue
                    if all(a == allele_number for a in gt_alleles):
                        zyg = 'hom'
                    else:
                        zyg = 'het'
                    # NOTE(review): GATK-style AD lists the reference depth
                    # first, in which case AD[allele_number - 1] is NOT the
                    # depth of this ALT allele -- confirm the intended index.
                    # Behavior is left unchanged here.
                    allele_depths = sample['AD']
                    if allele_depths is None:
                        ad = None  # printed as 'None', like a missing GQ/DP
                    else:
                        ad = allele_depths[allele_number - 1]
                    row = [chrom, pos, ref, alt, filter_val, sample.sample, zyg,
                           sample['GT'], sample['GQ'], sample['DP'], ad]
                    write('\t'.join(map(str, row)) + '\n')
    finally:
        # release the FASTA handle even if parsing fails part-way through
        pysam_fasta.close()


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit('usage: vcf_to_var_indiv_table.py <genotypes.vcf> <reference.fasta> > genotypes.table')
    vcf_to_var_indiv_table(sys.argv[1], sys.argv[2])