├── .gitignore ├── Example ├── AlleleFreq.png ├── Best2FatherMatches.png ├── Best2MotherMatches.png ├── BestFatherMatches.png ├── BestFatherMatchesE.png ├── BestMotherMatches.png ├── BestMotherMatchesE.png ├── BothMatches.csv ├── CallRate.png ├── Co-call-.png ├── Co-call-HWdgm.05.png ├── ExpMM-Father.png ├── ExpMM-Mother.png ├── FatherMatches.csv ├── FatherVerify.png ├── G-diag.png ├── GBSRun.R ├── GBSRun.Rout ├── GHWdgm.05-diag.png ├── GHWdgm.05diagdepth.png ├── Gcompare.png ├── GcompareHWdgm.05.png ├── Gdiagdepth.png ├── GroupsParentCounts.csv ├── HWdisMAFsig.png ├── HapMap.hmc.txt.gz ├── Heatmap-G5HWdgm.05.png ├── HeatmapOrderHWdgm.05.csv ├── HighRelatednessHWdgm.05.csv ├── KGDCourse-Mapnet2015.pdf ├── KGDCourseInstructions-Mapnet2015.pdf ├── LRT-QQ.png ├── LRT-hist.png ├── MAF.png ├── MAFHWdgm.05.png ├── MotherMatches.csv ├── MotherVerify.png ├── PC1v2G5HWdgm.05.png ├── PC1vDepthHWdgm.05.png ├── PC1vInbHWdgm.05.png ├── PCG5HWdgm.05.pdf ├── Ped-GBS.csv ├── Ped-Groups.csv ├── PedVerify.csv ├── SNPCallRate.png ├── SNPDepth.png ├── SNPDepthHist.png ├── SampDepth-scored.png ├── SampDepth.png ├── SampDepthCR.png ├── SampDepthHist.png ├── SampleStats.csv ├── X2star-QQ.png ├── finplot.png └── seqID.csv ├── GBS-Chip-Gmatrix.R ├── GBS-PopGen.R ├── GBS-Rcpp-functions.cpp ├── GBSPedAssign.R ├── GBSRun.R ├── KGDManual.pdf ├── LICENSE ├── ParExample ├── GBSParDeer.R └── GBSParentage-Annotated.pdf ├── PopGExamples └── FstSim.R ├── README.md ├── flake.lock ├── flake.nix └── vcf2ra.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.$$$ 2 | -------------------------------------------------------------------------------- /Example/AlleleFreq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/AlleleFreq.png -------------------------------------------------------------------------------- /Example/Best2FatherMatches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/Best2FatherMatches.png -------------------------------------------------------------------------------- /Example/Best2MotherMatches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/Best2MotherMatches.png -------------------------------------------------------------------------------- /Example/BestFatherMatches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/BestFatherMatches.png -------------------------------------------------------------------------------- /Example/BestFatherMatchesE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/BestFatherMatchesE.png -------------------------------------------------------------------------------- /Example/BestMotherMatches.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/BestMotherMatches.png -------------------------------------------------------------------------------- /Example/BestMotherMatchesE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/BestMotherMatchesE.png -------------------------------------------------------------------------------- /Example/BothMatches.csv: -------------------------------------------------------------------------------- 1 | "IndivID","seqID","Inb","BestFatherMatch","FatherMatch2nd","Fatherrel","Fatherrel2nd","Father12rel","mmrateFather","mmnumFather","exp.mmrateFather","mmrateFather2","exp.mmrateFather2","Fathersd","FatherReliability","FatherAssign","FatherInb","BestMotherMatch","MotherMatch2nd","Motherrel","Motherrel2nd","Mother12rel","mmrateMother","mmnumMother","exp.mmrateMother","mmrateMother2","exp.mmrateMother2","MotherAssign","MotherInb","mmrateF1M1","mmnumF1M1","exp.mmrateF1M1","mmrateF2M1","mmnumF2M1","exp.mmrateF2M1","mmrateF1M2","mmnumF1M2","exp.mmrateF1M2","mmrateF2M2","mmnumF2M2","exp.mmrateF2M2","relF1M1","relF1M2","relF2M1","relF2M2","BothAssign","Alternate" 2 | 6,"Seq6",0.383534452775455,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.107415457906596,0.0335530021853082,-0.0544974083276382,0.0844317776986106,8421,0.0112815548179676,0.083876392684465,0.0114131444435993,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 3 | 7,"Seq7",0.539534217185718,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.0570259698278973,0.0444797382052058,-0.0544974083276382,0.101791530944625,7368,0.0150990152396775,0.0884836617305103,0.0148000867490769,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 4 | 8,"Seq8",0.565170512934824,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.167674030196227,0.0416852798384816,-0.0544974083276382,0.0898573081372927,7779,0.0148318560635457,0.0885428770625145,0.0142101267606034,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 5 | 9,"Seq9",0.497705441450656,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,3,2,0.0814615905022324,0.0230371149534904,-0.0544974083276382,0.0834485938220378,8676,0.0138914866041284,0.0979281945695535,0.0137828293072655,"N",0.487559175744679,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 6 | 10,"Seq10",0.342266614952426,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.172082501355211,0.0538219718245321,-0.0544974083276382,0.0763227361413979,8713,0.0104483175016531,0.0810374407104652,0.0100592133205013,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 7 | 11,"Seq11",0.288372016473646,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.0967331314673893,0.059544844969064,-0.0544974083276382,0.0778999886608459,8819,0.00931920236772354,0.0761335326357748,0.00921203850005878,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 8 | 12,"Seq12",0.565992263606612,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,3,2,0.0905431913576992,0.0322305350116227,-0.0544974083276382,0.0849438722370096,8641,0.0143660368343879,0.101533307563458,0.0144190810371504,"N",0.487559175744679,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 9 | 13,"Seq13",0.513216319837214,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.176093693516121,0.0936244123826835,-0.0544974083276382,0.0837249567153104,8086,0.0133047163630537,0.0823841645471635,0.0131420279358963,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 10 | 14,"Seq14",0.491704907073318,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.063601063565203,0.0589539601223658,-0.0544974083276382,0.0988502523836231,7132,0.0158393407809956,0.0845536737552528,0.0148943875749028,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 11 | 15,"Seq15",0.56991221052882,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.0944472074320187,0.00779214643886423,-0.0544974083276382,0.0938511326860841,7725,0.0146984994367624,0.0946132596685083,0.0149931062115617,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 12 | 16,"Seq16",0.502931877240651,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,3,2,0.0858840869207445,0.0528001658989243,-0.0544974083276382,0.085026249714677,8762,0.0137973168648867,0.0993614303959132,0.0141813708340843,"N",0.487559175744679,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 13 | 17,"Seq17",0.451429988680822,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.169389087802133,0.129437827144759,-0.0544974083276382,0.084758457121458,8011,0.0126799044808789,0.0788590604026846,0.0122338417782938,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 14 | 18,"Seq18",0.379951462480052,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.208858431431865,0.0789809459465704,-0.0544974083276382,0.0778617083091226,8605,0.0113619749672345,0.078757225433526,0.0113182258649615,"Y",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"M","" 15 | 19,"Seq19",0.366228057026487,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.117990471602531,0.0936602372211071,-0.0544974083276382,0.0798159861989649,8695,0.0104253376771264,0.0767120503962609,0.0103058571950161,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 16 | 20,"Seq20",0.23013693513627,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.142007758845196,0.0534557575397165,-0.0544974083276382,0.0655575143234905,9076,0.00759043435179583,0.0703600038606312,0.00774867908679424,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 17 | 21,"Seq21",0.409957845652311,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,3,2,0.0942381157725536,0.0854742901289454,-0.0544974083276382,0.0800566387103801,9181,0.0121732521426099,0.0905854915919971,0.0126456082068757,"N",0.487559175744679,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 18 | 22,"Seq22",0.351550401557691,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,2,3,0.109133750652248,0.0562901936163714,-0.0544974083276382,0.0792422317199954,8657,0.0105069624979591,0.0788563288563289,0.0104194071477264,"N",0.749600589156423,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 19 | 77,"Seq77",1.22893379690863,76,51,0.176822895878788,0.0704621521672901,-0.000951750477507351,0.0276134122287968,507,0.0204405940201939,0.0318725099601594,0.0204632718700759,NA,NA,"N",0.552686531192246,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 20 | 78,"Seq78",0.538172372890508,75,76,0.173153649164656,0.0732474796252066,-0.114982209743796,0.0791747978812378,7174,0.0128859928702175,0.0878798974734529,0.0134655385508251,NA,NA,"N",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 21 | 79,"Seq79",0.697339577211706,75,76,0.214861762765384,0.113450991256966,-0.114982209743796,0.0862068965517241,4524,0.0194211000282475,0.0729526519000203,0.0183347547961564,NA,NA,"Y",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"F","" 22 | 80,"Seq80",0.689126979347568,76,75,0.107117672764965,0.0700896225443905,-0.114982209743796,0.0868362831858407,7232,0.0164846862994294,0.0963986339646073,0.0161582577685021,NA,NA,"N",0.552686531192246,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 23 | 81,"Seq81",0.368566595325077,75,76,0.21299175409325,0.0806090589702811,-0.114982209743796,0.0660127253446448,7544,0.0104189064220588,0.0784494693124135,0.0115154672236285,NA,NA,"Y",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"F","" 24 | 82,"Seq82",0.630624434480554,75,76,0.14751388038286,0.0580430167594395,-0.114982209743796,0.0882584712371946,6345,0.0157208462876105,0.092987159587978,0.0158692869502086,NA,NA,"N",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 25 | 83,"Seq83",0.649692805777502,75,76,0.205030936731317,0.156799406009068,-0.114982209743796,0.0887328439195659,6266,0.0163875478720433,0.0835501590054929,0.0171344307176068,0.0176166666922938,84.1,"B",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"B","" 26 | 84,"Seq84",0.501406203146767,75,76,0.0848384610818344,0.0566767234899142,-0.114982209743796,0.0882554474441551,7297,0.0130929145774736,0.0856045143474607,0.0131756860888192,NA,NA,"N",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 27 | 85,"Seq85",0.745995544673454,75,76,0.0674362316694876,0.0433271847258371,-0.114982209743796,0.100684655658478,4966,0.0195356322776762,0.0852827408362498,0.0191480590911474,NA,NA,"N",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 28 | 86,"Seq86",0.615256941968028,75,76,0.211303503807913,0.104441655229781,-0.114982209743796,0.0834246360932853,6389,0.0156579075120659,0.0858458961474037,0.0161387090255536,NA,NA,"Y",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"F","" 29 | 87,"Seq87",0.588451072285493,76,75,0.108814085395529,0.075185173383128,-0.114982209743796,0.0860798759208996,7737,0.0156151509317027,0.0921746776084408,0.0148968085935884,NA,NA,"N",0.552686531192246,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 30 | 88,"Seq88",0.638662920207778,75,76,0.0935134469651019,0.0680141283782321,-0.114982209743796,0.0923962435625568,6602,0.0154136198223976,0.0919279519679786,0.0157701659420167,NA,NA,"N",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 31 | 89,"Seq89",0.717954049522055,75,76,0.194627301018226,0.115012244979374,-0.114982209743796,0.0890304804546237,5807,0.0184285040619402,0.0866325104797392,0.0178361511382535,NA,NA,"N",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 32 | 90,"Seq90",0.579729237815198,75,76,0.157055508369126,0.0816075561133294,-0.114982209743796,0.0841746248294679,7330,0.0130501437337347,0.0879370003579525,0.0136742093114208,NA,NA,"N",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 33 | 91,"Seq91",0.606221760723515,75,76,0.145864328160801,0.0740650666839441,-0.114982209743796,0.0904631366325206,6931,0.0143305220162879,0.0878161509821543,0.0142502481000007,NA,NA,"N",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 34 | 92,"Seq92",0.655835456629786,75,76,0.099421750916209,0.0511537562838907,-0.114982209743796,0.0973511051121984,5927,0.0171103139252848,0.0895909645909646,0.0166558085636702,NA,NA,"N",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 35 | 93,"Seq93",0.556273353779163,75,76,0.127507216087632,0.078664605666079,-0.114982209743796,0.0846132748217224,7292,0.0133949124300978,0.0891872452649245,0.0142112387371009,NA,NA,"N",0.729665591584273,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,0,NA,NA,NA,NA,NA,"N","" 36 | -------------------------------------------------------------------------------- /Example/CallRate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/CallRate.png -------------------------------------------------------------------------------- /Example/Co-call-.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/Co-call-.png -------------------------------------------------------------------------------- /Example/Co-call-HWdgm.05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/Co-call-HWdgm.05.png -------------------------------------------------------------------------------- /Example/ExpMM-Father.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/ExpMM-Father.png -------------------------------------------------------------------------------- /Example/ExpMM-Mother.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/ExpMM-Mother.png -------------------------------------------------------------------------------- /Example/FatherMatches.csv: -------------------------------------------------------------------------------- 1 | "IndivID","seqID","BestFatherMatch","FatherMatch2nd","Fatherrel","Fatherrel2nd","Father12rel","mmrateFather","mmnumFather","exp.mmrateFather","mmrateFather2","exp.mmrateFather2","Fathersd","FatherReliability","FatherAssign","Inb","FatherInb" 2 | 77,"Seq77",76,51,0.176822895878788,0.0704621521672901,-0.000951750477507351,0.0276134122287968,507,0.0204405940201939,0.0318725099601594,0.0204632718700759,NA,NA,"N",1.22893379690863,0.552686531192246 3 | 78,"Seq78",75,76,0.173153649164656,0.0732474796252066,-0.114982209743796,0.0791747978812378,7174,0.0128859928702175,0.0878798974734529,0.0134655385508251,NA,NA,"N",0.538172372890508,0.729665591584273 4 | 79,"Seq79",75,76,0.214861762765384,0.113450991256966,-0.114982209743796,0.0862068965517241,4524,0.0194211000282475,0.0729526519000203,0.0183347547961564,NA,NA,"Y",0.697339577211706,0.729665591584273 5 | 80,"Seq80",76,75,0.107117672764965,0.0700896225443905,-0.114982209743796,0.0868362831858407,7232,0.0164846862994294,0.0963986339646073,0.0161582577685021,NA,NA,"N",0.689126979347568,0.552686531192246 6 | 81,"Seq81",75,76,0.21299175409325,0.0806090589702811,-0.114982209743796,0.0660127253446448,7544,0.0104189064220588,0.0784494693124135,0.0115154672236285,NA,NA,"Y",0.368566595325077,0.729665591584273 7 | 82,"Seq82",75,76,0.14751388038286,0.0580430167594395,-0.114982209743796,0.0882584712371946,6345,0.0157208462876105,0.092987159587978,0.0158692869502086,NA,NA,"N",0.630624434480554,0.729665591584273 8 | 83,"Seq83",75,76,0.205030936731317,0.156799406009068,-0.114982209743796,0.0887328439195659,6266,0.0163875478720433,0.0835501590054929,0.0171344307176068,0.0176166666922938,84.1,"B",0.649692805777502,0.729665591584273 9 | 84,"Seq84",75,76,0.0848384610818344,0.0566767234899142,-0.114982209743796,0.0882554474441551,7297,0.0130929145774736,0.0856045143474607,0.0131756860888192,NA,NA,"N",0.501406203146767,0.729665591584273 10 | 85,"Seq85",75,76,0.0674362316694876,0.0433271847258371,-0.114982209743796,0.100684655658478,4966,0.0195356322776762,0.0852827408362498,0.0191480590911474,NA,NA,"N",0.745995544673454,0.729665591584273 11 | 86,"Seq86",75,76,0.211303503807913,0.104441655229781,-0.114982209743796,0.0834246360932853,6389,0.0156579075120659,0.0858458961474037,0.0161387090255536,NA,NA,"Y",0.615256941968028,0.729665591584273 12 | 87,"Seq87",76,75,0.108814085395529,0.075185173383128,-0.114982209743796,0.0860798759208996,7737,0.0156151509317027,0.0921746776084408,0.0148968085935884,NA,NA,"N",0.588451072285493,0.552686531192246 13 | 88,"Seq88",75,76,0.0935134469651019,0.0680141283782321,-0.114982209743796,0.0923962435625568,6602,0.0154136198223976,0.0919279519679786,0.0157701659420167,NA,NA,"N",0.638662920207778,0.729665591584273 14 | 89,"Seq89",75,76,0.194627301018226,0.115012244979374,-0.114982209743796,0.0890304804546237,5807,0.0184285040619402,0.0866325104797392,0.0178361511382535,NA,NA,"N",0.717954049522055,0.729665591584273 15 | 90,"Seq90",75,76,0.157055508369126,0.0816075561133294,-0.114982209743796,0.0841746248294679,7330,0.0130501437337347,0.0879370003579525,0.0136742093114208,NA,NA,"N",0.579729237815198,0.729665591584273 16 | 91,"Seq91",75,76,0.145864328160801,0.0740650666839441,-0.114982209743796,0.0904631366325206,6931,0.0143305220162879,0.0878161509821543,0.0142502481000007,NA,NA,"N",0.606221760723515,0.729665591584273 17 | 92,"Seq92",75,76,0.099421750916209,0.0511537562838907,-0.114982209743796,0.0973511051121984,5927,0.0171103139252848,0.0895909645909646,0.0166558085636702,NA,NA,"N",0.655835456629786,0.729665591584273 18 | 93,"Seq93",75,76,0.127507216087632,0.078664605666079,-0.114982209743796,0.0846132748217224,7292,0.0133949124300978,0.0891872452649245,0.0142112387371009,NA,NA,"N",0.556273353779163,0.729665591584273 19 | -------------------------------------------------------------------------------- /Example/FatherVerify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/FatherVerify.png -------------------------------------------------------------------------------- /Example/G-diag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/G-diag.png -------------------------------------------------------------------------------- /Example/GBSRun.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | genofile <- "HapMap.hmc.txt.gz" 4 | 5 | #source("/GBS-Chip-Gmatrix.R") 6 | source("../../GBS-Chip-Gmatrix.R") 7 | Gfull <- calcG() 8 | GHWdgm.05 <- calcG(which(HWdis > -0.05),"HWdgm.05", npc=4) # recalculate using Hardy-Weinberg disequilibrium cut-off at -0.05 9 | 10 | pedfile <- "Ped-GBS.csv" 11 | groupsfile <- "Ped-Groups.csv" 12 | 13 | rel.thresh <- 0.2 14 | emm.thresh <- 0.075 # to make results same as before emm used (to match original example) 15 | GCheck <- "GHWdgm.05$G5" 16 | #source("/GBSPedAssign.R") 17 | source("../../GBSPedAssign.R") 18 | 19 | #G5 <- GHWdgm.05$G5 20 | #save(G5,seqID,file="G5.RData") 21 | -------------------------------------------------------------------------------- /Example/GBSRun.Rout: -------------------------------------------------------------------------------- 1 | 2 | R version 3.6.0 (2019-04-26) -- "Planting of a Tree" 3 | Copyright (C) 2019 The R Foundation for Statistical Computing 4 | Platform: x86_64-redhat-linux-gnu (64-bit) 5 | 6 | R is free software and comes with ABSOLUTELY NO WARRANTY. 7 | You are welcome to redistribute it under certain conditions. 8 | Type 'license()' or 'licence()' for distribution details. 9 | 10 | Natural language support but running in an English locale 11 | 12 | R is a collaborative project with many contributors. 13 | Type 'contributors()' for more information and 14 | 'citation()' on how to cite R or R packages in publications. 15 | 16 | Type 'demo()' for some demos, 'help()' for on-line help, or 17 | 'help.start()' for an HTML browser interface to help. 18 | Type 'q()' to quit R. 19 | 20 | > genofile <- "HapMap.hmc.txt.gz" 21 | > 22 | > #source("/GBS-Chip-Gmatrix.R") 23 | > source("../../GBS-Chip-Gmatrix.R") 24 | KGD version: 1.0.0 Pre-release 1 25 | Loading required package: Rcpp 26 | Loading C++ functions: /home/doddsk/GBS/Code/GBS-Rcpp-functions.cpp 27 | Data file has 96 samples 28 | Data file has 14709 SNPs 29 | 0 SNPs with MAF=0 or depth < 0.01 removed 30 | Analysing 96 individuals and 14709 SNPs 31 | Proportion of missing genotypes: 0.3640826 Callrate: 0.6359174 32 | Mean sample depth: 9.281431 33 | > Gfull <- calcG() 34 | Calculating G matrix, analysis code: 35 | Using global allele frequencies 36 | # SNPs: 14709 37 | # individuals: 96 38 | Mean co-call rate (for sample pairs): 0.4780837 39 | Min co-call rate (for sample pairs): 0.002719423 40 | Proportion of missing genotypes: 0.3640826 Callrate: 0.6359174 41 | Mean sample depth: 9.281431 42 | Mean self-relatedness (G5 diagonal): 1.215244 43 | Self-Rel vs log(depth) regression = -0.0928 p = 3.86e-14 (min depth = 0.1) 44 | > GHWdgm.05 <- calcG(which(HWdis > -0.05),"HWdgm.05", npc=4) # recalculate using Hardy-Weinberg disequilibrium cut-off at -0.05 45 | Calculating G matrix, analysis code: HWdgm.05 46 | Using global allele frequencies 47 | # SNPs: 11500 48 | # individuals: 96 49 | Mean co-call rate (for sample pairs): 0.3688646 50 | Min co-call rate (for sample pairs): 8.695652e-05 51 | Proportion of missing genotypes: 0.445558 Callrate: 0.554442 52 | Mean sample depth: 4.348901 53 | Mean self-relatedness (G5 diagonal): 1.577778 54 | Loading required package: parallelDist 55 | Using parallelDist function in heatmap 56 | Self-Rel vs log(depth) regression = -0.117 p = 4.81e-12 (min depth = 0.1) 57 | minimum eigenvalue: 4.608917e-36 58 | first 8 eigenvalues: 0.2184196 0.1793586 0.1293921 0.06300112 0.02642587 0.01990788 0.01812084 0.0138231 59 | > 60 | > pedfile <- "Ped-GBS.csv" 61 | > groupsfile <- "Ped-Groups.csv" 62 | > 63 | > rel.thresh <- 0.2 64 | > emm.thresh <- 0.075 # to make results same as before emm used (to match original example) 65 | > GCheck <- "GHWdgm.05$G5" 66 | > #source("/GBSPedAssign.R") 67 | > source("../../GBSPedAssign.R") 68 | GBS-PedAssign for KGD version: 1.0.0 Pre-release 69 | Parentage parameter settings 70 | ---------------------------- 71 | rel.threshF 0.2 72 | rel.threshM 0.2 73 | emm.thresh 0.075 74 | emm.thresh2 0.15 75 | emmdiff.thresh2 0 76 | mindepth.mm 1 77 | inb.thresh 0.2 (parent relatedness v inbreeding) 78 | minr4inb 79 | boota.thresh 99 80 | depth.min 0 (for bootstrapping) 81 | depth.max Inf (for bootstrapping) 82 | nboot 1000 83 | boot.thresh 0.05 (relatedness difference to invoke bootstrapping) 84 | matchmethod rel 85 | 56 matches out of 78 Father comparisons: 71.8 % 86 | Mean relatedness for Father matches 0.26 87 | Mean relatedness for Father non-matches -0.0238 88 | 51 matches out of 78 Mother comparisons: 65.4 % 89 | Mean relatedness for Mother matches 0.298 90 | Mean relatedness for Mother non-matches 0.0181 91 | Mean relatedness for full-sib families (as given) 92 | famfathers fammothers noffspring meanrel 93 | 1 23 1 17 0.2286663 94 | 2 47 24 18 0.2653806 95 | 3 71 48 18 0.2921438 96 | 4 94 72 17 0.2499820 97 | Mean relatedness within all full-sib families 0.2596066 98 | Mean relatedness between individuals in full-sib families with different parents -0.08722714 99 | 100 | Summary of Father Assignments 101 | 102 | B N Y Sum 103 | 1 13 3 17 104 | 105 | Summary of Mother Assignments 106 | 107 | N Y Sum 108 | 16 1 17 109 | 110 | Summary of joint Assignments 111 | 112 | B F M N Sum 113 | 1 3 1 29 34 114 | There were 34 warnings (use warnings() to see them) 115 | > 116 | > #G5 <- GHWdgm.05$G5 117 | > #save(G5,seqID,file="G5.RData") 118 | > 119 | > proc.time() 120 | user system elapsed 121 | 9.485 0.467 12.095 122 | -------------------------------------------------------------------------------- /Example/GHWdgm.05-diag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/GHWdgm.05-diag.png -------------------------------------------------------------------------------- /Example/GHWdgm.05diagdepth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/GHWdgm.05diagdepth.png -------------------------------------------------------------------------------- /Example/Gcompare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/Gcompare.png -------------------------------------------------------------------------------- /Example/GcompareHWdgm.05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/GcompareHWdgm.05.png -------------------------------------------------------------------------------- /Example/Gdiagdepth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/Gdiagdepth.png -------------------------------------------------------------------------------- /Example/GroupsParentCounts.csv: -------------------------------------------------------------------------------- 1 | "IndivID","ParGroup","Genotyped","FatherFreq","MotherFreq" 2 | "1","B","Y",0,0 3 | "2","B","Y",0,13 4 | "23","A","Y",0,0 5 | "24","B","Y",0,0 6 | "25","B","Y",0,0 7 | "26","B","Y",0,0 8 | "27","A","Y",0,0 9 | "28","A","Y",0,0 10 | "3","B","Y",0,4 11 | "4","A","Y",0,0 12 | "47","A","Y",0,0 13 | "48","B","Y",0,0 14 | "49","B","Y",0,0 15 | "5","A","Y",0,0 16 | "50","B","Y",0,0 17 | "51","A","Y",0,0 18 | "52","A","Y",0,0 19 | "71","A","Y",0,0 20 | "72","B","Y",0,0 21 | "73","B","Y",0,0 22 | "74","B","Y",0,0 23 | "75","A","Y",14,0 24 | "76","A","Y",3,0 25 | "94","A","Y",0,0 26 | -------------------------------------------------------------------------------- /Example/HWdisMAFsig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/HWdisMAFsig.png -------------------------------------------------------------------------------- /Example/HapMap.hmc.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/HapMap.hmc.txt.gz -------------------------------------------------------------------------------- /Example/Heatmap-G5HWdgm.05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/Heatmap-G5HWdgm.05.png -------------------------------------------------------------------------------- /Example/HeatmapOrderHWdgm.05.csv: -------------------------------------------------------------------------------- 1 | rowInd,seqIDInd,seqID 2 | 96,96,Blank 3 | 4,4,Seq55 4 | 82,82,Seq57 5 | 81,81,Seq58 6 | 92,92,Seq48 7 | 86,86,Seq56 8 | 94,94,Seq53 9 | 1,1,Ref 10 | 32,32,Seq5 11 | 55,55,Seq27 12 | 20,20,Seq74 13 | 17,17,Seq85 14 | 15,15,Seq79 15 | 25,25,Seq83 16 | 11,11,Seq93 17 | 28,28,Seq91 18 | 27,27,Seq77 19 | 66,66,Seq25 20 | 54,54,Seq28 21 | 63,63,Seq39 22 | 68,68,Seq24 23 | 77,77,Seq41 24 | 71,71,Seq43 25 | 58,58,Seq38 26 | 69,69,Seq42 27 | 65,65,Seq45 28 | 59,59,Seq46 29 | 60,60,Seq37 30 | 70,70,Seq30 31 | 72,72,Seq35 32 | 56,56,Seq47 33 | 76,76,Seq33 34 | 75,75,Seq44 35 | 64,64,Seq34 36 | 73,73,Seq32 37 | 57,57,Seq36 38 | 62,62,Seq29 39 | 74,74,Seq40 40 | 61,61,Seq31 41 | 52,52,Seq12 42 | 31,31,Seq4 43 | 42,42,Seq2 44 | 51,51,Seq13 45 | 50,50,Seq8 46 | 53,53,Seq9 47 | 37,37,Seq15 48 | 45,45,Seq21 49 | 41,41,Seq7 50 | 48,48,Seq16 51 | 40,40,Seq14 52 | 33,33,Seq23 53 | 39,39,Seq17 54 | 34,34,Seq10 55 | 46,46,Seq6 56 | 38,38,Seq11 57 | 49,49,Seq19 58 | 47,47,Seq18 59 | 35,35,Seq20 60 | 36,36,Seq22 61 | 10,10,Seq94 62 | 78,78,Seq51 63 | 90,90,Seq49 64 | 79,79,Seq52 65 | 6,6,Seq69 66 | 93,93,Seq60 67 | 80,80,Seq71 68 | 2,2,Seq68 69 | 88,88,Seq61 70 | 7,7,Seq67 71 | 84,84,Seq70 72 | 83,83,Seq62 73 | 3,3,Seq59 74 | 5,5,Seq64 75 | 95,95,Seq63 76 | 85,85,Seq65 77 | 87,87,Seq66 78 | 89,89,Seq54 79 | 43,43,Seq3 80 | 67,67,Seq26 81 | 9,9,Seq76 82 | 44,44,Seq1 83 | 16,16,Seq89 84 | 23,23,Seq86 85 | 24,24,Seq90 86 | 12,12,Seq78 87 | 8,8,Seq75 88 | 91,91,Seq50 89 | 21,21,Seq72 90 | 19,19,Seq73 91 | 22,22,Seq81 92 | 26,26,Seq88 93 | 29,29,Seq82 94 | 30,30,Seq92 95 | 13,13,Seq84 96 | 18,18,Seq80 97 | 14,14,Seq87 98 | -------------------------------------------------------------------------------- /Example/HighRelatednessHWdgm.05.csv: -------------------------------------------------------------------------------- 1 | "Indiv1","Indiv2","G5rel","SelfRel1","SelfRel2" 2 | "Seq20","Blank",0.910659290004949,1.23013693513627,2.93826574816052 3 | "Seq18","Blank",0.90502637244154,1.37995146248005,2.93826574816052 4 | "Seq12","Blank",1.67639826454047,1.56599226360661,2.93826574816052 5 | -------------------------------------------------------------------------------- /Example/KGDCourse-Mapnet2015.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/KGDCourse-Mapnet2015.pdf -------------------------------------------------------------------------------- /Example/KGDCourseInstructions-Mapnet2015.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/KGDCourseInstructions-Mapnet2015.pdf -------------------------------------------------------------------------------- /Example/LRT-QQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/LRT-QQ.png -------------------------------------------------------------------------------- /Example/LRT-hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/LRT-hist.png -------------------------------------------------------------------------------- /Example/MAF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/MAF.png -------------------------------------------------------------------------------- /Example/MAFHWdgm.05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/MAFHWdgm.05.png -------------------------------------------------------------------------------- /Example/MotherMatches.csv: -------------------------------------------------------------------------------- 1 | "IndivID","seqID","BestMotherMatch","MotherMatch2nd","Motherrel","Motherrel2nd","Mother12rel","mmrateMother","mmnumMother","exp.mmrateMother","mmrateMother2","exp.mmrateMother2","MotherAssign","Inb","MotherInb" 2 | 6,"Seq6",2,3,0.107415457906596,0.0335530021853082,-0.0544974083276382,0.0844317776986106,8421,0.0112815548179676,0.083876392684465,0.0114131444435993,"N",0.383534452775455,0.749600589156423 3 | 7,"Seq7",2,3,0.0570259698278973,0.0444797382052058,-0.0544974083276382,0.101791530944625,7368,0.0150990152396775,0.0884836617305103,0.0148000867490769,"N",0.539534217185718,0.749600589156423 4 | 8,"Seq8",2,3,0.167674030196227,0.0416852798384816,-0.0544974083276382,0.0898573081372927,7779,0.0148318560635457,0.0885428770625145,0.0142101267606034,"N",0.565170512934824,0.749600589156423 5 | 9,"Seq9",3,2,0.0814615905022324,0.0230371149534904,-0.0544974083276382,0.0834485938220378,8676,0.0138914866041284,0.0979281945695535,0.0137828293072655,"N",0.497705441450656,0.487559175744679 6 | 10,"Seq10",2,3,0.172082501355211,0.0538219718245321,-0.0544974083276382,0.0763227361413979,8713,0.0104483175016531,0.0810374407104652,0.0100592133205013,"N",0.342266614952426,0.749600589156423 7 | 11,"Seq11",2,3,0.0967331314673893,0.059544844969064,-0.0544974083276382,0.0778999886608459,8819,0.00931920236772354,0.0761335326357748,0.00921203850005878,"N",0.288372016473646,0.749600589156423 8 | 12,"Seq12",3,2,0.0905431913576992,0.0322305350116227,-0.0544974083276382,0.0849438722370096,8641,0.0143660368343879,0.101533307563458,0.0144190810371504,"N",0.565992263606612,0.487559175744679 9 | 13,"Seq13",2,3,0.176093693516121,0.0936244123826835,-0.0544974083276382,0.0837249567153104,8086,0.0133047163630537,0.0823841645471635,0.0131420279358963,"N",0.513216319837214,0.749600589156423 10 | 14,"Seq14",2,3,0.063601063565203,0.0589539601223658,-0.0544974083276382,0.0988502523836231,7132,0.0158393407809956,0.0845536737552528,0.0148943875749028,"N",0.491704907073318,0.749600589156423 11 | 15,"Seq15",2,3,0.0944472074320187,0.00779214643886423,-0.0544974083276382,0.0938511326860841,7725,0.0146984994367624,0.0946132596685083,0.0149931062115617,"N",0.56991221052882,0.749600589156423 12 | 16,"Seq16",3,2,0.0858840869207445,0.0528001658989243,-0.0544974083276382,0.085026249714677,8762,0.0137973168648867,0.0993614303959132,0.0141813708340843,"N",0.502931877240651,0.487559175744679 13 | 17,"Seq17",2,3,0.169389087802133,0.129437827144759,-0.0544974083276382,0.084758457121458,8011,0.0126799044808789,0.0788590604026846,0.0122338417782938,"N",0.451429988680822,0.749600589156423 14 | 18,"Seq18",2,3,0.208858431431865,0.0789809459465704,-0.0544974083276382,0.0778617083091226,8605,0.0113619749672345,0.078757225433526,0.0113182258649615,"Y",0.379951462480052,0.749600589156423 15 | 19,"Seq19",2,3,0.117990471602531,0.0936602372211071,-0.0544974083276382,0.0798159861989649,8695,0.0104253376771264,0.0767120503962609,0.0103058571950161,"N",0.366228057026487,0.749600589156423 16 | 20,"Seq20",2,3,0.142007758845196,0.0534557575397165,-0.0544974083276382,0.0655575143234905,9076,0.00759043435179583,0.0703600038606312,0.00774867908679424,"N",0.23013693513627,0.749600589156423 17 | 21,"Seq21",3,2,0.0942381157725536,0.0854742901289454,-0.0544974083276382,0.0800566387103801,9181,0.0121732521426099,0.0905854915919971,0.0126456082068757,"N",0.409957845652311,0.487559175744679 18 | 22,"Seq22",2,3,0.109133750652248,0.0562901936163714,-0.0544974083276382,0.0792422317199954,8657,0.0105069624979591,0.0788563288563289,0.0104194071477264,"N",0.351550401557691,0.749600589156423 19 | -------------------------------------------------------------------------------- /Example/MotherVerify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/MotherVerify.png -------------------------------------------------------------------------------- /Example/PC1v2G5HWdgm.05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/PC1v2G5HWdgm.05.png -------------------------------------------------------------------------------- /Example/PC1vDepthHWdgm.05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/PC1vDepthHWdgm.05.png -------------------------------------------------------------------------------- /Example/PC1vInbHWdgm.05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/PC1vInbHWdgm.05.png -------------------------------------------------------------------------------- /Example/PCG5HWdgm.05.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/PCG5HWdgm.05.pdf -------------------------------------------------------------------------------- /Example/Ped-GBS.csv: -------------------------------------------------------------------------------- 1 | IndivID,seqID,Family,Relationship,FatherID,MotherID,FatherGroup,MotherGroup 2 | 1,Seq1,1,Dam,5,3,, 3 | 2,Seq2,1,Grand Dam,,,, 4 | 3,Seq3,1,Grand Dam,,,, 5 | 4,Seq4,1,Grand Sire,,,, 6 | 5,Seq5,1,Grand Sire,,,, 7 | 6,Seq6,1,Offspring,23,1,,B 8 | 7,Seq7,1,Offspring,23,1,,B 9 | 8,Seq8,1,Offspring,23,1,,B 10 | 9,Seq9,1,Offspring,23,1,,B 11 | 10,Seq10,1,Offspring,23,1,,B 12 | 11,Seq11,1,Offspring,23,1,,B 13 | 12,Seq12,1,Offspring,23,1,,B 14 | 13,Seq13,1,Offspring,23,1,,B 15 | 14,Seq14,1,Offspring,23,1,,B 16 | 15,Seq15,1,Offspring,23,1,,B 17 | 16,Seq16,1,Offspring,23,1,,B 18 | 17,Seq17,1,Offspring,23,1,,B 19 | 18,Seq18,1,Offspring,23,1,,B 20 | 19,Seq19,1,Offspring,23,1,,B 21 | 20,Seq20,1,Offspring,23,1,,B 22 | 21,Seq21,1,Offspring,23,1,,B 23 | 22,Seq22,1,Offspring,23,1,,B 24 | 23,Seq23,1,Sire,4,2,, 25 | 24,Seq24,2,Dam,28,25,, 26 | 25,Seq25,2,Grand Dam,,,, 27 | 26,Seq26,2,Grand Dam,,,, 28 | 27,Seq27,2,Grand Sire,,,, 29 | 28,Seq28,2,Grand Sire,,,, 30 | 29,Seq29,2,Offspring,47,24,, 31 | 30,Seq30,2,Offspring,47,24,, 32 | 31,Seq31,2,Offspring,47,24,, 33 | 32,Seq32,2,Offspring,47,24,, 34 | 33,Seq33,2,Offspring,47,24,, 35 | 34,Seq34,2,Offspring,47,24,, 36 | 35,Seq35,2,Offspring,47,24,, 37 | 36,Seq36,2,Offspring,47,24,, 38 | 37,Seq37,2,Offspring,47,24,, 39 | 38,Seq38,2,Offspring,47,24,, 40 | 39,Seq39,2,Offspring,47,24,, 41 | 40,Seq40,2,Offspring,47,24,, 42 | 41,Seq41,2,Offspring,47,24,, 43 | 42,Seq42,2,Offspring,47,24,, 44 | 43,Seq43,2,Offspring,47,24,, 45 | 44,Seq44,2,Offspring,47,24,, 46 | 45,Seq45,2,Offspring,47,24,, 47 | 46,Seq46,2,Offspring,47,24,, 48 | 47,Seq47,2,Sire,27,26,, 49 | 48,Seq48,3,Dam,52,50,, 50 | 49,Seq49,3,Grand Dam,,,, 51 | 50,Seq50,3,Grand Dam,,,, 52 | 51,Seq51,3,Grand Sire,,,, 53 | 52,Seq52,3,Grand Sire,,,, 54 | 53,Seq53,3,Offspring,71,48,, 55 | 54,Seq54,3,Offspring,71,48,, 56 | 55,Seq55,3,Offspring,71,48,, 57 | 56,Seq56,3,Offspring,71,48,, 58 | 57,Seq57,3,Offspring,71,48,, 59 | 58,Seq58,3,Offspring,71,48,, 60 | 59,Seq59,3,Offspring,71,48,, 61 | 60,Seq60,3,Offspring,71,48,, 62 | 61,Seq61,3,Offspring,71,48,, 63 | 62,Seq62,3,Offspring,71,48,, 64 | 63,Seq63,3,Offspring,71,48,, 65 | 64,Seq64,3,Offspring,71,48,, 66 | 65,Seq65,3,Offspring,71,48,, 67 | 66,Seq66,3,Offspring,71,48,, 68 | 67,Seq67,3,Offspring,71,48,, 69 | 68,Seq68,3,Offspring,71,48,, 70 | 69,Seq69,3,Offspring,71,48,, 71 | 70,Seq70,3,Offspring,71,48,, 72 | 71,Seq71,3,Sire,51,49,, 73 | 72,Seq72,4,Dam,75,73,, 74 | 73,Seq73,4,Grand Dam,,,, 75 | 74,Seq74,4,Grand Dam,,,, 76 | 75,Seq75,4,Grand Sire,,,, 77 | 76,Seq76,4,Grand Sire,,,, 78 | 77,Seq77,4,Offspring,94,72,A, 79 | 78,Seq78,4,Offspring,94,72,A, 80 | 79,Seq79,4,Offspring,94,72,A, 81 | 80,Seq80,4,Offspring,94,72,A, 82 | 81,Seq81,4,Offspring,94,72,A, 83 | 82,Seq82,4,Offspring,94,72,A, 84 | 83,Seq83,4,Offspring,94,72,A, 85 | 84,Seq84,4,Offspring,94,72,A, 86 | 85,Seq85,4,Offspring,94,72,A, 87 | 86,Seq86,4,Offspring,94,72,A, 88 | 87,Seq87,4,Offspring,94,72,A, 89 | 88,Seq88,4,Offspring,94,72,A, 90 | 89,Seq89,4,Offspring,94,72,A, 91 | 90,Seq90,4,Offspring,94,72,A, 92 | 91,Seq91,4,Offspring,94,72,A, 93 | 92,Seq92,4,Offspring,94,72,A, 94 | 93,Seq93,4,Offspring,94,72,A, 95 | 94,Seq94,4,Sire,76,74,, 96 | 95,Blank,,negative,,,, 97 | 96,Ref,,reference,,,, 98 | -------------------------------------------------------------------------------- /Example/Ped-Groups.csv: -------------------------------------------------------------------------------- 1 | IndivID,ParGroup 2 | 1,B 3 | 24,B 4 | 48,B 5 | 72,B 6 | 2,B 7 | 3,B 8 | 25,B 9 | 26,B 10 | 49,B 11 | 50,B 12 | 73,B 13 | 74,B 14 | 4,A 15 | 5,A 16 | 27,A 17 | 28,A 18 | 51,A 19 | 52,A 20 | 75,A 21 | 76,A 22 | 23,A 23 | 47,A 24 | 71,A 25 | 94,A 26 | -------------------------------------------------------------------------------- /Example/PedVerify.csv: -------------------------------------------------------------------------------- 1 | "IndivID","seqID","Family","Relationship","FatherID","MotherID","FatherGroup","MotherGroup","FatherRel","FatherEMM","FatherMatch","MotherRel","MotherEMM","MotherMatch","FandMEMM","FandMmatch" 2 | 1,"Seq1",1,"Dam",5,3,"","",0.0133096517063664,0.0771019455686063,FALSE,0.0716349072453436,0.0727294995322404,FALSE,0.151285041470243,FALSE 3 | 2,"Seq2",1,"Grand Dam",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 4 | 3,"Seq3",1,"Grand Dam",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 5 | 4,"Seq4",1,"Grand Sire",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 6 | 5,"Seq5",1,"Grand Sire",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 7 | 6,"Seq6",1,"Offspring",23,1,"","B",0.232646677049644,0.053344613831886,TRUE,-0.0197589039357523,0.0794703819915572,FALSE,0.151941789994031,FALSE 8 | 7,"Seq7",1,"Offspring",23,1,"","B",0.243086954098656,0.0600256383425452,TRUE,-0.0688709244170686,0.0856907806292212,FALSE,0.132842513162407,FALSE 9 | 8,"Seq8",1,"Offspring",23,1,"","B",0.231907717784,0.0620982508754007,TRUE,-0.037417552320503,0.0852508331408504,FALSE,0.141064329954371,FALSE 10 | 9,"Seq9",1,"Offspring",23,1,"","B",0.186236777025123,0.0654260128517249,FALSE,-0.130047049947091,0.0920517422498498,FALSE,0.140956012670794,FALSE 11 | 10,"Seq10",1,"Offspring",23,1,"","B",0.224027277286345,0.053359259111451,TRUE,-0.0159733158245511,0.0767282675858107,FALSE,0.147255411328726,FALSE 12 | 11,"Seq11",1,"Offspring",23,1,"","B",0.210086565439628,0.0483056920474837,TRUE,-0.0384803508727466,0.0778553900100724,FALSE,0.159547356481305,FALSE 13 | 12,"Seq12",1,"Offspring",23,1,"","B",0.239542318709594,0.0603181544492042,TRUE,-0.0233305003141921,0.0831491607155219,FALSE,0.136943189695379,FALSE 14 | 13,"Seq13",1,"Offspring",23,1,"","B",0.2605295319736,0.0561174323248354,TRUE,-0.120603372297643,0.094818698675296,FALSE,0.148336108921054,FALSE 15 | 14,"Seq14",1,"Offspring",23,1,"","B",0.173779049945335,0.0616389240141009,FALSE,-0.0209266115405779,0.0770061088060997,FALSE,0.129306946226123,FALSE 16 | 15,"Seq15",1,"Offspring",23,1,"","B",0.24478502318727,0.0630621403909354,TRUE,-0.00325296100956614,0.0818515142667081,FALSE,0.138811406041392,FALSE 17 | 16,"Seq16",1,"Offspring",23,1,"","B",0.266683353081555,0.0571712770035826,TRUE,-0.0583452278487774,0.0849334930700323,FALSE,0.144341120059814,FALSE 18 | 17,"Seq17",1,"Offspring",23,1,"","B",0.305225438979726,0.0504835147971137,TRUE,-0.0819462045761633,0.0876247078043629,FALSE,0.146174775333968,FALSE 19 | 18,"Seq18",1,"Offspring",23,1,"","B",0.270503840284889,0.0517211892810881,TRUE,-0.0665132565137574,0.0859549861041481,FALSE,0.150017271555023,FALSE 20 | 19,"Seq19",1,"Offspring",23,1,"","B",0.231482609872723,0.0515022584588379,TRUE,-0.0242110709215646,0.0813836035764981,FALSE,0.152071582960359,FALSE 21 | 20,"Seq20",1,"Offspring",23,1,"","B",0.246557722781691,0.041500087443996,TRUE,-0.033850702037788,0.0719721980554252,FALSE,0.165768581773352,FALSE 22 | 21,"Seq21",1,"Offspring",23,1,"","B",0.207312877202818,0.0571210602066871,TRUE,0.0168166324553849,0.0785210964498207,FALSE,0.146353104971819,FALSE 23 | 22,"Seq22",1,"Offspring",23,1,"","B",0.261813400157755,0.0501584213532464,TRUE,-0.00384245149000161,0.077390946068189,FALSE,0.146736343494042,FALSE 24 | 23,"Seq23",1,"Sire",4,2,"","",0.319046446895086,0.0574509384886019,TRUE,0.285221160467362,0.061215295720628,TRUE,0.122739391108484,TRUE 25 | 24,"Seq24",2,"Dam",28,25,"","",0.362367081269235,0.0545341681982652,TRUE,0.345552965766181,0.0529069985794186,TRUE,0.105740067789177,TRUE 26 | 25,"Seq25",2,"Grand Dam",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 27 | 26,"Seq26",2,"Grand Dam",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 28 | 27,"Seq27",2,"Grand Sire",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 29 | 28,"Seq28",2,"Grand Sire",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 30 | 29,"Seq29",2,"Offspring",47,24,"","",0.291291531449358,0.0577236545679737,TRUE,0.278414908594725,0.0617884196153399,TRUE,0.115779775640329,TRUE 31 | 30,"Seq30",2,"Offspring",47,24,"","",0.22571356718382,0.0583041209299821,TRUE,0.325706931221261,0.0528617498047169,TRUE,0.12170283475135,TRUE 32 | 31,"Seq31",2,"Offspring",47,24,"","",0.256068062014956,0.052152124664696,TRUE,0.266811364994435,0.0531896697207904,TRUE,0.121702717572799,TRUE 33 | 32,"Seq32",2,"Offspring",47,24,"","",0.25211899358389,0.0566269273794045,TRUE,0.338337521381749,0.0544233166919934,TRUE,0.117578403925432,TRUE 34 | 33,"Seq33",2,"Offspring",47,24,"","",0.257952827488508,0.0560041076680063,TRUE,0.282198807133501,0.0577860166825542,TRUE,0.11310558033067,TRUE 35 | 34,"Seq34",2,"Offspring",47,24,"","",0.221432002140846,0.0578036999326241,TRUE,0.281589034176651,0.0580886793378991,TRUE,0.121223030612854,TRUE 36 | 35,"Seq35",2,"Offspring",47,24,"","",0.277646947664433,0.0538302377974444,TRUE,0.323354917863087,0.0569576807680938,TRUE,0.121289534215534,TRUE 37 | 36,"Seq36",2,"Offspring",47,24,"","",0.236312198543704,0.0576158033614215,TRUE,0.368414389730377,0.049068850745443,TRUE,0.117479476330444,TRUE 38 | 37,"Seq37",2,"Offspring",47,24,"","",0.216209172847265,0.0567585011913375,TRUE,0.322856772303826,0.0530445378613742,TRUE,0.119132769967996,TRUE 39 | 38,"Seq38",2,"Offspring",47,24,"","",0.265928268055682,0.0498563303319311,TRUE,0.329829113673729,0.0461746268026999,TRUE,0.124854391813416,TRUE 40 | 39,"Seq39",2,"Offspring",47,24,"","",0.215582375083549,0.0615048152078,TRUE,0.324265584067305,0.0560171555615014,TRUE,0.110405009145895,TRUE 41 | 40,"Seq40",2,"Offspring",47,24,"","",0.266784893782695,0.0518555863371941,TRUE,0.232056465880761,0.0570434123043398,TRUE,0.116982786571733,TRUE 42 | 41,"Seq41",2,"Offspring",47,24,"","",0.229429757355832,0.059088068537475,TRUE,0.288838540828066,0.0620087519473559,TRUE,0.119430719262597,TRUE 43 | 42,"Seq42",2,"Offspring",47,24,"","",0.237121458828967,0.0574712922794604,TRUE,0.341258854004461,0.0537529441296503,TRUE,0.108666314486583,TRUE 44 | 43,"Seq43",2,"Offspring",47,24,"","",0.264558495227018,0.0560617143292488,TRUE,0.327288755831356,0.0565224121711239,TRUE,0.11595638717193,TRUE 45 | 44,"Seq44",2,"Offspring",47,24,"","",0.231035781343046,0.0555657053715322,TRUE,0.315183854912586,0.0563141252986979,TRUE,0.1109075337272,TRUE 46 | 45,"Seq45",2,"Offspring",47,24,"","",0.266293364517122,0.0587357788747418,TRUE,0.333061280648208,0.0623571143541309,TRUE,0.115954642132907,TRUE 47 | 46,"Seq46",2,"Offspring",47,24,"","",0.269491950587098,0.0547893176340382,TRUE,0.296335833887386,0.0582407297525242,TRUE,0.117064857248319,TRUE 48 | 47,"Seq47",2,"Sire",27,26,"","",0.278256591198061,0.0554068340636181,TRUE,0.221819863068013,0.0632552583665564,TRUE,0.117624366927284,TRUE 49 | 48,"Seq48",3,"Dam",52,50,"","",0.232373351945356,0.0658176765983323,TRUE,0.290700173569131,0.0656647159545283,TRUE,0.142970332433179,TRUE 50 | 49,"Seq49",3,"Grand Dam",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 51 | 50,"Seq50",3,"Grand Dam",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 52 | 51,"Seq51",3,"Grand Sire",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 53 | 52,"Seq52",3,"Grand Sire",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 54 | 53,"Seq53",3,"Offspring",71,48,"","",0.277791823055767,0.0613498350170969,TRUE,0.267504886291935,0.0616603277742138,TRUE,0.127325231125283,TRUE 55 | 54,"Seq54",3,"Offspring",71,48,"","",0.277744355207079,0.0614327494199005,TRUE,0.292578016048389,0.0580828266524961,TRUE,0.127559250653045,TRUE 56 | 55,"Seq55",3,"Offspring",71,48,"","",0.229276637163375,0.061691593487116,TRUE,0.265762733076781,0.0618710563245197,TRUE,0.10826084344674,TRUE 57 | 56,"Seq56",3,"Offspring",71,48,"","",0.240972673017432,0.0646723694411746,TRUE,0.271832608346615,0.0649899095676441,TRUE,0.129212533625641,TRUE 58 | 57,"Seq57",3,"Offspring",71,48,"","",0.320855403494477,0.0283857789434868,TRUE,0.381226556554461,0.0222190926680524,TRUE,0.0428840238818208,TRUE 59 | 58,"Seq58",3,"Offspring",71,48,"","",0.299679851322982,0.0597450243041945,TRUE,0.314307438099968,0.0576062541766979,TRUE,0.130650837897698,TRUE 60 | 59,"Seq59",3,"Offspring",71,48,"","",0.319275724212776,0.0544599663810274,TRUE,0.339373926647553,0.0491735685285714,TRUE,0.126111309024281,TRUE 61 | 60,"Seq60",3,"Offspring",71,48,"","",0.192621149187342,0.0671951882854957,FALSE,0.294176668211275,0.0613055406441131,TRUE,0.131275636865754,FALSE 62 | 61,"Seq61",3,"Offspring",71,48,"","",0.258741708741244,0.0605365173043916,TRUE,0.325037963228196,0.0604078423270985,TRUE,0.133425594476023,TRUE 63 | 62,"Seq62",3,"Offspring",71,48,"","",0.305637419794134,0.054820657960116,TRUE,0.297005926663799,0.055502231668541,TRUE,0.125753205576738,TRUE 64 | 63,"Seq63",3,"Offspring",71,48,"","",0.25703919536171,0.0614902331878694,TRUE,0.265385898447617,0.0564900346331201,TRUE,0.134773973431821,TRUE 65 | 64,"Seq64",3,"Offspring",71,48,"","",0.271303725353518,0.063080655348366,TRUE,0.275150917954856,0.0619235107080335,TRUE,0.113349565098963,TRUE 66 | 65,"Seq65",3,"Offspring",71,48,"","",0.250342830448996,0.0617394555450415,TRUE,0.279931204572511,0.0594868063906915,TRUE,0.126705002799156,TRUE 67 | 66,"Seq66",3,"Offspring",71,48,"","",0.237241106980824,0.0623944928850089,TRUE,0.35282992501633,0.0521888076701545,TRUE,0.131674085651312,TRUE 68 | 67,"Seq67",3,"Offspring",71,48,"","",0.290186721607132,0.0656855697194163,TRUE,0.318902108337811,0.0619472659812556,TRUE,0.124212505232093,TRUE 69 | 68,"Seq68",3,"Offspring",71,48,"","",0.264081519669989,0.0661517801735436,TRUE,0.268165561110117,0.060117241763568,TRUE,0.119455273960035,TRUE 70 | 69,"Seq69",3,"Offspring",71,48,"","",0.214732054381001,0.0678492969693458,TRUE,0.311792360921698,0.0609077278865865,TRUE,0.107952609416682,TRUE 71 | 70,"Seq70",3,"Offspring",71,48,"","",0.282277019885227,0.059478829806904,TRUE,0.288058770459796,0.0624376012888989,TRUE,0.128766889318092,TRUE 72 | 71,"Seq71",3,"Sire",51,49,"","",0.339685305131657,0.0562044341905446,TRUE,0.298686095229579,0.0627019092820277,TRUE,0.122697296939296,TRUE 73 | 72,"Seq72",4,"Dam",75,73,"","",0.270833865852327,0.0457229339022268,TRUE,0.309801614687751,0.0497899321522008,TRUE,0.0755362312550887,TRUE 74 | 73,"Seq73",4,"Grand Dam",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 75 | 74,"Seq74",4,"Grand Dam",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 76 | 75,"Seq75",4,"Grand Sire",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 77 | 76,"Seq76",4,"Grand Sire",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 78 | 77,"Seq77",4,"Offspring",94,72,"A","",-0.583924821961568,0.0336352034748202,FALSE,0.582292807219007,0.0363234104529542,TRUE,0.028840563932362,FALSE 79 | 78,"Seq78",4,"Offspring",94,72,"A","",-0.0305932090653356,0.0866765253499339,FALSE,0.236608653217542,0.0373557828794942,TRUE,0.189508679201496,FALSE 80 | 79,"Seq79",4,"Offspring",94,72,"A","",0.0099861969162934,0.0775355707235797,FALSE,0.228652637094922,0.0739677762787122,TRUE,0.126443103118739,FALSE 81 | 80,"Seq80",4,"Offspring",94,72,"A","",-0.0325412081235178,0.0901890439302678,FALSE,0.229635130371493,0.0531683083638634,TRUE,0.174125088710619,FALSE 82 | 81,"Seq81",4,"Offspring",94,72,"A","",-0.0255027225461536,0.0744137331181359,FALSE,0.197152591752703,0.0308334160347018,FALSE,0.195114572088714,FALSE 83 | 82,"Seq82",4,"Offspring",94,72,"A","",-0.027797341845907,0.087077019591302,FALSE,0.251999002126021,0.0613577028673327,TRUE,0.173411844258038,FALSE 84 | 83,"Seq83",4,"Offspring",94,72,"A","",-0.0432058214880612,0.091818033462556,FALSE,0.227961797986901,0.0529003341132835,TRUE,0.157576248162788,FALSE 85 | 84,"Seq84",4,"Offspring",94,72,"A","",-0.0110505150316921,0.0815506247671387,FALSE,0.176290262374843,0.0350359876986942,FALSE,0.177873459776249,FALSE 86 | 85,"Seq85",4,"Offspring",94,72,"A","",-0.0215700688663871,0.0869754758306069,FALSE,0.225107440405979,0.0696043418874428,TRUE,0.13973329439075,FALSE 87 | 86,"Seq86",4,"Offspring",94,72,"A","",-0.0773514846758445,0.0906560179318863,FALSE,0.237804400815186,0.054496035215718,TRUE,0.162246243004636,FALSE 88 | 87,"Seq87",4,"Offspring",94,72,"A","",-0.0278056993823275,0.0857936209312674,FALSE,0.153536524150851,0.0467381276836397,FALSE,0.169459466960884,FALSE 89 | 88,"Seq88",4,"Offspring",94,72,"A","",-0.0719930701497567,0.0954611388457666,FALSE,0.0896724715980715,0.0597570161666447,FALSE,0.17648762047528,FALSE 90 | 89,"Seq89",4,"Offspring",94,72,"A","",-0.0524708127631565,0.0913053287169726,FALSE,0.222399914350894,0.0684914133801877,TRUE,0.154183641461129,FALSE 91 | 90,"Seq90",4,"Offspring",94,72,"A","",-0.0518981083310224,0.0857438205401683,FALSE,0.148651075022043,0.0402308991849632,FALSE,0.182941423188812,FALSE 92 | 91,"Seq91",4,"Offspring",94,72,"A","",-0.0266296488642358,0.0854236846709664,FALSE,0.159470584575196,0.0450563396562913,FALSE,0.17728921581462,FALSE 93 | 92,"Seq92",4,"Offspring",94,72,"A","",-0.0170550205298986,0.0882315884268492,FALSE,0.131438496866652,0.0610405986654158,FALSE,0.168555416309484,FALSE 94 | 93,"Seq93",4,"Offspring",94,72,"A","",-0.0249950207567369,0.0833089299651253,FALSE,0.176532450916866,0.0449833755566274,FALSE,0.185758690894299,FALSE 95 | 94,"Seq94",4,"Sire",76,74,"","",0.0266202425739709,0.0824009929764057,FALSE,-0.0853666521812897,0.0941811766767425,FALSE,0.163603599792878,FALSE 96 | 95,"Blank",NA,"negative",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 97 | 96,"Ref",NA,"reference",NA,NA,"","",NA,NA,NA,NA,NA,NA,NA,NA 98 | -------------------------------------------------------------------------------- /Example/SNPCallRate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/SNPCallRate.png -------------------------------------------------------------------------------- /Example/SNPDepth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/SNPDepth.png -------------------------------------------------------------------------------- /Example/SNPDepthHist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/SNPDepthHist.png -------------------------------------------------------------------------------- /Example/SampDepth-scored.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/SampDepth-scored.png -------------------------------------------------------------------------------- /Example/SampDepth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/SampDepth.png -------------------------------------------------------------------------------- /Example/SampDepthCR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/SampDepthCR.png -------------------------------------------------------------------------------- /Example/SampDepthHist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/SampDepthHist.png -------------------------------------------------------------------------------- /Example/SampleStats.csv: -------------------------------------------------------------------------------- 1 | "seqID","callrate","sampdepth" 2 | "Ref",0.647630702291114,9.2084438099123 3 | "Seq68",0.494255217893807,5.83948602896186 4 | "Seq59",0.729621320280101,11.4449656672785 5 | "Seq55",0.479434359915698,5.1913114419743 6 | "Seq64",0.514922836358692,5.52906383846624 7 | "Seq69",0.448296961044259,4.70684614861649 8 | "Seq67",0.604255897749677,7.55530627506969 9 | "Seq75",0.575294037664015,7.4837174519002 10 | "Seq76",0.684206948126997,9.92820722006934 11 | "Seq94",0.602012373376844,7.71473247671494 12 | "Seq93",0.706030321571827,10.4771228499558 13 | "Seq78",0.691889319464274,11.0450064586308 14 | "Seq84",0.707593990074104,10.4264735875994 15 | "Seq87",0.641715956217282,8.77816302943776 16 | "Seq79",0.372425045890271,3.7449180773676 17 | "Seq89",0.508940104697804,5.83968998572303 18 | "Seq85",0.420830783873819,4.15344346998436 19 | "Seq80",0.587531443334013,7.61479366374329 20 | "Seq73",0.562580732884628,7.21952546060235 21 | "Seq74",0.39533618872799,3.98001223740567 22 | "Seq72",0.187096335576858,1.47909443198042 23 | "Seq81",0.750424909919097,12.4193351009586 24 | "Seq86",0.579985043170848,7.56557209871507 25 | "Seq90",0.713100822625603,10.5976612958053 26 | "Seq83",0.56088109320824,6.985315113196 27 | "Seq88",0.612278197022231,7.97831259772928 28 | "Seq77",0.0350125773336053,0.203548847644299 29 | "Seq91",0.647358759942892,9.27289414644095 30 | "Seq82",0.57277857094296,7.14392548779659 31 | "Seq92",0.525256645591135,6.27595349785845 32 | "Seq4",0.668638248691277,9.27690529607723 33 | "Seq5",0.626147256781562,8.11802297912842 34 | "Seq23",0.667074580189,9.17519885784214 35 | "Seq10",0.830443945883473,14.9684546876062 36 | "Seq20",0.896118022979128,20.5876674145081 37 | "Seq22",0.820789992521585,14.6513019239921 38 | "Seq15",0.699367734040383,9.74199469712421 39 | "Seq11",0.845808688558026,16.8295601332517 40 | "Seq17",0.718539669590047,12.2222448840846 41 | "Seq14",0.605955537426066,7.56577605547624 42 | "Seq7",0.639948330953838,8.09076075871915 43 | "Seq2",0.641647970630226,8.69787205112516 44 | "Seq3",0.739275273641988,11.6464069617241 45 | "Seq1",0.730097219389489,11.3476782922021 46 | "Seq21",0.747977428785098,12.3252430484737 47 | "Seq6",0.781834251138759,13.3189203888776 48 | "Seq18",0.803725610170644,13.6737371677204 49 | "Seq16",0.698143993473384,9.78679719899381 50 | "Seq19",0.822489632197974,15.7247943435992 51 | "Seq8",0.686994357196274,10.023251070773 52 | "Seq13",0.730437147324767,10.9213406757767 53 | "Seq12",0.684818818410497,9.65986810796111 54 | "Seq9",0.696240397035828,9.54374872527024 55 | "Seq28",0.708069889183493,10.4477530763478 56 | "Seq27",0.757903324495207,12.864164797063 57 | "Seq47",0.706234278332993,10.5121354272894 58 | "Seq36",0.789992521585424,13.6302943775919 59 | "Seq38",0.815963015840642,16.0384798422734 60 | "Seq46",0.696784281732273,10.1356992317629 61 | "Seq37",0.72703786797199,11.3009721938949 62 | "Seq31",0.780066625875314,13.4230743082467 63 | "Seq29",0.700047589910939,10.3103542049086 64 | "Seq39",0.638792575973893,8.46026242436603 65 | "Seq34",0.738527432184377,10.9806920932762 66 | "Seq45",0.643551567067782,8.29750492895506 67 | "Seq25",0.722346862465157,11.6773404038344 68 | "Seq26",0.646270990550003,8.94792304031545 69 | "Seq24",0.648446529335781,9.10544564552315 70 | "Seq42",0.594941872323067,7.9773607995105 71 | "Seq30",0.727581752668434,11.6113943843905 72 | "Seq43",0.67897205792372,9.77632741858726 73 | "Seq35",0.736147936637433,11.8692637160922 74 | "Seq32",0.735604051940989,11.3457746957645 75 | "Seq40",0.739615201577266,11.944863688898 76 | "Seq44",0.594941872323067,7.35610850499694 77 | "Seq33",0.681215582296553,9.2722142905704 78 | "Seq41",0.706030321571827,10.2330545924264 79 | "Seq51",0.674620980352165,10.0216194166837 80 | "Seq52",0.602760214834455,7.75511591542593 81 | "Seq71",0.615405534026786,8.17132367937997 82 | "Seq58",0.663811272010334,9.35937181317561 83 | "Seq57",0.0719287511047658,0.454823577401591 84 | "Seq62",0.737575633965599,11.9744374192671 85 | "Seq70",0.671697600108777,9.25623767761235 86 | "Seq65",0.604187912162621,8.23121898157591 87 | "Seq56",0.64953429872867,8.59929294989462 88 | "Seq66",0.705010537765994,10.444353796995 89 | "Seq61",0.641715956217282,8.42368617853015 90 | "Seq54",0.64586307702767,9.35794411584744 91 | "Seq49",0.50656060915086,5.93418995173023 92 | "Seq50",0.434903800394316,4.58725950098579 93 | "Seq48",0.591882520905568,7.53538649806241 94 | "Seq60",0.597525324631178,7.66612278197022 95 | "Seq53",0.670745801889999,9.65857638180706 96 | "Seq63",0.700455503433272,10.3409477190836 97 | "Blank",0.0053028757903324,0.0265823645387178 98 | -------------------------------------------------------------------------------- /Example/X2star-QQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/X2star-QQ.png -------------------------------------------------------------------------------- /Example/finplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/Example/finplot.png -------------------------------------------------------------------------------- /Example/seqID.csv: -------------------------------------------------------------------------------- 1 | "seqID" 2 | "Ref" 3 | "Seq68" 4 | "Seq59" 5 | "Seq55" 6 | "Seq64" 7 | "Seq69" 8 | "Seq67" 9 | "Seq75" 10 | "Seq76" 11 | "Seq94" 12 | "Seq93" 13 | "Seq78" 14 | "Seq84" 15 | "Seq87" 16 | "Seq79" 17 | "Seq89" 18 | "Seq85" 19 | "Seq80" 20 | "Seq73" 21 | "Seq74" 22 | "Seq72" 23 | "Seq81" 24 | "Seq86" 25 | "Seq90" 26 | "Seq83" 27 | "Seq88" 28 | "Seq77" 29 | "Seq91" 30 | "Seq82" 31 | "Seq92" 32 | "Seq4" 33 | "Seq5" 34 | "Seq23" 35 | "Seq10" 36 | "Seq20" 37 | "Seq22" 38 | "Seq15" 39 | "Seq11" 40 | "Seq17" 41 | "Seq14" 42 | "Seq7" 43 | "Seq2" 44 | "Seq3" 45 | "Seq1" 46 | "Seq21" 47 | "Seq6" 48 | "Seq18" 49 | "Seq16" 50 | "Seq19" 51 | "Seq8" 52 | "Seq13" 53 | "Seq12" 54 | "Seq9" 55 | "Seq28" 56 | "Seq27" 57 | "Seq47" 58 | "Seq36" 59 | "Seq38" 60 | "Seq46" 61 | "Seq37" 62 | "Seq31" 63 | "Seq29" 64 | "Seq39" 65 | "Seq34" 66 | "Seq45" 67 | "Seq25" 68 | "Seq26" 69 | "Seq24" 70 | "Seq42" 71 | "Seq30" 72 | "Seq43" 73 | "Seq35" 74 | "Seq32" 75 | "Seq40" 76 | "Seq44" 77 | "Seq33" 78 | "Seq41" 79 | "Seq51" 80 | "Seq52" 81 | "Seq71" 82 | "Seq58" 83 | "Seq57" 84 | "Seq62" 85 | "Seq70" 86 | "Seq65" 87 | "Seq56" 88 | "Seq66" 89 | "Seq61" 90 | "Seq54" 91 | "Seq49" 92 | "Seq50" 93 | "Seq48" 94 | "Seq60" 95 | "Seq53" 96 | "Seq63" 97 | "Blank" 98 | -------------------------------------------------------------------------------- /GBS-PopGen.R: -------------------------------------------------------------------------------- 1 | PopGenver <- "1.3.1" 2 | cat("GBS-PopGen for KGD version:",PopGenver,"\n") 3 | 4 | heterozygosity <- function(indsubsetgf=1:nind,snpsubsetgf=1:nsnps,maxiter=100,convtol=0.001){ 5 | nsnpsgf <- length(snpsubsetgf) 6 | nindgf <- length(indsubsetgf) 7 | genongf <- genon[indsubsetgf,snpsubsetgf,drop=FALSE] 8 | depth.use <- depth[indsubsetgf,snpsubsetgf,drop=FALSE] 9 | depth.use[depth.use==0] <- NA # dont want to average over obs with depth 0 10 | obsgfreq <- cbind(colSums(genongf==0,na.rm=TRUE),colSums(genongf==1,na.rm=TRUE),colSums(genongf==2,na.rm=TRUE))/colSums(!is.na(genongf)) 11 | obshet <- mean(obsgfreq[,2],na.rm=TRUE) 12 | psub <- calcp(indsubset=indsubsetgf)[snpsubsetgf] 13 | mafsub <- pmin(psub,1-psub) 14 | ehettrue <- mean(2*mafsub*(1-mafsub),na.rm=TRUE) # exp het if true genos observed 15 | ehetstar <- mean(2*mafsub*(1-mafsub)*(1-2*colMeans(depth2K(depth.use),na.rm=TRUE)),na.rm=TRUE) 16 | ohet2 <- mean(obsgfreq[,2],na.rm=TRUE)/mean(1-2*depth2K(depth.use),na.rm=TRUE) 17 | ohet <- rep(NA,nsnpsgf) 18 | for(isnp in 1:nsnpsgf) { 19 | genodepth <- depth.use[,isnp] 20 | pnew <- obsgfreq[isnp,] 21 | ng <- sum(!is.na(genodepth)) 22 | convtest<- 1; itcount <- 0 23 | while(convtest>convtol & itcount 0 & pgsub < 1 ) 76 | snpsubset <- snpsubset[usnp] 77 | aX2 <- rbind(round(genon[indsubset,snpsubset,drop=FALSE]/2),ceiling(genon[indsubset,snpsubset,drop=FALSE]/2)) 78 | effnuma1 <- 2*(1-depth2K(depth[indsubset, snpsubset,drop=FALSE])) # 2(1-K) 79 | snppopeffn <- rowsum(effnuma1,populations[indsubset]) # effective number reads popn x snps 80 | X2results <- apply(rbind(aX2,snppopeffn),MARGIN=2,chisq.adj,y=rep(populations[indsubset],2)) 81 | effnuma <- colSums(2*(1-depth2K(depth[indsubset, snpsubset,drop=FALSE]))) # 2(1-K) 82 | Fst.results[usnp] <- npops * X2results / (effnuma * (npops - varadj)) 83 | pvalue[usnp] <- pchisq(X2results,df=npops-1,lower.tail=FALSE) 84 | cat("Fst Mean:",mean(Fst.results,na.rm=TRUE),"Median:",median(Fst.results,na.rm=TRUE),"p-value:",mean(pvalue,na.rm=TRUE),"\n") 85 | if(SNPtest) Fst.results <- list(Fst=Fst.results, pvalue=pvalue) 86 | Fst.results 87 | } 88 | 89 | 90 | Fst.GBS.pairwise <- function(snpsubset, indsubset, populations,sortlevels=TRUE, SNPtest=FALSE, ...) { 91 | if (missing(snpsubset)) snpsubset <- 1:nsnps 92 | if (missing(indsubset)) indsubset <- 1:nind 93 | popnames <- unique(populations[indsubset]) 94 | if(sortlevels) popnames <- sort(popnames) 95 | npops <- length(popnames) 96 | Fst.results <- array(dim=c(npops,npops,length(snpsubset))) 97 | if(SNPtest) pvalue <- Fst.results 98 | Fst.means <- Fst.medians <- pvalue.means <- array(dim=c(npops,npops),dimnames=list(popnames,popnames)) 99 | for (ipop in 1:(npops-1)) { 100 | indsubseti <- indsubset[which(populations[indsubset] == popnames[ipop])] 101 | for (jpop in (ipop+1):npops) { 102 | indsubsetj <- indsubset[which(populations[indsubset] == popnames[jpop])] 103 | Fsttemp <- Fst.GBS(snpsubset,c(indsubseti,indsubsetj),populations, SNPtest = SNPtest, ...) 104 | if(SNPtest) Fst.results[ipop,jpop,] <- Fsttemp$Fst else Fst.results[ipop,jpop,] <- Fsttemp 105 | if(SNPtest) pvalue[ipop,jpop,] <- Fsttemp$pvalue 106 | Fst.means[ipop,jpop] <- mean(Fst.results[ipop,jpop,],na.rm=TRUE) 107 | Fst.medians[ipop,jpop] <- median(Fst.results[ipop,jpop,],na.rm=TRUE) 108 | if(SNPtest) pvalue.means[ipop,jpop] <- mean(pvalue[ipop,jpop,],na.rm=TRUE) 109 | } 110 | } 111 | cat("Pairwise Fst Means\n"); print(Fst.means[-npops,-1]) 112 | cat("Pairwise Fst Medians\n");print(Fst.medians[-npops,-1]) 113 | if(SNPtest) cat("Pairwise p-value Means\n"); print(pvalue.means[-npops,-1]) 114 | if(SNPtest) Fst.results <- list(Fst=Fst.results, pvalue=pvalue) 115 | Fst.results 116 | } 117 | 118 | 119 | 120 | 121 | popmaf <- function(snpsubset, indsubset, populations=NULL, subpopulations=NULL, indcol, colobj, minsamps=10, mafmin=0, sortlevels=TRUE, unif = FALSE) { 122 | #populations defined relative to full set (length nind) 123 | if (missing(snpsubset)) snpsubset <- 1:nsnps 124 | if (missing(indsubset)) indsubset <- 1:nind 125 | if (!missing(colobj)) {populations=colobj$collabels[match(colobj$sampcol,colobj$collist)]; indcol <- colobj$sampcol } 126 | nindsub <- length(indsubset) 127 | if (missing(indcol)) indcol <- rep("black",nindsub) 128 | if(is.null(populations)) populations <- rep("",nindsub) 129 | if(is.null(subpopulations)) subpopulations <- rep("",nindsub) 130 | popnames <- unique(populations[indsubset]) 131 | sublevs <- unique(subpopulations[indsubset]) 132 | if(sortlevels) {popnames <- sort(popnames); sublevs <- sort(sublevs) } 133 | nsub <- length(sublevs) 134 | if(nsub > 1) histdensity=30 else histdensity=NULL 135 | anglespan <- 180*(1-1/nsub) 136 | maxfreq <- 0 137 | if(unif) { 138 | for (i in seq_along(popnames)) { 139 | thigroup <- popnames[i] 140 | for (j in 1:nsub){ 141 | thissub <- sublevs[j] 142 | indgroup <- intersect(indsubset,which(populations==thigroup & subpopulations==thissub)) 143 | if(length(indgroup) >= minsamps) { 144 | plev <- calcp(indsubset=indgroup,pmethod="G") 145 | mafgroup <- pmin(plev,1-plev) 146 | snpsgroup <- intersect(snpsubset,which(mafgroup >= mafmin)) 147 | histinfo <- suppressWarnings(mafplot(mafgroup[snpsgroup], doplot=FALSE ) ) 148 | maxfreq <- max(maxfreq,histinfo$counts) 149 | } 150 | } 151 | } 152 | } 153 | for (i in seq_along(popnames)) { 154 | thigroup <- popnames[i] 155 | for (j in 1:nsub){ 156 | thissub <- sublevs[j] 157 | indgroup <- intersect(indsubset,which(populations==thigroup & subpopulations==thissub)) 158 | if(length(indgroup) >= minsamps) { 159 | plev <- calcp(indsubset=indgroup,pmethod="G") 160 | mafgroup <- pmin(plev,1-plev) 161 | snpsgroup <- intersect(snpsubset,which(mafgroup >= mafmin)) 162 | groupcol <- unique(indcol[indgroup]); if (length(groupcol) > 1) groupcol <- "black" 163 | if (!unif) mafplot(mafgroup[snpsgroup],plotname=paste0("MAF-",thigroup,thissub),barcol=groupcol,main=paste("MAF for",thigroup,thissub), 164 | density=histdensity, angle=anglespan*((j-1)/(max(2,nsub)-1) - 0.5) ) # angle is irrelevant when nsub=1 165 | if (unif) mafplot(mafgroup[snpsgroup],plotname=paste0("MAF-",thigroup,thissub),barcol=groupcol,main=paste("MAF for",thigroup,thissub), 166 | density=histdensity, angle=anglespan*((j-1)/(max(2,nsub)-1) - 0.5), ylim=c(0,maxfreq) ) # angle is irrelevant when nsub=1 167 | nnz <- sum(mafgroup[snpsubset]>0,na.rm=TRUE) 168 | ng.2 <- sum(mafgroup[snpsubset]>0.2,na.rm=TRUE) 169 | cat(thigroup,thissub,"n=",length(indgroup),"# SNPs with MAF>0",nnz,"# SNPs with MAF>0.2",ng.2,"Proportion",ng.2/nnz,"\n") 170 | } 171 | } 172 | } 173 | } 174 | 175 | 176 | popG <- function(Guse, populations, diag=FALSE) { 177 | #average a G matrix by populations (without self-rel) + mean self-rel by populations 178 | numericpops <- is.numeric(populations) 179 | if(numericpops) populations <- as.character(populations) 180 | Xpops <- model.matrix(~populations -1) 181 | poptext <- names(attr(Xpops,"contrast")) 182 | popnames <- sub(poptext,"",colnames(Xpops),fixed=TRUE) 183 | colnames(Xpops) <- popnames 184 | npops <- colSums(Xpops) 185 | popG <- t(Xpops %*% diag(1/npops)) %*% Guse %*% Xpops %*% diag(1/npops) 186 | popSelf <- t(Xpops %*% diag(1/npops)) %*% diag(Guse) 187 | if(!diag) diag(popG) <- (diag(popG) * npops - popSelf) / (npops - 1) 188 | colnames(popG) <- rownames(popG) <- rownames(popSelf) <- colnames(Xpops) 189 | colnames(popSelf) <- "Inbreeding" 190 | list(G=popG, Inb = popSelf-1) 191 | } 192 | 193 | DAPC.GBS <- function(Guse,populations=NULL, n.pca=NULL, perc.pca=90) { 194 | if(!require(MASS)) stop("MASS library is required") 195 | if(is.null(populations)) stop("(currently) need populations to be specified") 196 | PC <- svd(scale(Guse, scale=FALSE)) 197 | eval <- sign(PC$d) * PC$d^2/sum(PC$d^2) 198 | varexpl = 100* cumsum(eval)/sum(eval) 199 | cat("minimum eigenvalue: ", min(eval), "\n") 200 | if(is.null(n.pca)) n.pca= min(sum(varexpl < perc.pca) + 1, length(eval)) 201 | neprint <- min(2*n.pca,length(eval)) 202 | cat("first",neprint,"eigenvalues:",eval[1:neprint],"\n") 203 | cat("using",n.pca,"principal components\n") 204 | PC$x <- PC$u[,1:n.pca] %*% diag(PC$d[1:n.pca],nrow=n.pca) # nrow to get correct behaviour when npc=1 205 | ldaPC <- lda(PC$x, populations, tol=1e-30) 206 | ldaPC$x <- PC$x %*% ldaPC$scaling 207 | #alternatively predict(ldaPC)$x (also has $class and $posterior) 208 | ldaPC 209 | } 210 | 211 | 212 | manhatplot <- function(value, chrom, pos, plotname, qdistn=qunif, keyrot=0, symsize=0.8, legendm = NULL, ...) { 213 | chromcol <- colourby(chrom) 214 | colkey(chromcol,"chrom",srt=keyrot) 215 | plotord <- order(chrom,pos) 216 | valuetext <- gsub("[^[:alnum:]]", ".", deparse(substitute(value))) 217 | symsize0 <- symsize 218 | if(length(symsize)==length(value)) symsize <- symsize[plotord] 219 | png(paste0(plotname,"-Manhat.png"),width=1200) 220 | plot(value[plotord], col=chromcol$sampcol[plotord],xlab="Position",ylab=valuetext,cex=symsize, xaxt="n") 221 | if(!is.null(legendm)) legendm() 222 | dev.off() 223 | png(paste0(plotname,"-QQ.png")) 224 | if(length(symsize)==length(value)) symsize <- symsize0[order(value)] 225 | qqplot(qdistn(ppoints(length(value)),...), y=value, xlab="Theoretical quantiles", ylab=paste(valuetext,"quantiles"), cex=symsize, 226 | sub="Line for mid 98% of values", col=chromcol$sampcol[order(value)]) 227 | qqline(value,col=2, distribution = function(p) qdistn(p, ...),probs=c(0.01,0.99)) 228 | if(!is.null(legendm)) legendm() 229 | dev.off() 230 | } 231 | 232 | # 233 | #For the beta distribution see dbeta. 234 | #For the binomial (including Bernoulli) distribution see dbinom. 235 | #For the Cauchy distribution see dcauchy. 236 | #For the chi-squared distribution see dchisq. 237 | #For the exponential distribution see dexp. 238 | #For the F distribution see df. 239 | #For the gamma distribution see dgamma. 240 | #For the geometric distribution see dgeom. (This is also a special case of the negative binomial.) 241 | #For the hypergeometric distribution see dhyper. 242 | #For the log-normal distribution see dlnorm. 243 | #For the multinomial distribution see dmultinom. 244 | #For the negative binomial distribution see dnbinom. 245 | #For the normal distribution see dnorm. 246 | #For the Poisson distribution see dpois. 247 | #For the Student's t distribution see dt. 248 | #For the uniform distribution see dunif. 249 | #For the Weibull distribution see dweibull. 250 | 251 | 252 | # select & pair (from different chromosomes) snps 253 | # based on T Bilton code 254 | snpselection <- function(chromosome,position,nsnpperchrom=100,seltype="centre",randseed=NULL, snpsubset,chromuse) { 255 | #seltype is centre, even or random 256 | if (missing(snpsubset)) snpsubset <- 1:length(chromosome) 257 | if (missing(chromuse)) chromuse <- unique(chromosome) 258 | if(seltype=="center") seltype <- "centre" 259 | usnp <- intersect(snpsubset, which(chromosome %in% chromuse)) 260 | chromlist <- unique(chromosome[usnp]) 261 | chromnone <- setdiff(chromuse,chromlist) 262 | if(length(chromnone) > 0 ) cat("Warning: no SNPs requested on chromosome(s)", chromnone, "\n") 263 | if(seltype=="random") { 264 | set.seed(randseed) 265 | snpchoose <- function(x) { 266 | indx <- which(chromosome[usnp] == x) 267 | nsnp <- min(c(length(indx),nsnpperchrom)) ## if there are fewer than nsnpperchrom SNPs on the chromosome 268 | return(sample(indx,nsnp)) 269 | } 270 | } 271 | if(seltype=="even") { 272 | snpchoose <- function(x) { 273 | indx <- which(chromosome[usnp] == x) 274 | nsnp <- min(c(length(indx),nsnpperchrom)) ## if there are fewer than nsnpperchrom SNPs on the chromosome 275 | snpsep <- length(indx)/nsnpperchrom 276 | temp <- sort(position[usnp][indx], index.return=TRUE) 277 | return(indx[temp$ix[round(seq(snpsep/2,by=snpsep,length.out=nsnpperchrom))]]) 278 | } 279 | } 280 | if(seltype=="centre") { 281 | meanpos <- aggregate(position[usnp] ~ chromosome[usnp],FUN=mean) 282 | colnames(meanpos) <- c("chr","pos") 283 | chromlist <- unique(chromosome[usnp]) 284 | snpchoose <- function(x) { 285 | indx <- which(chromosome[usnp] == x) 286 | nsnp <- min(c(length(indx),nsnpperchrom)) ## if there are fewer than nsnpperchrom SNPs on the chromosome 287 | temp <- sort(abs(position[usnp][indx] - meanpos$pos[which(meanpos$chr==x)]), index.return=TRUE) 288 | return(indx[sort(temp$ix[1:nsnp])]) 289 | } 290 | } 291 | snplist <- sapply(chromlist, snpchoose, simplify = FALSE) 292 | nchrom <- length(snplist) 293 | pairs <- do.call("rbind", sapply(1:(nchrom-1), function(x) as.matrix(expand.grid(snplist[[x]], unlist(snplist[(x+1):nchrom]), KEEP.OUT.ATTRS = FALSE)) )) 294 | pairs[,1] <- usnp[pairs[,1]] 295 | pairs[,2] <- usnp[pairs[,2]] 296 | return(pairs) 297 | } 298 | 299 | ### T Bilton, select SNPs (for LD analysis) from a UR object (modified) 300 | snpselectionUR <- function(URobj, nsnpperchrom=100, nchrom, ...){ 301 | chromlist <- unique(URobj$.__enclos_env__$private$chrom) 302 | if (!missing(nchrom)) chromlist <- chromlist[1:nchrom] 303 | URpairs <- snpselection (chromosome=URobj$.__enclos_env__$private$chrom,position=URobj$.__enclos_env__$private$pos,nsnpperchrom=nsnpperchrom,chromuse=chromlist, ...) 304 | return(URpairs) 305 | } 306 | 307 | Nefromr2 <- function(r2auto,nLD, alpha=1, weighted=FALSE,minN=1) { 308 | #r2auto = set of r2 values across different autosomes 309 | #nLD = # individuals for r2 calcs 310 | #alpha = mutation parameter 311 | #beta=1 (2) for phase unknown (known) 312 | if(length(nLD) ==1) nLD <- rep(nLD,length(r2auto)) 313 | uN <- which(nLD >= minN) 314 | r2auto <- r2auto[uN] 315 | nLD <- nLD[uN] 316 | wt <- rep(1,length(r2auto)) 317 | if(weighted) wt <- nLD 318 | meanN <- mean(nLD) 319 | Neauto <- (1/weighted.mean(r2auto,wt,na.rm=TRUE) - 1) /2 320 | beta <- 1; Neauto.adj.b1 <- (1/(weighted.mean(r2auto,wt,na.rm=TRUE) -1/(beta*meanN)) - alpha) /2 321 | beta <- 2; Neauto.adj.b2 <- (1/(weighted.mean(r2auto,wt,na.rm=TRUE) -1/(beta*meanN)) - alpha) /2 322 | Neauto.med <- (1/median(r2auto,na.rm=TRUE) - 1) /2 323 | beta <- 1; Neauto.med.adj.b1 <- (1/(median(r2auto,na.rm=TRUE) -1/(beta*meanN)) - alpha) /2 324 | beta <- 2; Neauto.med.adj.b2 <- (1/(median(r2auto,na.rm=TRUE) -1/(beta*meanN)) - alpha) /2 325 | data.frame(n=meanN,Neauto,Neauto.adj.b1,Neauto.adj.b2,Neauto.med,Neauto.med.adj.b1,Neauto.med.adj.b2) 326 | } 327 | 328 | 329 | -------------------------------------------------------------------------------- /GBS-Rcpp-functions.cpp: -------------------------------------------------------------------------------- 1 | // disable all run-time checks in armadillo (e.g. bounds checking, etc.) 2 | #define ARMA_NO_DEBUG 3 | 4 | // we depend on the R package "RcppArmadillo" 5 | // [[Rcpp::depends(RcppArmadillo)]] 6 | #include 7 | 8 | // we need OpenMP for parallelisation 9 | #include 10 | // [[Rcpp::plugins(openmp)]] 11 | 12 | 13 | // helper function for deciding number of threads 14 | static int check_nThreads(int nThreads) { 15 | // maximum number of threads available 16 | int maxThreads = omp_get_max_threads(); 17 | 18 | if (nThreads <= 0) { 19 | // if nThreads is set to zero then use everything 20 | nThreads = maxThreads; 21 | } 22 | else if (nThreads > maxThreads) { 23 | // don't allow more threads than the maximum available 24 | nThreads = maxThreads; 25 | } 26 | 27 | return nThreads; 28 | } 29 | 30 | 31 | // function for finding row medians (alternative to apply(depth, 1, median)) 32 | // requires integer type matrix as input, returns list of doubles 33 | // [[Rcpp::export]] 34 | std::vector rcpp_rowMedians(const arma::imat &depth, int nThreads) { 35 | // set up number of threads 36 | nThreads = check_nThreads(nThreads); 37 | 38 | // number of rows 39 | const int nrows = depth.n_rows; 40 | 41 | // vector for storing the result 42 | std::vector medians(nrows); 43 | 44 | // loop over the rows 45 | #pragma omp parallel for num_threads(nThreads) 46 | for (int i = 0; i < nrows; i++) { 47 | // convert the row to double type, to compute median correctly 48 | arma::rowvec row = arma::conv_to::from(depth.row(i)); 49 | 50 | // compute the median for this row 51 | medians[i] = arma::median(row); 52 | } 53 | 54 | return medians; 55 | } 56 | 57 | // function for finding row maximums (alternative to apply(mat, 1, max)) 58 | // requires integer type matrix as input, return list of integers 59 | // [[Rcpp::export]] 60 | std::vector rcpp_rowMaximums(const arma::imat &mat, int nThreads) { 61 | // set up number of threads 62 | nThreads = check_nThreads(nThreads); 63 | 64 | // number of rows 65 | const int nrows = mat.n_rows; 66 | 67 | // create vector to store the result 68 | std::vector maximums(nrows); 69 | 70 | // loop over rows 71 | #pragma omp parallel for num_threads(nThreads) 72 | for (int i = 0; i < nrows; i++) { 73 | // find the maximum for this row 74 | maximums[i] = mat.row(i).max(); 75 | } 76 | 77 | return maximums; 78 | } 79 | 80 | // C++ version of depth2K function 81 | // [[Rcpp::export]] 82 | Rcpp::NumericMatrix rcpp_depth2K(const Rcpp::NumericMatrix &A, int nThreads) { 83 | // set up number of threads 84 | nThreads = check_nThreads(nThreads); 85 | 86 | // create the output matrix (same size as input) 87 | Rcpp::NumericMatrix Aout(A.rows(), A.cols()); 88 | 89 | // number of elements 90 | const long Asize = A.rows() * A.cols(); 91 | 92 | // loop over elements in parallel and apply operation 93 | #pragma omp parallel for num_threads(nThreads) 94 | for (long i = 0; i < Asize; i++) { 95 | Aout[i] = 1.0 / pow(2.0, A[i]); 96 | } 97 | 98 | // return matrix 99 | return Aout; 100 | } 101 | 102 | // C++ version of depth2Kmodp function 103 | // [[Rcpp::export]] 104 | Rcpp::NumericMatrix rcpp_depth2Kmodp(const Rcpp::NumericMatrix &depthvals, double modp, int nThreads) { 105 | // set up number of threads 106 | nThreads = check_nThreads(nThreads); 107 | 108 | // create matrix for storing the result 109 | Rcpp::NumericMatrix result(depthvals.rows(), depthvals.cols()); 110 | 111 | // size of the matrix 112 | const long size = depthvals.rows() * depthvals.cols(); 113 | 114 | // loop over the elements in parallel 115 | #pragma omp parallel for num_threads(nThreads) 116 | for (long i = 0; i < size; i++) { 117 | double value = 0.5 * pow(modp, depthvals[i] - 1.0); 118 | result[i] = (value == 0) ? 1.0 : value; 119 | } 120 | return result; 121 | } 122 | 123 | // C++ version of depth2Kbb function 124 | // [[Rcpp::export]] 125 | Rcpp::NumericMatrix rcpp_depth2Kbb(const Rcpp::NumericMatrix & depthvals, int nThreads, const double alph = 9999) { 126 | // set up number of threads 127 | nThreads = check_nThreads(nThreads); 128 | // create matrix for storing the result 129 | Rcpp::NumericMatrix result(depthvals.rows(), depthvals.cols()); 130 | // size of the matrix 131 | const long size = depthvals.rows() * depthvals.cols(); 132 | // precompute factor 133 | const double factor = 1.0/R::beta(alph, alph); 134 | // loop over the elements in parallel 135 | #pragma omp parallel for num_threads(nThreads) 136 | for (long i = 0; i < size; i++) { 137 | result[i] = R::beta(alph, depthvals[i] + alph) * factor; 138 | } 139 | return result; 140 | } 141 | 142 | 143 | // function for setting unused values of P0, P1 and genon01 to zero 144 | // modifies the matrices in-place (i.e. doesn't return anything) 145 | // [[Rcpp::export]] 146 | void rcpp_assignP0P1Genon01(Rcpp::NumericMatrix &P0, Rcpp::NumericMatrix &P1, Rcpp::NumericMatrix &genon01, 147 | const Rcpp::LogicalMatrix &usegeno, const Rcpp::NumericMatrix &dsub, double d4i, int nThreads) { 148 | // set up number of threads 149 | nThreads = check_nThreads(nThreads); 150 | 151 | // number of elements (assumes all inputs are the same size!) 152 | const long size = P0.rows() * P0.cols(); 153 | 154 | // loop over elements in parallel 155 | #pragma omp parallel for num_threads(nThreads) 156 | for (long i = 0; i < size; i++) { 157 | // set to zero if they match the conditions 158 | if (dsub[i] < d4i) { 159 | P0[i] = 0.0; 160 | P1[i] = 0.0; 161 | genon01[i] = 0.0; 162 | } 163 | else if (!usegeno[i]) { 164 | P0[i] = 0.0; 165 | P1[i] = 0.0; 166 | } 167 | } 168 | 169 | // nothing to return, matrices are modified in-place 170 | return; 171 | } 172 | -------------------------------------------------------------------------------- /GBSPedAssign.R: -------------------------------------------------------------------------------- 1 | #!/bin/echo Source me don't execute me 2 | pedver <- "1.3.0" 3 | cat("GBS-PedAssign for KGD version:",pedver,"\n") 4 | 5 | verif.ch <- c(".","Y","N") # NA, Y, N 6 | assign.rank <<- c("Y","I","B","A","E","F","M","N") 7 | # assign.pch <- c(16,2,6,1,15,13,13,4) 8 | assign.pch <<- c(16,2,6,1,15,70,77,4) 9 | 10 | # assume all in pedfile are in the genotype results. To do: remove those that are not 11 | pedsetup <- function() { 12 | OK4ped <<- TRUE 13 | if (!exists("developer")) developer <<- FALSE 14 | if (!exists("rel.thresh")) rel.thresh <<- 0.4 15 | if (!exists("rel.threshF")) rel.threshF <<- rel.thresh 16 | if (!exists("rel.threshM")) rel.threshM <<- rel.thresh 17 | if (!exists("emm.thresh")) emm.thresh <<- 0.01 # Excess for single parent match 18 | if (!exists("emm.thresh2")) emm.thresh2 <<- 2*emm.thresh # Excess for parent-pair match 19 | if (!exists("doublemm")) doublemm <<- FALSE # count 2 for AA x AA = BB types? 20 | if (!exists("emmdiff.thresh2")) emmdiff.thresh2 <<- 0 # alternate parent-pair based on emm 21 | if (!exists("inb.thresh")) inb.thresh <<- 0.2 # par relatedness - 2 * inbreeding 22 | if (!exists("minr4inb")) minr4inb <<- NULL # par relatedness - inbreeding 23 | if (!exists("boota.thresh")) boota.thresh <<- 99 # assignment threshold 24 | if (!exists("mindepth.mm")) mindepth.mm <<- 1 # changed to 1 to coincide to change to using exp mm rate 25 | if (!exists("matchmethod")) matchmethod <<- "rel" # choose best 2 parents based on "rel" or "EMM" 26 | if (!exists("indsubset")) indsubset <<- seq_along(seqID) 27 | if (!exists("snpsubset")) snpsubset <<- seq(nsnps) 28 | if (!exists("depth.min")) depth.min <<- 0 # for bootstrapping 29 | if (!exists("depth.max")) depth.max <<- Inf # for bootstrapping 30 | if (!exists("puse")) puse <<- p 31 | if (!exists("nboot")) nboot <<- 1000 # for bootstrapping 32 | if (!exists("boot.thresh")) boot.thresh <<- 0.05 # rel diff for invoking bootstrapping 33 | if (!exists("allow.selfing")) allow.selfing <<- FALSE # if FALSE find best parent pair without selfing 34 | cat("Parentage parameter settings\n----------------------------\n rel.threshF\t",rel.threshF, 35 | "\n rel.threshM\t",rel.threshM, 36 | "\n emm.thresh\t",emm.thresh, 37 | "\n doublemm\t",doublemm, 38 | "\n emm.thresh2\t",emm.thresh2, 39 | "\n emmdiff.thresh2",emmdiff.thresh2, 40 | "\n mindepth.mm\t",mindepth.mm, 41 | "\n inb.thresh\t",inb.thresh,"(parent relatedness - 2 * inbreeding)", 42 | "\n minr4inb\t",minr4inb, 43 | "\n boota.thresh\t",boota.thresh, 44 | "\n depth.min\t",depth.min,"(for bootstrapping)", 45 | "\n depth.max\t",depth.max,"(for bootstrapping)", 46 | "\n nboot\t\t",nboot, 47 | "\n boot.thresh\t",boot.thresh,"(relatedness difference to invoke bootstrapping)", 48 | "\n matchmethod\t",matchmethod, 49 | "\n allow.selfing\t",allow.selfing, 50 | "\n") 51 | if(length(indsubset) != nrow(eval(parse(text = GCheck)))) { 52 | OK4ped <<- FALSE 53 | cat("Number of individuals",length(indsubset),"does not match G matrix",nrow(eval(parse(text = GCheck))),"\n") 54 | } 55 | if (exists("pedfile")) if(!file.exists(pedfile)) { 56 | OK4ped <<- FALSE 57 | cat("Warning: Pedigree file", pedfile, "not found\n") 58 | } 59 | } #pedsetup 60 | 61 | panel.yeqx <- function(x,y,col.points="black",col.line="red",...){ #panel function for pairs with identity line added 62 | points(x,y,col=col.points,...) 63 | abline(a = 0,b = 1, col=col.line, ...) 64 | } 65 | coordprop <- function(propn,crange) crange[1]+propn*diff(crange) # function to find proportional positions on plots 66 | 67 | mismatch.par <- function(offspring.id, par.id, pedinfo) { 68 | # ids as in the pedigree file, if only 1 parent compare with all offspring 69 | noffspring <- length(offspring.id) 70 | if (length(par.id) == 1) par.id <- rep(par.id, noffspring) 71 | nmismatch <- ncompare <- exp.mmrate <- rep(NA, noffspring) 72 | opos <- match(pedinfo$seqID[match(offspring.id, pedinfo$IndivID)], seqID) 73 | ppos <- match(pedinfo$seqID[match(par.id, pedinfo$IndivID)], seqID) 74 | for (ioffspring in 1:noffspring) { 75 | depthi <- depth[opos[ioffspring], ] 76 | depthj <- depth[ppos[ioffspring], ] 77 | usnp <- intersect(snpsubset,which(depthi >= mindepth.mm & depthj >= mindepth.mm)) 78 | pi <- genon[opos[ioffspring], usnp]/2 79 | pj <- genon[ppos[ioffspring], usnp]/2 80 | Ko <- depth2K(depthi[usnp]) 81 | Kp <- depth2K(depthj[usnp]) 82 | nmismatch[ioffspring] <- length(which(abs(pi - pj) == 1)) 83 | ncompare[ioffspring] <- length(usnp) 84 | ptemp <- puse[usnp] 85 | P <- ptemp*(1-ptemp) 86 | expmm <- rep(NA,length(usnp)) 87 | ug <- which(pi==1) 88 | if(length(ug)>0) expmm[ug] <- P[ug] *(ptemp[ug]*Kp[ug] + (1-ptemp[ug])*Ko[ug] + Kp[ug]*Ko[ug] ) / (ptemp[ug]^2+2*P[ug]*Ko[ug]) 89 | ug <- which(pi==0.5) 90 | if(length(ug)>0) expmm[ug] <- 0 91 | ug <- which(pi==0) 92 | if(length(ug)>0) expmm[ug] <- P[ug] *(ptemp[ug]*Ko[ug] + (1-ptemp[ug])*Kp[ug] + Kp[ug]*Ko[ug] ) / ((1-ptemp[ug])^2+2*P[ug]*Ko[ug]) 93 | exp.mmrate[ioffspring] <- mean(expmm,na.rm=TRUE) 94 | } 95 | mmrate <- nmismatch/ncompare 96 | list(mmrate=mmrate,ncompare=ncompare,exp.mmrate=exp.mmrate) 97 | } 98 | 99 | cemult <- function(x,A) t(x*t(A)) # columnwise elementwise mult of A by x (usually length(x) = nrow(A)) 100 | ceadd <- function(x,A) t(x+t(A)) # columnwise elementwise add of A by x (usually length(x) = nrow(A)) 101 | 102 | mismatch.par.comb <- function(offspring.id, par.id, pedinfo) { 103 | # ids as in the pedigree file, all combs of offspring and parent (for given parent type) 104 | noffspring <- length(offspring.id) 105 | nmismatch <- ncompare <- exp.mmrate <- matrix(NA, nrow=noffspring,ncol=length(par.id)) 106 | opos <- match(pedinfo$seqID[match(offspring.id, pedinfo$IndivID)], seqID) 107 | ppos <- match(pedinfo$seqID[match(par.id, pedinfo$IndivID)], seqID) 108 | depthj <- depth[ppos, ] 109 | Kpall <- depth2K(depthj) 110 | for (ioffspring in 1:noffspring) { 111 | depthi <- depth[opos[ioffspring], ] 112 | usnp <- intersect(snpsubset,which(depthi >= mindepth.mm)) 113 | pi <- genon[opos[ioffspring], usnp]/2 114 | pj <- genon[ppos, usnp]/2 115 | Ko <- depth2K(depthi[usnp]) 116 | Kp <- Kpall[,usnp] 117 | nmismatch[ioffspring,] <- rowSums((abs(matrix(pi,nrow=nrow(pj),ncol=ncol(pj),byrow=TRUE) - pj) == 1),na.rm=TRUE) 118 | ncompare[ioffspring,] <- rowSums(!is.na(pj)) 119 | ptemp <- puse[usnp] 120 | P <- ptemp*(1-ptemp) 121 | expmm <- matrix(NA,ncol=length(usnp),nrow=length(par.id)) 122 | ug <- which(pi==1) 123 | if(length(ug)>0) expmm[,ug] <- cemult(P[ug]/ (ptemp[ug]^2+2*P[ug]*Ko[ug]), ceadd((1-ptemp[ug])*Ko[ug],cemult(ptemp[ug],Kp[,ug]) + cemult(Ko[ug],Kp[,ug])) ) 124 | ug <- which(pi==0.5) 125 | if(length(ug)>0) expmm[,ug] <- 0 126 | ug <- which(pi==0) 127 | if(length(ug)>0) expmm[,ug] <- cemult(P[ug]/ ((1-ptemp[ug])^2+2*P[ug]*Ko[ug]) , ceadd(ptemp[ug]*Ko[ug], cemult((1-ptemp[ug]),Kp[,ug]) + cemult(Ko[ug],Kp[,ug]) )) 128 | expmm[which(is.na(pj) | (depthj[,usnp] < mindepth.mm)) ] <- NA 129 | exp.mmrate[ioffspring,] <- rowMeans(expmm,na.rm=TRUE) 130 | } 131 | mmrate <- nmismatch/ncompare 132 | list(mmrate=mmrate,ncompare=ncompare,exp.mmrate=exp.mmrate) 133 | } 134 | 135 | mismatch.2par <- function(offspring.id, par1.id, par2.id,alph=Inf, pedinfo) { 136 | noffspring <- length(offspring.id) 137 | nmismatch <- ncompare <- exp.mmrate <- rep(NA, noffspring) 138 | opos <- match(pedinfo$seqID[match(offspring.id, pedinfo$IndivID)], seqID) 139 | p1pos <- match(pedinfo$seqID[match(par1.id, pedinfo$IndivID)], seqID) 140 | p2pos <- match(pedinfo$seqID[match(par2.id, pedinfo$IndivID)], seqID) 141 | for (ioffspring in 1:noffspring) { 142 | depthi <- depth[opos[ioffspring], ] 143 | depthj <- depth[p1pos[ioffspring], ] 144 | depthk <- depth[p2pos[ioffspring], ] 145 | usnp <- intersect(snpsubset,which(pmin(depthi,depthj,depthk) >= mindepth.mm)) 146 | pi <- genon[opos[ioffspring], usnp]/2 147 | pj <- genon[p1pos[ioffspring], usnp]/2 148 | pk <- genon[p2pos[ioffspring], usnp]/2 149 | Ko <- depth2K(depthi[usnp]) 150 | Kf <- depth2K(depthj[usnp]) 151 | Km <- depth2K(depthk[usnp]) 152 | ptemp <- puse[usnp] 153 | P <- ptemp*(1-ptemp) 154 | expmm <- rep(NA,length(usnp)) 155 | if(!doublemm) { 156 | ug <- which(pi==1) 157 | if(length(ug)>0) expmm[ug] <- 158 | ( ptemp[ug]^2 * P[ug] *(Km[ug]+Kf[ug])*(1+ Ko[ug]) + P[ug]^2*(2*Ko[ug] + Km[ug] + Kf[ug] - Kf[ug] *Km[ug] +2*Km[ug] *Ko[ug] +2 *Kf[ug] *Ko[ug] - 2 *Kf[ug] *Km[ug] *Ko[ug]) + 159 | 2*P[ug] *(1-ptemp[ug])^2 * Ko[ug] ) / (ptemp[ug]^2+2*P[ug]*Ko[ug]) 160 | ug <- which(pi==0.5) 161 | if(length(ug)>0) expmm[ug] <- ( (1-2*P[ug]) *(Km[ug]+Kf[ug]) +4*P[ug]*Kf[ug] *Km[ug] ) / 2 162 | ug <- which(pi==0) 163 | if(length(ug)>0) expmm[ug] <- 164 | ( 2*ptemp[ug]^2 * P[ug] * Ko[ug] + P[ug]^2*(2*Ko[ug] + Km[ug] + Kf[ug] - Kf[ug] *Km[ug] +2*Km[ug] *Ko[ug] +2 *Kf[ug] *Ko[ug] - 2 *Kf[ug] *Km[ug] *Ko[ug]) + 165 | P[ug] *(1-ptemp[ug])^2 * (Km[ug]+Kf[ug])*(1+ Ko[ug])) / ((1-ptemp[ug])^2+2*P[ug]*Ko[ug]) 166 | } else { # doublemm 167 | ug <- which(pi==1) 168 | if(length(ug)>0) expmm[ug] <- 169 | ( ptemp[ug]^2 * P[ug] *(Km[ug]+Kf[ug])*(1+ Ko[ug]) + P[ug]^2*(2*Ko[ug] + Km[ug] + Kf[ug] +2*Km[ug] *Ko[ug] +2 *Kf[ug] *Ko[ug]) + 170 | 2*P[ug] *(1-ptemp[ug])^2 * Ko[ug] * (1+ Km[ug] + Kf[ug]) ) / (ptemp[ug]^2+2*P[ug]*Ko[ug]) 171 | ug <- which(pi==0.5) 172 | if(length(ug)>0) expmm[ug] <- ( (1-2*P[ug]) *(Km[ug]+Kf[ug]) +4*P[ug]*Kf[ug] *Km[ug] ) / 2 173 | ug <- which(pi==0) 174 | if(length(ug)>0) expmm[ug] <- 175 | ( 2*ptemp[ug]^2 * P[ug] * Ko[ug] * (1+ Km[ug] + Kf[ug]) + P[ug]^2*(2*Ko[ug] + Km[ug] + Kf[ug] +2*Km[ug] *Ko[ug] +2 *Kf[ug] *Ko[ug]) + 176 | P[ug] *(1-ptemp[ug])^2 * (Km[ug]+Kf[ug])*(1+ Ko[ug])) / ((1-ptemp[ug])^2+2*P[ug]*Ko[ug]) 177 | } 178 | exp.mmrate[ioffspring] <- mean(expmm,na.rm=TRUE) 179 | nmismatch[ioffspring] <- length(which(abs(pi - pj) == 1 | abs(pi - pk) == 1 | (pj == pk & !pj==0.5 & pi == 0.5))) 180 | if(doublemm) nmismatch[ioffspring] <- nmismatch[ioffspring] + length(which(abs(pi - pj) == 1 & pj==pk )) 181 | ncompare[ioffspring] <- length(usnp) 182 | } 183 | mmrate <- nmismatch/ncompare 184 | list(mmrate=mmrate,ncompare=ncompare,exp.mmrate=exp.mmrate) 185 | } 186 | 187 | parmatch <- function(partype, Gmatrix, pedinfo) { 188 | if (missing(partype)) partype <- "Father" 189 | ParseqID <- with(pedinfo, seqID[match(pedinfo[, paste0(partype, "ID")], IndivID)]) 190 | offspringpos <- match(pedinfo$seqID, seqID[indsubset]) # all in pedigree file considered as offspring 191 | parpos <- match(ParseqID, seqID[indsubset]) 192 | ParRel <- Gmatrix[cbind(offspringpos, parpos)] 193 | Parmm <- mismatch.par(pedinfo$IndivID,pedinfo[, paste0(partype, "ID")], pedinfo) 194 | ParEMM <- Parmm$mmrate - Parmm$exp.mmrate 195 | rel.threshpar <- ifelse(partype == "Father", rel.threshF,rel.threshM) 196 | ParMatch <- (ParRel > rel.threshpar & ParEMM < emm.thresh) 197 | noffsp <- length(offspringpos) 198 | tempch <- verif.ch[match(ParMatch, c(NA,TRUE,FALSE))] 199 | if(any(!is.na(ParRel))) { 200 | relrange <- seq(min(ParRel,na.rm=TRUE),max(ParRel,na.rm=TRUE),diff(range(ParRel,na.rm=TRUE))/20) 201 | emmrange <- seq(min(ParEMM,na.rm=TRUE),max(ParEMM,na.rm=TRUE),diff(range(ParEMM,na.rm=TRUE))/20) 202 | png(paste0(partype, "Verify.png"), width = 640, height = 640, pointsize = cex.pointsize * 15) 203 | nthresh <- length(c(relrange,emmrange)) 204 | plotch <- c(rep(1,noffsp),rep(46,nthresh)) # circles, dots for thresholds 205 | pairs(cbind(c(ParRel, relrange,rep(rel.threshpar, length(emmrange))),c(ParEMM, rep(emm.thresh,length(relrange)),emmrange), c(offspringpos, rep(NA,nthresh)), c(parpos, rep(NA,nthresh))), 206 | labels = c("Relatedness", "EMM", "Offspring order", paste(partype, "order")), gap=0, pch=plotch) 207 | dev.off() 208 | } 209 | ncompare <- sum(!is.na(ParMatch)) 210 | nmatch <- sum(ParMatch, na.rm = TRUE) 211 | matchperc <- 100 * nmatch/ncompare 212 | cat(nmatch, "matches out of", ncompare, partype, "comparisons:", format(matchperc, digits = 3), "%\n") 213 | if (nmatch > 0) 214 | cat("Mean relatedness for", partype, "matches", format(mean(ParRel[which(ParMatch)]), digits = 3), "\n") 215 | if (ncompare > nmatch) 216 | cat("Mean relatedness for", partype, "non-matches", format(mean(ParRel[which(!ParMatch)]), digits = 3), "\n") 217 | ParInb <- diag(Gmatrix)[parpos] - 1 218 | matchinfo <- data.frame(ParRel, ParEMM, ParMatch,ParInb) 219 | names(matchinfo) <- paste0(partype, c("Rel", "EMM", "Match","Inb")) 220 | pedinfo <- cbind(pedinfo, matchinfo) 221 | tempinfo <- pedinfo; tempinfo[,paste0(partype, "rel")] <- tempinfo[,paste0(partype, "Rel")] 222 | parEplot(partype,ParEMM,tempinfo[,paste0(partype, "Rel")],matchtype="Rec", 223 | plotcol=fcolo[match(pedinfo$seqID,seqID)],relthresh=rel.threshpar) 224 | 225 | png(paste0("ExpMM-Rec", partype, ".png"), width = 640, height = 640, pointsize = cex.pointsize * 18) 226 | plot(Parmm$mmrate ~ Parmm$exp.mmrate , main = paste("Rec", partype, "Matches"), xlab = "Expected mismatch rate", 227 | ylab = "Raw mismatch rate",col=fcolo[match(pedinfo$seqID,seqID)], cex=0.8, pch=tempch) 228 | # legend("bottomright",title="Verify",cex=0.75,pch=verif.pch,legend=c("","Y","N") 229 | abline(a=0,b=1,col="red") 230 | abline(a=emm.thresh,b=1,col="grey") 231 | edges <- par("usr") # xl,xr,yb,yt 232 | poly1 <- data.frame(x1 = c(edges[1], edges[1], edges[2],edges[2],edges[1]), 233 | y1 = c(emm.thresh+edges[1],edges[4],edges[4],emm.thresh+edges[2],emm.thresh+edges[1])) 234 | polygon(poly1,col = rgb(0,0,0,alpha=0.1),border=NA) 235 | dev.off() 236 | 237 | if(developer) { 238 | Exprel <- 0.5 + pedinfo$Inb + ParInb/2 # par-offspr rel from theory 239 | reldevn <- ParRel - Exprel 240 | lmrel <- lm(ParRel ~ pedinfo$Inb + ParInb ) 241 | print(summary(lmrel)) 242 | reldevplots(partype,ParEMM,reldevn,matchtype="Rec",gmatch=tempinfo,Exprelvar=Exprel,plotcol=fcolo[match(pedinfo$seqID,seqID)],plotch=1) 243 | } 244 | pedinfo 245 | } 246 | 247 | bestmatch <- function(ospos, parpos, Guse, partype, matchcriterion = "rel", groupname=group, pedinfo) { 248 | #matchcriterion == "EMM" added later, but made to work exactly the same (will redo EMM for best 2 later) 249 | if(!matchcriterion == "EMM") matchcriterion <- "rel" 250 | if (missing(partype)) partype <- "Par" 251 | groupsize <- length(na.omit(parpos)) 252 | if(groupsize > 0 ) { 253 | diag(Guse) <- -1 # prevent self-parenting 254 | parchk <- Guse[ospos, parpos,drop=FALSE] 255 | maxpos.2 <- rep(NA,nrow(parchk)) 256 | if(matchcriterion == "rel") { 257 | maxpos <- apply(parchk, 1, which.max) 258 | if(groupsize > 1) { 259 | parchktemp <- parchk 260 | parchktemp[cbind(1:nrow(parchk), maxpos)] <- -1 261 | maxpos.2 <- apply(parchktemp, 1, which.max) 262 | rm(parchktemp) 263 | } 264 | } 265 | if(matchcriterion == "EMM") { 266 | offspringID.bm <- pedinfo$IndivID[match(seqID[indsubset][ospos],pedinfo$seqID)] 267 | parGroupID.bm <- pedinfo$IndivID[match(seqID[indsubset][parpos],pedinfo$seqID)] 268 | mm.bm <- mismatch.par.comb(offspringID.bm,parGroupID.bm, pedinfo) 269 | EMMchk <- with(mm.bm,mmrate-exp.mmrate) 270 | maxpos <- apply(EMMchk, 1, which.min) 271 | EMMchk[cbind(1:nrow(EMMchk), maxpos)] <- 1 272 | if(groupsize > 1) maxpos.2 <- apply(EMMchk, 1, which.min) 273 | } 274 | maxrel <- cbind(parchk[cbind(1:nrow(parchk), maxpos)], parchk[cbind(1:nrow(parchk), maxpos.2)]) 275 | rel12 <- Guse[cbind(parpos[maxpos],parpos[maxpos.2])] 276 | out.df <- data.frame(seqID[indsubset][ospos], seqID[indsubset][parpos[maxpos]], seqID[indsubset][parpos[maxpos.2]], 277 | relatedness = maxrel[, 1], rel2nd = maxrel[, 2], rel12=rel12, stringsAsFactors = FALSE) 278 | } else { 279 | # cat("Warning: a",partype,"group has no genotyped individuals, includes offspring seqID:",seqID[indsubset][ospos][1],"\n") 280 | cat("Warning:",partype,"group",groupname, "has no genotyped individuals\n") 281 | nprog <- length(ospos) 282 | out.df <- data.frame(seqID[indsubset][ospos],character(nprog),character(nprog),rep(NA,nprog),rep(NA,nprog),rep(NA,nprog),stringsAsFactors = FALSE) 283 | } 284 | names(out.df) <- c("seqID", paste0("Best", partype, "Match"), paste0(partype, "Match2nd"), paste0(partype, "rel"), paste0(partype, "rel2nd"), paste0(partype, "12rel") ) 285 | out.df 286 | } 287 | 288 | bestmatesmatch <- function(ospos, matespos, Guse, matchcriterion = "rel", pedinfo) { 289 | #matespos has a column for each mate (male, female) 290 | if(!matchcriterion == "EMM") matchcriterion <- "rel" 291 | if(nrow(matespos) > 0 ) { 292 | selfrel <- diag(Guse) 293 | diag(Guse) <- -1 # prevent self-parenting 294 | par1chk <- Guse[ospos, matespos[,1],drop=FALSE] 295 | par2chk <- Guse[ospos, matespos[,2],drop=FALSE] 296 | if(matchcriterion == "rel") { 297 | maxpos <- apply(pmin(par1chk,par2chk), 1, which.max) # find the best min parent rel (for each offspring) 298 | parchktemp <- par1chk 299 | parchktemp[cbind(1:nrow(parchktemp), maxpos)] <- -1 # only need to do this for one mate 300 | maxpos.2 <- apply(pmin(parchktemp,par2chk), 1, which.max) 301 | rm(parchktemp) 302 | } 303 | if(matchcriterion == "EMM") { 304 | offspringID.bm <- pedinfo$IndivID[match(seqID[indsubset][ospos],pedinfo$seqID)] 305 | parGroupID1.bm <- pedinfo$IndivID[match(seqID[indsubset][matespos[,1]],pedinfo$seqID)] 306 | parGroupID2.bm <- pedinfo$IndivID[match(seqID[indsubset][matespos[,2]],pedinfo$seqID)] 307 | # in a loop for now - is there a better way (similar to mismatch.par.comb ?) 308 | maxpos <- maxpos.2 <- integer(length(ospos)) 309 | for(ipos in 1:length(ospos)) { 310 | mm.bm <- mismatch.2par(rep(offspringID.bm[ipos],length(matespos)),parGroupID1.bm,parGroupID2.bm,pedinfo=pedinfo) # check offspring against all possible mate pairs 311 | EMMchk <- with(mm.bm,mmrate-exp.mmrate) 312 | maxpos[ipos] <- which.min(EMMchk) 313 | EMMchk[maxpos[ipos]] <- 1 314 | maxpos.2[ipos] <- which.min(EMMchk) 315 | } 316 | } 317 | diag(Guse) <- selfrel 318 | maxrel <- cbind(par1chk[cbind(1:nrow(par1chk), maxpos)], par2chk[cbind(1:nrow(par2chk), maxpos)], 319 | par1chk[cbind(1:nrow(par1chk), maxpos.2)],par2chk[cbind(1:nrow(par2chk), maxpos.2)]) #F1,M1, F2, M2 320 | rel12 <- cbind(Guse[cbind(matespos[maxpos,1],matespos[maxpos.2,1])],Guse[cbind(matespos[maxpos,2],matespos[maxpos.2,2])]) #F1F2, M1M2 321 | out.df <- data.frame(seqID[indsubset][ospos], seqID[indsubset][matespos[maxpos,1]], seqID[indsubset][matespos[maxpos,2]], 322 | seqID[indsubset][matespos[maxpos.2,1]], seqID[indsubset][matespos[maxpos.2,2]], 323 | maxrel, rel12, stringsAsFactors = FALSE) 324 | } else { 325 | out.df <- data.frame(matrix(character(0),ncol=5),matrix(numeric(0),ncol=6), stringsAsFactors = FALSE) 326 | } 327 | names(out.df) <- c("seqID", "BestFatherMatch", "BestMotherMatch","FatherMatch2nd","MotherMatch2nd", 328 | "Fatherrel","Motherrel","Fatherrel2nd","Motherrel2nd","Father12rel","Mother12rel") 329 | out.df 330 | } 331 | 332 | parEplot <- function(partype,EMMvar,relvar,matchtype="Best",plotcol="black",relthresh=rel.thresh,emmthresh=emm.thresh) { 333 | png(paste0(matchtype, partype, "MatchesE.png"), width = 640, height = 640, pointsize = cex.pointsize * 18) 334 | plot(EMMvar ~ relvar, main = paste(matchtype, partype, "Matches"), xlab = "Estimated Relatedness", 335 | ylab = "Excess mismatch rate",col=plotcol, cex=0.8) 336 | abline(v=relthresh, col="grey") 337 | abline(h=emmthresh, col="grey") 338 | edges <- par("usr") # xl,xr,yb,yt 339 | poly1 <- data.frame(x1 = c(edges[1],relthresh,relthresh,edges[2],edges[2],edges[1],edges[1]), 340 | y1 = c(edges[3],edges[3],emm.thresh,emm.thresh,edges[4],edges[4],edges[3])) 341 | polygon(poly1,col = rgb(0,0,0,alpha=0.1),border=NA) 342 | dev.off() 343 | } 344 | 345 | reldevplots <- function(partype,EMMvar,relvar,matchtype="Best",gmatch,Exprelvar,plotcol="black",plotch=1) { 346 | png(paste0(matchtype, partype, "MatchesDE.png"), width = 640, height = 640, pointsize = cex.pointsize * 18) 347 | plot(EMMvar ~ relvar, main = paste(matchtype, partype, "Matches"), xlab = "Relatedness Deviation", 348 | ylab = "Excess mismatch rate",col=plotcol, cex=0.8) 349 | abline(v=rel.thresh-0.5, col="grey") 350 | abline(h=emm.thresh, col="grey") 351 | dev.off() 352 | png(paste0(matchtype, partype, "MatchesExprel.png"), width = 640, height = 640, pointsize = cex.pointsize * 18) 353 | plot(gmatch[, paste0(partype, "rel")] ~ Exprelvar, pch=plotch, main = paste(matchtype, partype, "Matches"), 354 | xlab="Expected relatedness using estimated Inbreeding", ylab="Estimated relatedness") 355 | abline(a=0,b=1,col="red") 356 | abline(h=0.5,col="red",lty=2) 357 | dev.off() 358 | } 359 | 360 | trioplots <- function(BothMatches) { 361 | uo <- match(BothMatches$seqID,seqID) 362 | plotch <- assign.pch[match(BothMatches$BothAssign,assign.rank)] 363 | plotcol <- rep("mediumblue",length(uo)) 364 | plotcol[which(sampdepth[uo] < 1 )] <- "skyblue2" 365 | plotcol[which(sampdepth[uo] < 0.5 ) ] <- "grey75" 366 | uplot <- which(!is.na(BothMatches$relF1M1)) 367 | if(length(uplot) > 0) { 368 | if(sum(!is.na(BothMatches$Inb[uplot])) > 0) { 369 | png("ParRel-Inb.png", width = 960, height = 960, pointsize = cex.pointsize * 21) 370 | # plot(BothMatches$relF1M1[uplot] ~ BothMatches$Inb[uplot],pch=plotch[uplot],col=plotcol[uplot],sub="0.5 <= mean depth < 1",col.sub="skyblue2", 371 | # xlab="Estimated Inbreeding",ylab="Estimated (best match) parent relatedness", cex.sub=0.9) 372 | # abline(a=0, b=2, col="red") 373 | # lines(x=c(min(min(BothMatches$Inb[uplot]), (minr4inb-inb.thresh)/2), (minr4inb-inb.thresh)/2,max(BothMatches$Inb[uplot])), 374 | # y=c(minr4inb,minr4inb,2*max(BothMatches$Inb[uplot])+inb.thresh),col="grey") 375 | plot(BothMatches$Inb[uplot] ~ BothMatches$relF1M1[uplot] ,pch=plotch[uplot],col=plotcol[uplot],sub="0.5 <= mean depth < 1",col.sub="skyblue2", 376 | ylab="Estimated Inbreeding",xlab="Estimated (best match) parent relatedness", cex.sub=0.9) 377 | abline(a=0, b=1/2, col="red") 378 | lines(y=c(min(min(BothMatches$Inb[uplot]), (minr4inb-inb.thresh)/2), (minr4inb-inb.thresh)/2,max(BothMatches$Inb[uplot])), 379 | x=c(minr4inb,minr4inb,2*max(BothMatches$Inb[uplot])+inb.thresh),col="grey") 380 | edges <- par("usr") # xl,xr,yb,yt 381 | poly1 <- data.frame(x1 = c(minr4inb, minr4inb, edges[2],edges[2],minr4inb), 382 | y1 = c(edges[3],(minr4inb-inb.thresh)/2,(edges[2]-inb.thresh)/2,edges[3],edges[3])) 383 | polygon(poly1,col = rgb(0,0,0,alpha=0.1),border=NA) 384 | title(sub="X: unassigned parent(s)",adj=0,cex.sub=0.9) 385 | title(sub="mean depth < 0.5",col.sub="grey75",adj=0.95,cex.sub=0.8) 386 | dev.off() 387 | } 388 | png(paste0("ExpMM-Both.png"), width = 640, height = 640, pointsize = cex.pointsize * 18) 389 | plot(mmrateF1M1 ~ exp.mmrateF1M1, data=BothMatches[uplot,,drop=FALSE], main = paste("Best Parent Matches"), xlab = "Expected mismatch rate", 390 | ylab = "Raw mismatch rate",col=fcolo[uo][uplot], pch=plotch[uplot], cex=0.8) 391 | abline(a=0,b=1,col="red") 392 | abline(a=emm.thresh2,b=1,col="grey") 393 | pch.used <- sort(match(unique(BothMatches$BothAssign[uplot]),assign.rank)) 394 | legend("bottomright",title="Assign",cex=0.75,pch=assign.pch[pch.used],legend=assign.rank[pch.used]) 395 | edges <- par("usr") # xl,xr,yb,yt 396 | poly1 <- data.frame(x1 = c(edges[1], edges[1], edges[2],edges[2],edges[1]), 397 | y1 = c(emm.thresh2+edges[1],edges[4],edges[4],emm.thresh2+edges[2],emm.thresh2+edges[1])) 398 | polygon(poly1,col = rgb(0,0,0,alpha=0.1),border=NA) 399 | dev.off() 400 | } 401 | } 402 | 403 | groupmatch <- function(Guse, partype, pedinfo, groupsinfo) { 404 | rel.thresh <- ifelse(partype == "Father", rel.threshF,rel.threshM) 405 | groupIDs <- unique(pedinfo[, paste0(partype, "Group")]) 406 | groupIDs <- na.omit(groupIDs) 407 | groupIDs <- groupIDs[!groupIDs == ""] 408 | ngroups <- length(groupIDs) 409 | nsnpsub <- length(snpsubset) 410 | if (ngroups > 0) { 411 | for (g in 1:ngroups) { 412 | group <- groupIDs[g] 413 | offspringID <- pedinfo$IndivID[which(pedinfo[, paste0(partype, "Group")] == group)] 414 | ParGroupID <- groupsinfo$IndivID[which(groupsinfo$ParGroup == group)] 415 | offspringseqID <- with(pedinfo, seqID[match(offspringID, IndivID)]) 416 | ParGroupseqID <- with(pedinfo, seqID[match(ParGroupID, IndivID)]) 417 | offspringpos <- match(offspringseqID, seqID[indsubset]) 418 | parpos <- match(ParGroupseqID, seqID[indsubset]) 419 | gmatch <- bestmatch(offspringpos, parpos, Guse, partype, matchcriterion = matchmethod, groupname=group,pedinfo=pedinfo) 420 | if (g == 1) allgmatch <- gmatch else allgmatch <- rbind(allgmatch, gmatch) 421 | } 422 | allgmatch$IndivID <- pedinfo$IndivID[match(allgmatch$seqID, pedinfo$seqID)] 423 | ncolallg <- ncol(allgmatch) 424 | allgmatch <- allgmatch[, c(ncolallg, 1:(ncolallg-1))] 425 | if (nrow(allgmatch) > 0) { 426 | allgmatch[, paste0("Best", partype, "Match")] <- pedinfo$IndivID[match(allgmatch[, paste0("Best", partype, "Match")], pedinfo$seqID)] 427 | allgmatch[, paste0(partype, "Match2nd")] <- pedinfo$IndivID[match(allgmatch[, paste0(partype, "Match2nd")], pedinfo$seqID)] 428 | mmstats <- mismatch.par(allgmatch$IndivID, allgmatch[, paste0("Best", partype, "Match")], pedinfo) 429 | allgmatch[, paste0("mmrate", partype)] <- mmstats$mmrate 430 | allgmatch[, paste0("mmnum", partype)] <- mmstats$ncompare 431 | allgmatch[, paste0("exp.mmrate", partype)] <- mmstats$exp.mmrate 432 | mmstats <- mismatch.par(allgmatch$IndivID, allgmatch[, paste0(partype, "Match2nd")], pedinfo) 433 | allgmatch[, paste0("mmrate", partype,"2")] <- mmstats$mmrate 434 | allgmatch[, paste0("exp.mmrate", partype,"2")] <- mmstats$exp.mmrate 435 | EMMrate <- allgmatch[, paste0("mmrate", partype)] - allgmatch[, paste0("exp.mmrate", partype)] 436 | EMMrate2 <- allgmatch[, paste0("mmrate", partype, "2")] - allgmatch[, paste0("exp.mmrate", partype, "2")] 437 | ### bootstrap section 438 | bootpos <- which(allgmatch[,paste0(partype, "rel")] > rel.thresh & EMMrate < emm.thresh & allgmatch[,paste0(partype, "rel")] - allgmatch[,paste0(partype, "rel2nd")] < boot.thresh) 439 | bootpos <- intersect(bootpos,which(!is.na(allgmatch[, paste0(partype, "Match2nd")]))) 440 | if(length(bootpos) > 0) { 441 | for(bcase in seq_along(bootpos)) { 442 | offspringseqID <- allgmatch$seqID[bootpos[bcase]] 443 | ParseqID <- with(pedinfo, seqID[match(allgmatch[bootpos[bcase], paste0("Best", partype, "Match")], IndivID)]) 444 | Par2seqID <- with(pedinfo, seqID[match(allgmatch[bootpos[bcase], paste0(partype, "Match2nd")], IndivID)]) 445 | offspringpos <- match(offspringseqID, seqID[indsubset]) 446 | parpos <- match(ParseqID, seqID[indsubset]) 447 | par2pos <- match(Par2seqID, seqID[indsubset]) 448 | offs.depth <- depth[indsubset[offspringpos], snpsubset] 449 | par.depth <- depth[indsubset[parpos], snpsubset] 450 | par2.depth <- depth[indsubset[par2pos], snpsubset] 451 | offs.genon0 <- genon[indsubset[offspringpos], snpsubset] 452 | par.genon0 <- genon[indsubset[parpos], snpsubset] 453 | par2.genon0 <- genon[indsubset[par2pos], snpsubset] 454 | if (depth.min > 1 | depth.max < Inf) { 455 | offs.genon0[offs.depth < depth.min] <- NA 456 | offs.genon0[offs.depth > depth.max] <- NA 457 | par.genon0[par.depth < depth.min] <- NA 458 | par.genon0[par.depth > depth.max] <- NA 459 | offs.depth[is.na(offs.genon0)] <- 0 460 | par.depth[is.na(par.genon0)] <- 0 461 | } 462 | offs.usegeno <- !is.na(offs.genon0) 463 | par.usegeno <- !is.na(par.genon0) 464 | par2.usegeno <- !is.na(par2.genon0) 465 | offs.genon0 <- offs.genon0 - rep.int(2 * puse[snpsubset], rep(1, nsnpsub)) 466 | offs.genon0[is.na(offs.genon0)] <- 0 # equivalent to using 2p for missing genos 467 | par.genon0 <- par.genon0 - rep.int(2 * puse[snpsubset], rep(1, nsnpsub)) 468 | par.genon0[is.na(par.genon0)] <- 0 # equivalent to using 2p for missing genos 469 | par2.genon0 <- par2.genon0 - rep.int(2 * puse[snpsubset], rep(1, nsnpsub)) 470 | par2.genon0[is.na(par2.genon0)] <- 0 # equivalent to using 2p for missing genos 471 | # relcheck <- tcrossprod(matrix(offs.genon0,nrow=1),matrix(par.genon0,nrow=1)) / 472 | # sum((2*puse[snpsubset]*(1-puse[snpsubset]))[offs.usegeno & par.usegeno]) 473 | bootrels <- bootrels2 <- double(nboot) 474 | for (b in seq(nboot)) { 475 | bootsnps <- sample.int(nsnpsub,replace=TRUE) 476 | bootrels[b] <- tcrossprod(matrix(offs.genon0[bootsnps],nrow=1),matrix(par.genon0[bootsnps],nrow=1)) / 477 | sum((2*puse[snpsubset[bootsnps]]*(1-puse[snpsubset[bootsnps]]))[offs.usegeno[bootsnps] & par.usegeno[bootsnps]]) 478 | bootrels2[b] <- tcrossprod(matrix(offs.genon0[bootsnps],nrow=1),matrix(par2.genon0[bootsnps],nrow=1)) / 479 | sum((2*puse[snpsubset[bootsnps]]*(1-puse[snpsubset[bootsnps]]))[offs.usegeno[bootsnps] & par2.usegeno[bootsnps]]) 480 | } 481 | allgmatch[bootpos[bcase],paste0(partype,"sd")] <- sd(bootrels) 482 | allgmatch[bootpos[bcase],paste0(partype,"Reliability")] <- 100*sum(bootrels > bootrels2) /nboot 483 | } 484 | } #bootpos 485 | ### end of bootstrap section 486 | tempAssign <- rep("Y",nrow(allgmatch)) 487 | if(length(bootpos) > 0) tempAssign[which(allgmatch[,paste0(partype,"Reliability")] < boota.thresh )] <- "B" 488 | tempAssign[which(EMMrate > emm.thresh)] <- "E" 489 | # tempAssign[which(allgmatch[, paste0(partype, "rel")] < rel.thresh | is.na(allgmatch[, paste0(partype, "rel")]))] <- "N" # replace with next 2 for NA 490 | tempAssign[which(allgmatch[, paste0(partype, "rel")] < rel.thresh )] <- "N" 491 | tempAssign[is.na(allgmatch[, paste0(partype, "rel")])] <- NA 492 | # for E assigns, check if the 2nd parent is possible. 493 | tempAssign[which(allgmatch[, paste0(partype, "rel2nd")] >= rel.thresh & EMMrate2 < emm.thresh & tempAssign == "E")] <- "A" 494 | allgmatch[, paste0(partype, "Assign")] <- tempAssign 495 | cat("\nSummary of",partype,"Assignments\n") 496 | print(addmargins(table(allgmatch[, paste0(partype, "Assign")],useNA="ifany"))) 497 | png(paste0("Best", partype, "Matches.png"), width = 640, height = 640, pointsize = cex.pointsize * 18) 498 | plot(allgmatch[, paste0("mmrate", partype)] ~ allgmatch[, paste0(partype, "rel")], main = paste("Best", partype, "Matches"), xlab = "Estimated Relatedness", 499 | ylab = "Raw mismatch rate",col=fcolo[match(allgmatch$seqID,seqID)], cex=0.8) 500 | abline(v=rel.thresh, col="grey") 501 | dev.off() 502 | parEplot(partype,EMMrate,allgmatch[, paste0(partype, "rel")],plotcol=fcolo[match(allgmatch$seqID,seqID)],relthresh=rel.thresh) 503 | tempch <- assign.pch[match(tempAssign,assign.rank)] 504 | png(paste0("ExpMM-", partype, ".png"), width = 640, height = 640, pointsize = cex.pointsize * 18) 505 | plot(allgmatch[, paste0("mmrate", partype)] ~ allgmatch[, paste0("exp.mmrate", partype)] , main = paste("Best", partype, "Matches"), xlab = "Expected mismatch rate", 506 | ylab = "Raw mismatch rate",col=fcolo[match(allgmatch$seqID,seqID)], cex=0.8, pch=tempch) 507 | pch.used <- sort(match(unique(tempAssign),assign.rank)) 508 | legend("bottomright",title="Assign",cex=0.75,pch=assign.pch[pch.used],legend=assign.rank[pch.used]) 509 | abline(a=0,b=1,col="red") 510 | abline(a=emm.thresh,b=1,col="grey") 511 | edges <- par("usr") # xl,xr,yb,yt 512 | poly1 <- data.frame(x1 = c(edges[1], edges[1], edges[2],edges[2],edges[1]), 513 | y1 = c(emm.thresh+edges[1],edges[4],edges[4],emm.thresh+edges[2],emm.thresh+edges[1])) 514 | polygon(poly1,col = rgb(0,0,0,alpha=0.1),border=NA) 515 | dev.off() 516 | mmpalette <- colorRampPalette(c("blue","red"))(50) 517 | mmcol <- mmpalette[trunc(1+50*(EMMrate-min(EMMrate,na.rm=TRUE))/(diff(range(EMMrate,na.rm=TRUE))+1E-6))] 518 | legend_image <- as.raster(matrix(rev(mmpalette), ncol = 1)) 519 | xyrange <- range(c(allgmatch[, paste0(partype, "rel")],allgmatch[, paste0(partype,"rel2nd")]),na.rm=TRUE) 520 | png(paste0("Best2", partype, "Matches.png"), width = 640, height = 640, pointsize = cex.pointsize * 18) 521 | plot(allgmatch[, paste0(partype,"rel2nd")] ~ allgmatch[, paste0(partype, "rel")], main = paste("Best", partype, "Matches"), xlab = "Estimated Relatedness", 522 | ylab = "Relatedness to 2nd best",col=mmcol,xlim=xyrange,ylim=xyrange, cex=0.8) 523 | abline(a=0,b=1) 524 | abline(v=rel.thresh, col="grey") 525 | abline(h=rel.thresh, col="grey") 526 | rasterImage(legend_image, coordprop(0.05,xyrange), coordprop(0.7,xyrange), coordprop(0.1,xyrange), coordprop(0.9,xyrange)) 527 | text(x=coordprop(0.11,xyrange),y=coordprop(0.7,xyrange),signif(min(EMMrate,na.rm=TRUE),2),pos=4,cex=0.8) 528 | text(x=coordprop(0.11,xyrange),y=coordprop(0.9,xyrange),signif(max(EMMrate,na.rm=TRUE),2),pos=4,cex=0.8) 529 | text(x=coordprop(0,xyrange),y=coordprop(0.95,xyrange),"Excess MM rate best",pos=4,cex=0.8) 530 | dev.off() 531 | noffspringpar <- data.frame(table(allgmatch[, paste0("Best", partype, "Match")])) 532 | colnames(noffspringpar)[2] <- paste0(partype, "Freq") 533 | groupsinfo <- merge(groupsinfo, noffspringpar, by.x = "IndivID", by.y = "Var1", all = TRUE) 534 | groupsinfo[is.na(groupsinfo[, paste0(partype, "Freq")]), paste0(partype, "Freq")] <- 0 535 | uo <- match(allgmatch$seqID,seqID[indsubset]) 536 | allgmatch$Inb <- diag(Guse)[uo] - 1 537 | Parposped <- match(allgmatch[,paste0("Best", partype, "Match")],pedinfo$IndivID) 538 | Parposg <- match(pedinfo$seqID[Parposped],seqID[indsubset]) 539 | allgmatch[,paste0(partype,"Inb")] <- diag(Guse)[Parposg] - 1 540 | if(developer) { 541 | Exprel <- 0.5 + allgmatch$Inb+allgmatch[,paste0(partype,"Inb")]/2 # par-offspr rel from theory 542 | reldevn <- allgmatch[,paste0(partype,"rel")] - Exprel 543 | uY <- which(tempAssign == "Y") 544 | if(length(uY) > 0) { 545 | lmrel <- lm(allgmatch[uY,paste0(partype,"rel")] ~ allgmatch$Inb[uY] + allgmatch[uY,paste0(partype,"Inb")] ) 546 | print(summary(lmrel)) 547 | } 548 | reldevplots(partype,EMMrate,reldevn,gmatch=allgmatch,Exprelvar=Exprel,plotcol=fcolo[match(allgmatch$seqID,seqID)],plotch=tempch) 549 | } 550 | } 551 | write.csv(allgmatch, paste0(partype, "Matches.csv"), row.names = FALSE) 552 | list(groupmatch=allgmatch, groupsinfo=groupsinfo) 553 | } else { 554 | NULL 555 | } 556 | } 557 | 558 | matesmatch <- function(Guse, pedinfo, matesinfo) { 559 | groupIDs <- na.omit(unique(pedinfo$MatesGroup)) 560 | groupIDs <- groupIDs[!groupIDs == ""] 561 | ngroups <- length(groupIDs) 562 | if (ngroups > 0) { 563 | for (g in 1:ngroups) { 564 | group <- groupIDs[g] 565 | offspringID <- pedinfo$IndivID[which(pedinfo$MatesGroup == group)] 566 | GroupID <- matesinfo[which(matesinfo$MatesGroup == group),c("MaleID","FemaleID")] 567 | offspringseqID <- with(pedinfo, seqID[match(offspringID, IndivID)]) 568 | GroupseqID <- with(pedinfo, cbind(seqID[match(GroupID[,1], IndivID)],seqID[match(GroupID[,2], IndivID)])) 569 | offspringpos <- match(offspringseqID, seqID[indsubset]) 570 | matespos <- cbind(match(GroupseqID[,1], seqID[indsubset]),match(GroupseqID[,2], seqID[indsubset])) 571 | gmatch <- bestmatesmatch(offspringpos, matespos, Guse, matchcriterion = matchmethod,pedinfo=pedinfo) 572 | if (g == 1) allgmatch <- gmatch else allgmatch <- rbind(allgmatch, gmatch) 573 | } 574 | allgmatch$IndivID <- pedinfo$IndivID[match(allgmatch$seqID, pedinfo$seqID)] 575 | ncolallg <- ncol(allgmatch) 576 | allgmatch <- allgmatch[, c(ncolallg, 1:(ncolallg-1))] 577 | if (nrow(allgmatch) > 0) { 578 | allgmatch$BestFatherMatch <- pedinfo$IndivID[match(allgmatch$BestFatherMatch, pedinfo$seqID)] 579 | allgmatch$BestMotherMatch <- pedinfo$IndivID[match(allgmatch$BestMotherMatch, pedinfo$seqID)] 580 | allgmatch$FatherMatch2nd <- pedinfo$IndivID[match(allgmatch$FatherMatch2nd, pedinfo$seqID)] 581 | allgmatch$MotherMatch2nd <- pedinfo$IndivID[match(allgmatch$MotherMatch2nd, pedinfo$seqID)] 582 | mmstats <- mismatch.par(allgmatch$IndivID, allgmatch$BestFatherMatch, pedinfo) 583 | allgmatch$mmrateFather <- mmstats$mmrate 584 | allgmatch$mmnumFather <- mmstats$ncompare 585 | allgmatch$exp.mmrateFather <- mmstats$exp.mmrate 586 | EMMrateFather <- allgmatch$mmrateFather - allgmatch$exp.mmrateFather 587 | #not doing 2nds - its the combo that is important here (so no single parent A's either) 588 | mmstats <- mismatch.par(allgmatch$IndivID, allgmatch$BestMotherMatch, pedinfo) 589 | allgmatch$mmrateMother <- mmstats$mmrate 590 | allgmatch$mmnumMother <- mmstats$ncompare 591 | allgmatch$exp.mmrateMother <- mmstats$exp.mmrate 592 | EMMrateMother <- allgmatch$mmrateMother - allgmatch$exp.mmrateMother 593 | # closerel / boostrapping implemented for mating pairs analysis 594 | tempAssign <- rep("Y",nrow(allgmatch)) 595 | tempAssign[which(pmax(EMMrateFather,EMMrateMother) > emm.thresh)] <- "E" 596 | tempAssign[which(allgmatch$Fatherrel < rel.threshF)] <- "N" 597 | tempAssign[which(allgmatch$Motherrel < rel.threshM)] <- "N" 598 | allgmatch$BothAssign <- tempAssign 599 | tempch <- assign.pch[match(tempAssign,assign.rank)] 600 | #havent done joint EMM yet ... 601 | parEplot("Father",EMMrateFather,allgmatch$Fatherrel,plotcol=fcolo[match(allgmatch$seqID,seqID)],relthresh=rel.threshF) 602 | parEplot("Mother",EMMrateMother,allgmatch$Motherrel,plotcol=fcolo[match(allgmatch$seqID,seqID)],relthresh=rel.threshM) 603 | noffspringmates <- data.frame(table(with(allgmatch,paste(BestFatherMatch,BestMotherMatch,sep=":")))) 604 | colnames(noffspringmates)[2] <- "MatesPairFreq" 605 | matesinfo$pairID <- with(matesinfo,paste(MaleID,FemaleID,sep=":")) 606 | matesinfo <- merge(matesinfo, noffspringmates, by.x = "pairID", by.y = "Var1", all = TRUE) 607 | matesinfo$MatesPairFreq[is.na(matesinfo$MatesPairFreq)] <- 0 608 | matesinfo$pairID <- NULL 609 | uo <- match(allgmatch$seqID,seqID[indsubset]) 610 | allgmatch$Inb <- diag(Guse)[uo] - 1 611 | Parposped <- match(allgmatch$BestFatherMatch,pedinfo$IndivID) 612 | Parposg <- match(pedinfo$seqID[Parposped],seqID[indsubset]) 613 | allgmatch$FatherInb <- diag(Guse)[Parposg] - 1 614 | Parposped <- match(allgmatch$BestMotherMatch,pedinfo$IndivID) 615 | Parposg <- match(pedinfo$seqID[Parposped],seqID[indsubset]) 616 | allgmatch$MotherInb <- diag(Guse)[Parposg] - 1 617 | if(developer) { 618 | uY <- which(tempAssign == "Y") 619 | Exprel <- 0.5 + allgmatch$Inb+allgmatch$FatherInb/2 # par-offspr rel from theory 620 | reldevn <- allgmatch$Fatherrel - Exprel 621 | if(length(uY) > 0) { 622 | lmrel <- lm(allgmatch$Fatherrel[uY] ~ allgmatch$Inb[uY] + allgmatch$FatherInb[uY] ) 623 | print(summary(lmrel)) 624 | } 625 | reldevplots("Father",EMMrateFather,reldevn,gmatch=allgmatch,Exprelvar=Exprel,plotcol=fcolo[match(allgmatch$seqID,seqID)],plotch=tempch) 626 | Exprel <- 0.5 + allgmatch$Inb+allgmatch$MotherInb/2 # par-offspr rel from theory 627 | reldevn <- allgmatch$Motherrel - Exprel 628 | if(length(uY) > 0) { 629 | lmrel <- lm(allgmatch$Motherrel[uY] ~ allgmatch$Inb[uY] + allgmatch$MotherInb[uY] ) 630 | print(summary(lmrel)) 631 | } 632 | reldevplots("Mother",EMMrateMother,reldevn,gmatch=allgmatch,Exprelvar=Exprel,plotcol=fcolo[match(allgmatch$seqID,seqID)],plotch=tempch) 633 | } 634 | } 635 | write.csv(allgmatch, "MatePairMatches.csv", row.names = FALSE) 636 | list(matesmatch=allgmatch, matesinfo=matesinfo) 637 | } else { 638 | NULL 639 | } 640 | } 641 | 642 | ssbbmm <- function(bbpar,uuse, pedinfo, BothMatches, quiet=FALSE) { 643 | depth2K <<- depth2Kchoose (dmodel="bb", bbpar) 644 | mmstatsbb <- mismatch.2par(BothMatches$IndivID,BothMatches$BestFatherMatch, BothMatches$BestMotherMatch,pedinfo=pedinfo) 645 | mmssbb <- sum((mmstatsbb$mmrate-mmstatsbb$exp.mmrate)[uuse]^2) 646 | if(!quiet) cat("bb param = ",bbpar,"ss = ",mmssbb,"\n") 647 | mmssbb 648 | } 649 | 650 | ssmpmm <- function(mppar,uuse, pedinfo, BothMatches, quiet=FALSE) { 651 | depth2K <<- depth2Kchoose (dmodel="modp", mppar) 652 | mmstatsmp <- mismatch.2par(BothMatches$IndivID,BothMatches$BestFatherMatch, BothMatches$BestMotherMatch,pedinfo=pedinfo) 653 | mmssmp <- sum((mmstatsmp$mmrate-mmstatsmp$exp.mmrate)[uuse]^2) 654 | if(!quiet) cat("mp param = ",mppar,"ss = ",mmssmp,"\n") 655 | mmssmp 656 | } 657 | 658 | addtagIDs <- function(sampinfo,indvar,tagvar,matchtype="both", pedresults) { 659 | matchtype <- tolower(matchtype) 660 | # if(matchtype=="both") pedresults <- BothMatches 661 | # if(matchtype=="father") pedresults <- FatherMatches 662 | # if(matchtype=="mother") pedresults <- MotherMatches 663 | progpos <- match(pedresults$IndivID,sampinfo[,indvar]) 664 | pedresults$IndivTag <- sampinfo[progpos,tagvar] 665 | if(matchtype=="both" | matchtype=="father") { 666 | fpos <- match(pedresults$BestFatherMatch,sampinfo[,indvar]) 667 | pedresults$FatherTag <- sampinfo[fpos,tagvar] 668 | } 669 | if(matchtype=="both" | matchtype=="mother") { 670 | mpos <- match(pedresults$BestMotherMatch,sampinfo[,indvar]) 671 | pedresults$MotherTag <- sampinfo[mpos,tagvar] 672 | } 673 | pedresults 674 | } 675 | 676 | bestparPCA <- function(Gobj,sfx="",keypos=NULL, pedinfo, BothMatches) { 677 | plotch <- assign.pch[match(BothMatches$BothAssign,assign.rank)] 678 | ukeep <- which(seqID[Gobj$indsubset] %in% pedinfo$seqID) 679 | uf <- match(seqID[Gobj$indsubset][ukeep],BothMatches$seqID) 680 | uo <- match(BothMatches$seqID,seqID[Gobj$indsubset][ukeep]) 681 | nprog=length(uo) 682 | pchuse <- plotch[uf] 683 | pchuse[is.na(pchuse)] <- 16 684 | png(paste0("PC-BestParents",sfx,".png"), width = 640, height = 640, pointsize = cex.pointsize * 15) 685 | with(Gobj$PC, plot(x[ukeep, 2] ~ x[ukeep, 1], cex = 1, col = fcolo[Gobj$indsubset][ukeep], pch=pchuse, xlab = "Principal component 1", ylab = "Principal component 2") ) 686 | ParentLines <- data.frame(x=rep(NA,3*nprog),y=rep(NA,3*nprog)) 687 | ParentLines[seq(1,(3*nprog-2),3),] <- Gobj$PC$x[ukeep[uo],1:2] 688 | ParentLines[seq(2,(3*nprog-1),3),] <- Gobj$PC$x[match(pedinfo$seqID[match(BothMatches$BestFatherMatch,pedinfo$IndivID)],seqID[Gobj$indsubset]),1:2] 689 | lines(ParentLines,col="blue") 690 | ParentLines[seq(2,(3*nprog-1),3),] <- Gobj$PC$x[match(pedinfo$seqID[match(BothMatches$BestMotherMatch,pedinfo$IndivID)],seqID[Gobj$indsubset]),1:2] 691 | lines(ParentLines,col="deeppink") 692 | pch.used <- sort(match(unique(BothMatches$BothAssign),assign.rank)) 693 | if(!is.null(keypos)) legend(keypos,title="Assign",cex=0.75,pch=assign.pch[pch.used],legend=assign.rank[pch.used]) 694 | dev.off() 695 | invisible(NULL) 696 | } 697 | 698 | GBSPed <- function () { 699 | pedsetup() 700 | outobj <- list() 701 | if (OK4ped & exists("pedfile") & exists("GCheck")) { 702 | pedinfo <- suppressWarnings(read.csv(pedfile, stringsAsFactors = FALSE, colClasses=c(FatherGroup="character", MotherGroup="character"))) 703 | pedinfo <- pedinfo[!is.na(pedinfo$seqID), ] 704 | pedinfo <- pedinfo[!is.na(match(pedinfo$seqID, seqID[indsubset])), ] 705 | uo <- match(pedinfo$seqID,seqID) 706 | pedinfo$Inb <- diag(eval(parse(text = GCheck)))[uo] - 1 707 | dupids <- which(duplicated(pedinfo$IndivID)) 708 | if(length(dupids)>0) cat("Warning: dupicates in IndivID",pedinfo$IndivID[dupids],"\n") 709 | dupids <- which(duplicated(pedinfo$seqID)) 710 | if(length(dupids)>0) cat("Warning: dupicates in seqID",pedinfo$seqID[dupids],"\n") 711 | if ("FatherID" %in% colnames(pedinfo)) 712 | pedinfo <- parmatch("Father", eval(parse(text = GCheck)), pedinfo=pedinfo) 713 | if ("MotherID" %in% colnames(pedinfo)) 714 | pedinfo <- parmatch("Mother", eval(parse(text = GCheck)), pedinfo=pedinfo) 715 | if ("FatherID" %in% colnames(pedinfo) & "MotherID" %in% colnames(pedinfo)) { 716 | Par2mm <- mismatch.2par(pedinfo$IndivID,pedinfo$FatherID,pedinfo$MotherID, pedinfo=pedinfo) 717 | pedinfo$FandMEMM <- with(Par2mm,mmrate-exp.mmrate) 718 | pedinfo$FandMmatch <- with(pedinfo,FatherMatch & MotherMatch & FandMEMM < emm.thresh2) 719 | if (is.character(pedinfo$IndivID)) { 720 | umiss <- which(pedinfo$FatherID == "") 721 | if (length(umiss) > 0) pedinfo$FatherID[umiss] <- NA 722 | umiss <- which(pedinfo$MotherID == "") 723 | if (length(umiss) > 0) pedinfo$MotherID[umiss] <- NA 724 | } 725 | tempch <- verif.ch[match(pedinfo$FandMmatch, c(NA,TRUE,FALSE))] 726 | png(paste0("ExpMM-RecBoth.png"), width = 640, height = 640, pointsize = cex.pointsize * 18) 727 | plot(Par2mm$mmrate ~ Par2mm$exp.mmrate, main = paste("Rec Parent Matches"), xlab = "Expected mismatch rate", 728 | ylab = "Raw mismatch rate",col=fcolo[match(pedinfo$seqID,seqID)], pch=tempch, cex=0.8) 729 | abline(a=0,b=1,col="red") 730 | abline(a=emm.thresh2,b=1,col="grey") 731 | # legend("bottomright",title="Assign",cex=0.75,pch=assign.pch[pch.used],legend=assign.rank[pch.used]) 732 | edges <- par("usr") # xl,xr,yb,yt 733 | poly1 <- data.frame(x1 = c(edges[1], edges[1], edges[2],edges[2],edges[1]), 734 | y1 = c(emm.thresh2+edges[1],edges[4],edges[4],emm.thresh2+edges[2],emm.thresh2+edges[1])) 735 | polygon(poly1,col = rgb(0,0,0,alpha=0.1),border=NA) 736 | dev.off() 737 | 738 | famnumber <- rep(NA, nrow(pedinfo)) 739 | if(is.character(pedinfo$FatherID)) pedinfo$FatherID[pedinfo$FatherID==""] <- NA 740 | if(is.character(pedinfo$MotherID)) pedinfo$MotherID[pedinfo$MotherID==""] <- NA 741 | famtable <- with(pedinfo, table(FatherID, MotherID)) 742 | fampos <- which(famtable > 1, arr.ind = TRUE) 743 | if(nrow(fampos)>0) { 744 | famfathers <- dimnames(famtable)$FatherID[fampos[, 1]] 745 | fammothers <- dimnames(famtable)$MotherID[fampos[, 2]] 746 | if (is.numeric(pedinfo$FatherID)) 747 | famfathers <- as.numeric(famfathers) 748 | if (is.numeric(pedinfo$FatherID)) 749 | famfathers <- as.numeric(famfathers) 750 | noffspring <- famtable[fampos] 751 | nfamilies <- length(noffspring) 752 | famresults <- rep(NA, nfamilies) 753 | for (ifam in 1:nfamilies) { 754 | famnumber[which(pedinfo$FatherID == famfathers[ifam] & pedinfo$MotherID == fammothers[ifam])] <- ifam 755 | uoffspring <- match(pedinfo$seqID[which(famnumber == ifam)], seqID[indsubset]) 756 | famresults[ifam] <- mean(eval(parse(text = GCheck))[uoffspring, uoffspring][upper.tri(diag(nrow = length(uoffspring)))]) 757 | } 758 | cat("Mean relatedness for full-sib families (as given)\n") 759 | print(data.frame(famfathers, fammothers, noffspring, meanrel = famresults)) 760 | cat("Mean relatedness within all full-sib families", weighted.mean(famresults, noffspring), "\n") 761 | } 762 | 763 | uoffspring <- which(!is.na(famnumber)) 764 | Fatherset <- unique(na.omit(pedinfo$FatherID)) 765 | Motherset <- unique(na.omit(pedinfo$MotherID)) 766 | udiff.fathers <- which(!match(pedinfo$FatherID[uoffspring], Fatherset) %*% t(rep(1, length(uoffspring))) == rep(1, length(uoffspring)) %*% 767 | t(match(pedinfo$FatherID[uoffspring], Fatherset)), arr.ind = T) 768 | udiff.mothers <- which(!match(pedinfo$MotherID[uoffspring], Motherset) %*% t(rep(1, length(uoffspring))) == rep(1, length(uoffspring)) %*% 769 | t(match(pedinfo$MotherID[uoffspring], Motherset)), arr.ind = T) 770 | udiff <- as.matrix(merge(udiff.fathers, udiff.mothers)) 771 | opos <- match(pedinfo$seqID[uoffspring], seqID[indsubset]) 772 | cat("Mean relatedness between individuals in full-sib families with different parents", mean(eval(parse(text = GCheck))[opos,opos][udiff]), "\n") 773 | } 774 | outobj$pedinfo <- pedinfo 775 | write.csv(pedinfo, "PedVerify.csv", row.names = FALSE) 776 | 777 | if (exists("matesfile")) if(!file.exists(matesfile)) { 778 | cat("Warning: Mates file", matesfile, "not found\n") 779 | rm(matesfile) 780 | } 781 | if (exists("matesfile")) { ######### check for matching mating pair ########### 782 | suppressWarnings(rm(groupsfile)) # dont use groups 783 | matesinfo <- read.csv(matesfile, stringsAsFactors = FALSE, colClasses=(MatesGroup="character")) 784 | matesinfo <- matesinfo[!duplicated(matesinfo),] # remove row duplicates 785 | matesinfo$Genotyped <- ifelse(is.na(match(pedinfo$seqID[match(matesinfo$MaleID,pedinfo$IndivID)],seqID)) 786 | | is.na(match(pedinfo$seqID[match(matesinfo$FemaleID,pedinfo$IndivID)],seqID)),"N","Y") 787 | if ("MatesGroup" %in% colnames(pedinfo)) { 788 | mresults <- matesmatch(eval(parse(text = GCheck)),pedinfo=pedinfo, matesinfo=matesinfo) 789 | if(is.null(mresults)) BothMatches <- NULL else { 790 | matesinfo <- mresults$matesinfo 791 | BothMatches <- mresults$matesmatch 792 | } 793 | } 794 | if(nrow(BothMatches) > 0) { 795 | BothMatches <- BothMatches[order(BothMatches$IndivID),,drop=FALSE] 796 | mmstats <- mismatch.2par(BothMatches$IndivID, BothMatches$BestFatherMatch, BothMatches$BestMotherMatch,pedinfo=pedinfo) 797 | BothMatches$mmrateF1M1 <- mmstats$mmrate 798 | BothMatches$mmnumF1M1 <- mmstats$ncompare 799 | BothMatches$exp.mmrateF1M1 <- mmstats$exp.mmrate 800 | mmstats <- mismatch.2par(BothMatches$IndivID, BothMatches$FatherMatch2nd, BothMatches$MotherMatch2nd,pedinfo=pedinfo) 801 | BothMatches$mmrateF2M2 <- mmstats$mmrate 802 | BothMatches$mmnumF2M2 <- mmstats$ncompare 803 | BothMatches$exp.mmrateF2M2 <- mmstats$exp.mmrate 804 | uf <- match(pedinfo$seqID[match(BothMatches$BestFather,pedinfo$IndivID)],seqID[indsubset]) 805 | um <- match(pedinfo$seqID[match(BothMatches$BestMother,pedinfo$IndivID)],seqID[indsubset]) 806 | BothMatches$relF1M1 <- eval(parse(text = GCheck))[cbind(uf,um)] 807 | uf <- match(pedinfo$seqID[match(BothMatches$FatherMatch2nd,pedinfo$IndivID)],seqID[indsubset]) 808 | um <- match(pedinfo$seqID[match(BothMatches$MotherMatch2nd,pedinfo$IndivID)],seqID[indsubset]) 809 | BothMatches$relF2M2 <- eval(parse(text = GCheck))[cbind(uf,um)] 810 | uo <- match(BothMatches$seqID,seqID[indsubset]) 811 | BothMatches$Inb <- diag(eval(parse(text = GCheck)))[uo] - 1 812 | tempInb <- BothMatches$Inb; tempInb[is.na(tempInb)] <- 100 # arbitrary high so not a fail in Inb tests 813 | EMMrates <- with(BothMatches,cbind(mmrateF2M2-exp.mmrateF2M2,mmrateF1M1-exp.mmrateF1M1)) 814 | EMMrate.min <- apply(EMMrates, MARGIN=1, min) 815 | if (is.null(minr4inb)) minr4inb <<- min(BothMatches$relF1M1) - 0.001 816 | BothMatches$BothAssign[which(EMMrates[,2] > emm.thresh2 & BothMatches$BothAssign %in% assign.rank[1:4])] <- "E" 817 | BothMatches$BothAssign[EMMrates[,2]-EMMrate.min > emmdiff.thresh2 & EMMrate.min < emm.thresh2 & BothMatches$BothAssign %in% assign.rank[1:5]] <- "A" 818 | BothMatches$BothAssign[which(BothMatches$relF1M1 - 2 * tempInb > inb.thresh & BothMatches$relF1M1 > minr4inb & BothMatches$BothAssign == "Y")] <- "I" 819 | BothMatches$Alternate <- "" 820 | Apos <- which(BothMatches$BothAssign %in% c("A","I")) 821 | for (ipos in Apos) { 822 | altOK <- TRUE 823 | if (BothMatches$relF2M2[ipos] - 2 * tempInb[ipos] > inb.thresh | EMMrate.min[ipos] > emm.thresh2) altOK <- FALSE 824 | if(BothMatches$Fatherrel2nd[ipos] < rel.threshF) altOK <- FALSE 825 | if(BothMatches$Motherrel2nd[ipos] < rel.threshM) altOK <- FALSE 826 | if(altOK) BothMatches$Alternate[ipos] <- "F2M2" 827 | } 828 | outobj$BothMatches <- BothMatches 829 | write.csv(BothMatches,"MatePairMatches.csv", row.names = FALSE) 830 | uo <- match(BothMatches$seqID,seqID) 831 | trioplots(BothMatches=BothMatches) 832 | plotch <- assign.pch[match(BothMatches$BothAssign,assign.rank)] 833 | png("MMrateBothE.png", width = 640, height = 640, pointsize = cex.pointsize * 15) 834 | plot(EMMrates[,1] ~ EMMrates[,2], main="Excess Mismatch Rates", ylab="Father2, Mother2",xlab="Father1, Mother1", col=fcolo[uo],pch=plotch) 835 | abline(a = 0,b = 1, col="red") 836 | dev.off() 837 | cat("\nSummary of joint Assignments\n") 838 | print( addmargins(table(BothMatches$BothAssign, useNA="ifany")) ) 839 | } 840 | } 841 | if (exists("groupsfile")) if(!file.exists(groupsfile)) { 842 | cat("Warning: Groups file", groupsfile, "not found\n") 843 | rm(groupsfile) 844 | } 845 | if (exists("groupsfile")) { ######### find fathers and mothers from possibles ########### 846 | groupsinfo <- read.csv(groupsfile, stringsAsFactors = FALSE, colClasses=(ParGroup="character")) 847 | groupsinfo <- groupsinfo[!duplicated(groupsinfo),] # remove row duplicates 848 | groupsinfo$Genotyped <- ifelse(is.na(match(pedinfo$seqID[match(groupsinfo$IndivID,pedinfo$IndivID)],seqID)),"N","Y") 849 | if ("FatherGroup" %in% colnames(pedinfo)) { 850 | mresults <- groupmatch(eval(parse(text = GCheck)), "Father",pedinfo=pedinfo, groupsinfo=groupsinfo) 851 | if(is.null(mresults)) FatherMatches <- NULL else { 852 | groupsinfo <- mresults$groupsinfo 853 | FatherMatches <- mresults$groupmatch 854 | } 855 | outobj$FatherMatches <- FatherMatches 856 | } 857 | if ("MotherGroup" %in% colnames(pedinfo)) { 858 | mresults <- groupmatch(eval(parse(text = GCheck)), "Mother",pedinfo=pedinfo, groupsinfo=groupsinfo) 859 | if(is.null(mresults)) MotherMatches <- NULL else { 860 | groupsinfo <- mresults$groupsinfo 861 | MotherMatches <- mresults$groupmatch 862 | } 863 | outobj$MotherMatches <- MotherMatches 864 | } 865 | if ("FatherGroup" %in% colnames(pedinfo) & "MotherGroup" %in% colnames(pedinfo)) { 866 | BothMatches <- merge(FatherMatches,MotherMatches,all=TRUE) 867 | if(nrow(BothMatches) > 0) { 868 | BothMatches <- BothMatches[order(BothMatches$IndivID),,drop=FALSE] 869 | if(!allow.selfing) { 870 | uselfed <- with(BothMatches,which(BestFatherMatch==BestMotherMatch)) 871 | if(matchmethod=="EMM") { #whichbetter: 1 retain F1, 2 retain M1 872 | whichbetter <- apply(with(BothMatches[uselfed,],cbind(mmrateMother2-exp.mmrateMother2,mmrateFather2-exp.mmrateFather2)),1,which.min) 873 | } else { #rel method 874 | whichbetter <- apply(with(BothMatches[uselfed,],cbind(Motherrel2nd,Fatherrel2nd)),1,which.max) 875 | } 876 | BothMatches$tempna <- NA 877 | uchange <- which(whichbetter==2) 878 | if(length(uchange)>0) { # promote 2nd father to 1st 879 | new0 <- BothMatches[uselfed[uchange],c("FatherMatch2nd","tempna","Fatherrel2nd","tempna","tempna","mmrateFather2", 880 | "tempna","exp.mmrateFather2","tempna","tempna","tempna","tempna","tempna","tempna")] 881 | changecols <- c("BestFatherMatch","FatherMatch2nd","Fatherrel","Fatherrel2nd","Father12rel","mmrateFather", 882 | "mmnumFather","exp.mmrateFather","mmrateFather2","exp.mmrateFather2","Fathersd","FatherReliability","FatherAssign","FatherInb") 883 | colnames(new0) <- changecols 884 | new0$FatherAssign <- "Y" 885 | new0$FatherAssign[which(new0$mmrateFather - new0$exp.mmrateFather > emm.thresh)] <- "E" 886 | new0$FatherAssign[which(new0$Fatherrel < rel.threshF)] <- "N" 887 | new0$FatherAssign[which(is.na(new0$Fatherrel))] <- "N" 888 | new0$FatherInb <- diag(eval(parse(text = GCheck)))[match(pedinfo$seqID[match(new0$BestFatherMatch, pedinfo$IndivID)], seqID[indsubset])] - 1 889 | BothMatches[uselfed[uchange],changecols] <- new0 890 | # if 2nd mother is the promoted father, remove that and change B results to Y 891 | u2same <- intersect(which(BothMatches$BestFatherMatch == BothMatches$MotherMatch2nd), uselfed[uchange]) 892 | if(length(u2same)>0) { 893 | BothMatches[u2same,c("MotherMatch2nd","Mother12rel","mmrateMother2","exp.mmrateMother2")] <- NA 894 | uB <- which(BothMatches$MotherAssign[u2same]=="B") 895 | if(length(uB)>0) { 896 | BothMatches$MotherAssign[u2same[uB]] <- "Y" # restore 897 | BothMatches[u2same[uB],c("Mothersd","MotherReliability")] <- NA 898 | } 899 | } 900 | } 901 | uchange <- which(whichbetter==1) 902 | if(length(uchange)>0) { 903 | new0 <- BothMatches[uselfed[uchange],c("MotherMatch2nd","tempna","Motherrel2nd","tempna","tempna","mmrateMother2", 904 | "tempna","exp.mmrateMother2","tempna","tempna","tempna","tempna","tempna","tempna")] 905 | changecols <- c("BestMotherMatch","MotherMatch2nd","Motherrel","Motherrel2nd","Mother12rel","mmrateMother", 906 | "mmnumMother","exp.mmrateMother","mmrateMother2","exp.mmrateMother2","Mothersd","MotherReliability","MotherAssign","MotherInb") 907 | colnames(new0) <- changecols 908 | new0$MotherAssign <- "Y" 909 | new0$MotherAssign[which(new0$mmrateMother - new0$exp.mmrateMother > emm.thresh)] <- "E" 910 | new0$MotherAssign[which(new0$Motherrel < rel.threshM)] <- "N" 911 | new0$MotherAssign[which(is.na(new0$Motherrel))] <- "N" 912 | new0$MotherInb <- diag(eval(parse(text = GCheck)))[match(pedinfo$seqID[match(new0$BestMotherMatch, pedinfo$IndivID)], seqID[indsubset])] - 1 913 | BothMatches[uselfed[uchange],changecols] <- new0 914 | # if 2nd father is the promoted mother, remove that and change B results to Y 915 | u2same <- intersect(which(BothMatches$BestMotherMatch == BothMatches$FatherMatch2nd), uselfed[uchange]) 916 | if(length(u2same)>0) { 917 | BothMatches[u2same,c("FatherMatch2nd","Father12rel","mmrateFather2","exp.mmrateFather2")] <- NA 918 | uB <- which(BothMatches$FatherAssign[u2same]=="B") 919 | if(length(uB)>0) { 920 | BothMatches$FatherAssign[u2same[uB]] <- "Y" # restore 921 | BothMatches[u2same[uB],c("Fathersd","FatherReliability")] <- NA 922 | } 923 | } 924 | } 925 | BothMatches$tempna <- NULL 926 | } 927 | mmstats <- mismatch.2par(BothMatches$IndivID, BothMatches$BestFatherMatch, BothMatches$BestMotherMatch,pedinfo=pedinfo) 928 | BothMatches$mmrateF1M1 <- mmstats$mmrate 929 | BothMatches$mmnumF1M1 <- mmstats$ncompare 930 | BothMatches$exp.mmrateF1M1 <- mmstats$exp.mmrate 931 | mmstats <- mismatch.2par(BothMatches$IndivID, BothMatches$FatherMatch2nd, BothMatches$BestMotherMatch,pedinfo=pedinfo) 932 | BothMatches$mmrateF2M1 <- mmstats$mmrate 933 | BothMatches$mmnumF2M1 <- mmstats$ncompare 934 | BothMatches$exp.mmrateF2M1 <- mmstats$exp.mmrate 935 | mmstats <- mismatch.2par(BothMatches$IndivID, BothMatches$BestFatherMatch, BothMatches$MotherMatch2nd,pedinfo=pedinfo) 936 | BothMatches$mmrateF1M2 <- mmstats$mmrate 937 | BothMatches$mmnumF1M2 <- mmstats$ncompare 938 | BothMatches$exp.mmrateF1M2 <- mmstats$exp.mmrate 939 | mmstats <- mismatch.2par(BothMatches$IndivID, BothMatches$FatherMatch2nd, BothMatches$MotherMatch2nd,pedinfo=pedinfo) 940 | BothMatches$mmrateF2M2 <- mmstats$mmrate 941 | BothMatches$mmnumF2M2 <- mmstats$ncompare 942 | BothMatches$exp.mmrateF2M2 <- mmstats$exp.mmrate 943 | # parent relatedness & Inbreeding 944 | uf <- match(pedinfo$seqID[match(BothMatches$BestFather,pedinfo$IndivID)],seqID[indsubset]) 945 | um <- match(pedinfo$seqID[match(BothMatches$BestMother,pedinfo$IndivID)],seqID[indsubset]) 946 | BothMatches$relF1M1 <- eval(parse(text = GCheck))[cbind(uf,um)] 947 | uf <- match(pedinfo$seqID[match(BothMatches$BestFather,pedinfo$IndivID)],seqID[indsubset]) 948 | um <- match(pedinfo$seqID[match(BothMatches$MotherMatch2nd,pedinfo$IndivID)],seqID[indsubset]) 949 | BothMatches$relF1M2 <- eval(parse(text = GCheck))[cbind(uf,um)] 950 | uf <- match(pedinfo$seqID[match(BothMatches$FatherMatch2nd,pedinfo$IndivID)],seqID[indsubset]) 951 | um <- match(pedinfo$seqID[match(BothMatches$BestMother,pedinfo$IndivID)],seqID[indsubset]) 952 | BothMatches$relF2M1 <- eval(parse(text = GCheck))[cbind(uf,um)] 953 | uf <- match(pedinfo$seqID[match(BothMatches$FatherMatch2nd,pedinfo$IndivID)],seqID[indsubset]) 954 | um <- match(pedinfo$seqID[match(BothMatches$MotherMatch2nd,pedinfo$IndivID)],seqID[indsubset]) 955 | BothMatches$relF2M2 <- eval(parse(text = GCheck))[cbind(uf,um)] 956 | uo <- match(BothMatches$seqID,seqID[indsubset]) 957 | BothMatches$Inb <- diag(eval(parse(text = GCheck)))[uo] - 1 958 | tempInb <- BothMatches$Inb; tempInb[is.na(tempInb)] <- 100 # arbitrary high so not a fail in Inb tests 959 | BothMatches$BothAssign <- assign.rank[pmax(match(BothMatches$FatherAssign,assign.rank),match(BothMatches$MotherAssign,assign.rank),na.rm=TRUE)] 960 | BothMatches$BothAssign[BothMatches$FatherAssign=="Y" & is.na(BothMatches$MotherAssign)] <- "F" 961 | BothMatches$BothAssign[BothMatches$MotherAssign=="Y" & is.na(BothMatches$FatherAssign)] <- "M" 962 | EMMrates <- with(BothMatches,cbind(mmrateF2M2-exp.mmrateF2M2,mmrateF1M2-exp.mmrateF1M2,mmrateF2M1-exp.mmrateF2M1,mmrateF1M1-exp.mmrateF1M1)) 963 | EMMrate.min <- apply(EMMrates, MARGIN=1, min, na.rm=TRUE) 964 | if (is.null(minr4inb)) minr4inb <<- min(BothMatches$relF1M1) - 0.001 965 | BothMatches$BothAssign[which(EMMrates[,4] > emm.thresh2 & BothMatches$BothAssign %in% assign.rank[1:4])] <- "E" 966 | BothMatches$BothAssign[EMMrates[,4]-EMMrate.min > emmdiff.thresh2 & EMMrate.min < emm.thresh2 & BothMatches$BothAssign %in% assign.rank[1:5]] <- "A" 967 | BothMatches$BothAssign[which(BothMatches$relF1M1 - 2 * tempInb > inb.thresh & BothMatches$relF1M1 > minr4inb & BothMatches$BothAssign == "Y")] <- "I" 968 | BothMatches$BothAssign[which(BothMatches$FatherAssign == "Y" & BothMatches$BothAssign == "N")] <- "F" 969 | BothMatches$BothAssign[which(BothMatches$MotherAssign == "Y" & BothMatches$BothAssign == "N")] <- "M" 970 | BothMatches$BothAssign[which(BothMatches$FatherAssign == "Y" & BothMatches$BothAssign %in% c("E") & BothMatches$MotherAssign %in% c("E"))] <- "F" 971 | BothMatches$BothAssign[which(BothMatches$MotherAssign == "Y" & BothMatches$BothAssign %in% c("E") & BothMatches$FatherAssign %in% c("E"))] <- "M" 972 | 973 | BothMatches$Alternate <- "" 974 | Apos <- which(BothMatches$BothAssign %in% c("A","I")) 975 | for (ipos in Apos) { 976 | altpar <- c("F2M2","F1M2","F2M1")[which.min(EMMrates[ipos,1:3])] 977 | if(length(altpar) > 0) { 978 | altOK <- TRUE 979 | if (BothMatches[ipos,paste0("rel",altpar)] - 2 * tempInb[ipos] > inb.thresh | EMMrate.min[ipos] > emm.thresh2) altOK <- FALSE 980 | if (grepl("F2",altpar)) { 981 | if(BothMatches[ipos, "Fatherrel2nd"] < rel.threshF | BothMatches[ipos, "mmrateFather2"] - BothMatches[ipos, "exp.mmrateFather2"] > emm.thresh ) altOK <- FALSE 982 | } 983 | if (grepl("M2",altpar)) { 984 | if(BothMatches[ipos, "Motherrel2nd"] < rel.threshM | BothMatches[ipos, "mmrateMother2"] - BothMatches[ipos, "exp.mmrateMother2"] > emm.thresh ) altOK <- FALSE 985 | } 986 | if(altOK) BothMatches$Alternate[ipos] <- altpar 987 | } 988 | } 989 | outobj$BothMatches <- BothMatches 990 | write.csv(BothMatches,"BothMatches.csv", row.names = FALSE) 991 | uo <- match(BothMatches$seqID,seqID) 992 | trioplots(BothMatches=BothMatches) 993 | plotch <- assign.pch[match(BothMatches$BothAssign,assign.rank)] 994 | upairs <- which(!is.na(BothMatches$mmrateF1M1)) 995 | if(length(upairs)>0 & any(!is.na(rowSums(EMMrates[upairs,,drop=FALSE])))) { 996 | png("MMrateBoth.png", width = 640, height = 640, pointsize = cex.pointsize * 15) 997 | pairs(with(BothMatches[upairs,,drop=FALSE],cbind(mmrateF2M2,mmrateF1M2,mmrateF2M1,mmrateF1M1)),upper.panel=panel.yeqx,lower.panel=NULL, 998 | main="Raw Mismatch Rates", labels=c("Father2,\nMother2","Father1,\nMother2","Father2,\nMother1","Father1,\nMother1"), 999 | col.points=fcolo[uo][upairs],pch=plotch[upairs]) 1000 | dev.off() 1001 | png("MMrateBothE.png", width = 640, height = 640, pointsize = cex.pointsize * 15) 1002 | pairs(EMMrates[upairs,,drop=FALSE], main="Excess Mismatch Rates", labels=c("Father2,\nMother2","Father1,\nMother2","Father2,\nMother1","Father1,\nMother1"), 1003 | upper.panel=panel.yeqx,lower.panel=NULL,col.points=fcolo[uo][upairs],pch=plotch[upairs]) 1004 | dev.off() 1005 | } 1006 | cat("\nSummary of joint Assignments\n") 1007 | print( addmargins(table(BothMatches$BothAssign, useNA="ifany")) ) 1008 | } 1009 | } 1010 | write.csv(groupsinfo, "GroupsParentCounts.csv", row.names = FALSE) 1011 | } 1012 | } 1013 | invisible(outobj) 1014 | } # GBSPed 1015 | 1016 | if (!functions.only) PedResults <- GBSPed() 1017 | 1018 | 1019 | if (FALSE) { # temporary working code 1020 | # 1021 | } 1022 | 1023 | -------------------------------------------------------------------------------- /GBSRun.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | genofile <- "HapMap.hmc.txt.gz" 4 | 5 | source("/GBS-Chip-Gmatrix.R") 6 | Gfull <- calcG() 7 | GHWdgm.05 <- calcG(which(HWdis > -0.05),"HWdgm.05", npc=4) # recalculate using Hardy-Weinberg disequilibrium cut-off at -0.05 8 | 9 | pedfile <- "Ped-GBS.csv" 10 | groupsfile <- "Ped-Groups.csv" 11 | 12 | rel.thresh <- 0.2 13 | emm.thresh <- 0.075 # to make results same as before emm used (to match original example) 14 | GCheck <- "GHWdgm.05$G5" 15 | source("/GBSPedAssign.R") 16 | 17 | #G5 <- GHWdgm.05$G5 18 | #save(G5,seqID,file="G5.RData") 19 | -------------------------------------------------------------------------------- /KGDManual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/KGDManual.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /ParExample/GBSParDeer.R: -------------------------------------------------------------------------------- 1 | genofile <- "HapMap.hmc.txt.gz" 2 | pedfile <- "DeerPedGBS.csv" 3 | groupsfile <- "Ped-Groups.csv" 4 | sampdepth.thresh <- 0.3 5 | cex.pointsize <- 1.2 6 | functions.only <- TRUE 7 | sink("GBSParDeerOut.txt", split=TRUE) 8 | source("GBS-Chip-Gmatrix.R") 9 | readGBS() 10 | outlevel <- 1 # reduced QC output 11 | GBSsummary() 12 | 13 | breed <- read.table(text=seqID,sep="_",stringsAsFactors=FALSE)[,1] 14 | fcolo <- c("darkblue","darkred")[match(breed,c("W","R"))] 15 | 16 | snpsubset <- which(HWdis > -0.05) 17 | GHW <- calcG(npc=4,snpsubset=snpsubset,sfx="RWHW") 18 | G5 <- GHW$G5 19 | GCheck <- "G5" 20 | set.seed(230985) # to get same bootstrap results if rerun 21 | source("GBSPedAssign.R") 22 | RWResults <- GBSPed() 23 | write.csv(RWResults$BothMatches,"BothMatchesRW.csv",row.names=FALSE,quote=FALSE) # Combined breeds 24 | 25 | dir.create("W") 26 | setwd("W") 27 | indW <- which(breed=="W") 28 | pW <- calcp(indsubset=indW) 29 | snpsubset <- which(HWdis > -0.05 & pW > 0 & pW < 1) 30 | GHWW <- calcG(snpsubset,indsubset=indW,sfx="W",puse=pW,calclevel=1, npc=-2) # using Hardy-Weinberg disequilibrium cut-off at -0.05 31 | G5W <- GHWW$G5 32 | seqIDW <- seqID[indW]; if(length(GHWW$samp.removed) > 0 ) seqIDW <- seqIDW[-GHWW$samp.removed] 33 | GCheck <- "G5W" 34 | puse <- pW 35 | indsubset <- indW 36 | rm(minr4inb) 37 | pedfile <- "../../DeerPedGBS.csv" 38 | groupsfile <- "../../Ped-Groups.csv" 39 | WResults <- GBSPed() 40 | MatchesW <- WResults$BothMatches 41 | write.csv(MatchesW,"BothMatchesW.csv",row.names=FALSE,quote=FALSE) 42 | bestparPCA(GHWW, sfx="W",keypos="bottomright", pedinfo=WResults$pedinfo, BothMatches=MatchesW) 43 | 44 | # Alt models 45 | uY <- which(MatchesW$BothAssign=="Y") 46 | bbopt <- optimize(ssbbmm,lower=0,upper=20, tol=0.001, uuse=uY, pedinfo=WResults$pedinfo, BothMatches=MatchesW) 47 | depth2K <- depth2Kchoose (dmodel="bb", bbopt$minimum) # 4.609 48 | mmstatsW.bb <- mismatch.2par(MatchesW$IndivID,MatchesW$BestFatherMatch, MatchesW$BestMotherMatch,pedinfo=WResults$pedinfo) 49 | names(mmstatsW.bb) <- paste0(names(mmstatsW.bb),".bb") 50 | mpopt <- optimize(ssmpmm,lower=0.5,upper=0.9, tol=0.001, uuse=uY, pedinfo=WResults$pedinfo, BothMatches=MatchesW) 51 | depth2K <- depth2Kchoose (dmodel="modp", mpopt$minimum) # 0.591 52 | mmstatsW.mp <- mismatch.2par(MatchesW$IndivID,MatchesW$BestFatherMatch, MatchesW$BestMotherMatch,pedinfo=WResults$pedinfo) 53 | names(mmstatsW.mp) <- paste0(names(mmstatsW.mp),".mp") 54 | MatchesW <- cbind(MatchesW,mmstatsW.bb,mmstatsW.mp) 55 | write.csv(MatchesW,"BothMatchesW.csv",row.names=FALSE,quote=FALSE) 56 | depth2K <- depth2Kchoose (dmodel="modp") # back to default model 57 | setwd("..") 58 | 59 | 60 | dir.create("R") 61 | setwd("R") 62 | indR <- which(breed=="R") 63 | pR <- calcp(indsubset=indR) 64 | snpsubset <- which(HWdis > -0.05 & pR > 0 & pR < 1) 65 | GHWR <- calcG(snpsubset,indsubset=indR,sfx="R",puse=pR,calclevel=1, npc=-2) # using Hardy-Weinberg disequilibrium cut-off at -0.05 66 | G5R <- GHWR$G5 67 | seqIDR <- seqID[indR]; if(length(GHWR$samp.removed) > 0 ) seqIDR <- seqIDR[-GHWR$samp.removed] 68 | GCheck <- "G5R" 69 | puse <- pR 70 | indsubset <- indR 71 | rm(minr4inb) 72 | RResults <- GBSPed() 73 | MatchesR <- RResults$BothMatches 74 | write.csv(MatchesR, "BothMatchesR.csv", row.names=FALSE, quote=FALSE) 75 | bestparPCA(GHWR, sfx="R",keypos="bottomright", pedinfo=RResults$pedinfo, BothMatches=MatchesR) 76 | 77 | # Alt models 78 | uY <- which(MatchesR$BothAssign=="Y") 79 | bbopt <- optimize(ssbbmm,lower=0,upper=20, tol=0.001, uuse=uY, pedinfo=RResults$pedinfo, BothMatches=MatchesR) 80 | depth2K <- depth2Kchoose (dmodel="bb", bbopt$minimum) # 3.956 81 | mmstatsR.bb <- mismatch.2par(MatchesR$IndivID,MatchesR$BestFatherMatch, MatchesR$BestMotherMatch,pedinfo=RResults$pedinfo) 82 | names(mmstatsR.bb) <- paste0(names(mmstatsR.bb),".bb") 83 | mpopt <- optimize(ssmpmm,lower=0.5,upper=0.8, tol=0.001, uuse=uY, pedinfo=RResults$pedinfo, BothMatches=MatchesR) 84 | depth2K <- depth2Kchoose (dmodel="modp", mpopt$minimum) # 0.604 85 | mmstatsR.mp <- mismatch.2par(MatchesR$IndivID,MatchesR$BestFatherMatch, MatchesR$BestMotherMatch,pedinfo=RResults$pedinfo) 86 | names(mmstatsR.mp) <- paste0(names(mmstatsR.mp),".mp") 87 | MatchesR <- cbind(MatchesR,mmstatsR.bb,mmstatsR.mp) 88 | write.csv(MatchesR, "BothMatchesR.csv", row.names=FALSE, quote=FALSE) 89 | depth2K <- depth2Kchoose (dmodel="modp") # back to default model 90 | 91 | setwd("..") 92 | 93 | sink() 94 | -------------------------------------------------------------------------------- /ParExample/GBSParentage-Annotated.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgResearch/KGD/565fcd8ab115ccafa72e330f214c6c0569d7059b/ParExample/GBSParentage-Annotated.pdf -------------------------------------------------------------------------------- /PopGExamples/FstSim.R: -------------------------------------------------------------------------------- 1 | ### Simulation code for Population differentiation (Fst) paper (in review). 2 | 3 | # fade (Lumley) 4 | fade <- function(colors,alpha) { 5 | rgbcols <- col2rgb(colors) 6 | rgb(rgbcols[1,],rgbcols[2,],rgbcols[3,],alpha,max=255) 7 | } 8 | 9 | # jpeg with new defaults 10 | jpegj <- function(fname,width=15, height=13, units="cm", res=300, quality=95,...) jpeg(filename=fname, width=width, height=height, units=units, res=res, quality=quality, ...) 11 | 12 | codedir <- "" 13 | functions.only <- TRUE 14 | source(paste0(codedir,"GBS-PopGen.R")) 15 | source(paste0(codedir,"GBS-Chip-Gmatrix.R")) 16 | 17 | 18 | #----------------- 19 | ### Simulation 1 20 | set.seed(2018) 21 | popsize <- 100 # currently set to use one value only 22 | popp <- c(0.2, 0.4, 0.6, 0.8) 23 | #popp <- c(0.5,0.5,0.5,0.5) 24 | npop <- length(popp) 25 | meandepth <- c(2,4,10) 26 | nrep <- 1000 27 | nsnps <- nrep # nsnps used by Fst.GBS 28 | dcollist <- c("grey","steelblue","blue") 29 | 30 | results <- data.frame(popsize=integer(0),rep=integer(0),FstAct=double(0), depth=double(0), 31 | Fststar=double(0), Fstunadj=double(0),pstar=double(0),punadj=double(0)) 32 | FstTrue = (npop-1)*var(popp)/(npop*mean(popp)*(1-mean(popp))) 33 | for (ipopsize in seq_along(popsize)) { 34 | popsize0 <- popsize[ipopsize] 35 | nind <- popsize0*npop 36 | poplabels <- rep(1:npop,each=popsize0) 37 | for (ip in seq(npop)) { 38 | p0 <- popp[ip] 39 | genospop <- matrix(sample(0:2,size=popsize0*nrep,prob=c((1-p0)^2,2*p0*(1-p0),p0^2),replace=TRUE),ncol=nrep) 40 | if(ip==1) genos <- genospop else genos <- rbind(genos,genospop) 41 | } 42 | 43 | aX2 <- rbind(round(genos/2),ceiling(genos/2)) 44 | X2results <- apply(rbind(aX2,matrix(2*popsize0,nrow=npop,ncol=nrep)),MARGIN=2,chisq.adj,y=rep(poplabels,2)) 45 | FstAct <- X2results/(2*popsize0*npop) 46 | cat("mean FstAct = ",mean(FstAct),"\n") 47 | 48 | for (idepth in seq_along(meandepth)) { 49 | depth0 <- meandepth[idepth] 50 | cat("mean depth=",depth0,"\n") 51 | depth = matrix(rpois(popsize0*nrep*npop,depth0),ncol = nrep) 52 | Acounts <- matrix(rbinom(popsize0*nrep*npop,depth,genos/2),ncol=nrep) # A allele 53 | genon <- trunc(2*Acounts/depth-1)+1 54 | Fststar <- Fst.GBS(populations=poplabels,SNPtest = TRUE) 55 | cat("propn with p<0.05 (Adj) ",mean(Fststar$pvalue<0.05,na.rm=TRUE),"\n") 56 | depth[depth>0] <- Inf 57 | Fstunadj <- Fst.GBS(populations=poplabels,SNPtest = TRUE) 58 | cat("propn with p<0.05 (Unadj)",mean(Fstunadj$pvalue<0.05,na.rm=TRUE),"\n") 59 | cat("Adj. sddiff, MAE",sd(Fststar$Fst-FstAct,na.rm=TRUE), 60 | mean(abs(Fststar$Fst-FstAct),na.rm=TRUE),"\n") 61 | cat("Unadj. sddiff, MAE",sd(Fstunadj$Fst-FstAct,na.rm=TRUE), 62 | mean(abs(Fstunadj$Fst-FstAct),na.rm=TRUE),"\n") 63 | results <- rbind(results,cbind.data.frame(popsize0,1:nrep,FstAct,depth0, 64 | Fststar=Fststar$Fst,Fstunadj=Fstunadj$Fst,pstar=Fststar$pvalue,punadj=Fstunadj$pvalue)) 65 | #print(summary(lm(Fststar$Fst~FstAct))) 66 | #print(summary(lm(Fststar$Fst~Fstunadj$Fst))) 67 | } 68 | } 69 | 70 | dcolour <- dcollist[match(results$depth0,meandepth)] 71 | 72 | jpegj("Figure1.jpg") 73 | par(mar = par("mar") + c(0,1,-2,0)) 74 | boxplot(c(results$Fststar,FstAct) ~ c(results$depth0,rep(Inf,length(FstAct))),col=c(dcollist,"darkblue"), 75 | xlab=expression(mean~depth~(italic(d))) ,ylab = expression(F[ST]^"*"), cex.lab=1.2,,xaxt="n" ) 76 | axis(side=1,at=1:4,labels=c(meandepth,expression(infinity))) 77 | abline(h=FstTrue,col="red",lwd=2) 78 | abline(h=FstTrue+1/(2*popsize),lty = 2, col="red",lwd=2) 79 | dev.off() 80 | 81 | jpegj("Figure2.jpg") 82 | par(mar = par("mar") + c(0,1,-2,0)) 83 | plot(Fststar ~ FstAct, data=results, col=fade(dcolour,127),pch=16, xlab=expression(paste(F[ST]," with true genotypes")), 84 | ylab=expression(paste(F[ST]^"*"," with sequencing genotypes")),cex.lab=1.2) 85 | abline(a=0,b=1,col="red",lwd=3) 86 | par(family="serif") # set and save orig setting 87 | legend("topleft",legend=meandepth,title="d",title.font=3,col=dcollist,pch=16) 88 | dev.off() 89 | 90 | jpegj("Figure3.jpg", width=22) 91 | layout(matrix(1:2,nrow=1)) 92 | par(mar = par("mar") + c(0,1,-2,0)) 93 | plot(Fststar ~ Fstunadj, data=results, col=fade(dcolour,127),pch=16, 94 | xlab=expression(paste(F[ST]," (without depth adjustment)")), 95 | ylab=expression(paste(F[ST]^"*"," (with depth adjustment)")),cex.lab=1.2) 96 | abline(a=0,b=1,col="red",lwd=3) 97 | text(x=max(results$Fstunadj), y=max(results$Fststar),"A", cex=1.5, adj=c(-1,-1),xpd=TRUE) 98 | op <- par(family="serif") # set and save orig setting 99 | legend("topleft",legend=meandepth,title="d",title.font=3,col=dcollist,pch=16) 100 | par(op) #reset 101 | plot(pstar ~ punadj, data=results, col=fade(dcolour,127),pch=16, 102 | xlab="p-value without depth adjustment", 103 | ylab="p-value with depth adjustment",cex.lab=1.2, log="xy") 104 | abline(a=0,b=1,col="red",lwd=3) 105 | text(x=max(results$punadj), y=max(results$pstar),"B", cex=1.5, adj=c(-1,-1),xpd=TRUE) 106 | op <- par(family="serif") # set and save orig setting 107 | legend("topleft",legend=meandepth,title="d",title.font=3,col=dcollist,pch=16) 108 | dev.off() 109 | 110 | write.csv(results,"FstResultsSim1.csv",row.names=FALSE,quote=FALSE) 111 | 112 | 113 | #----------------- 114 | # find a situation where not adjusting is a problem # Sim 2 115 | set.seed(20246) 116 | popsize <- 10 # currently set to use one value only 117 | popp <- c(0.3, 0.7) 118 | npop <- length(popp) 119 | meandepth <- c(0.5,1,10) 120 | nrep <- 1000 121 | nsnps <- nrep 122 | testlevel <- 0.05 123 | dcollist <- c("yellow3","grey","blue") 124 | 125 | results <- data.frame(popsize=integer(0),rep=integer(0),FstAct=double(0), depth=double(0), 126 | Fststar=double(0), Fstunadj=double(0),pstar=double(0),punadj=double(0)) 127 | (FstTrue = (npop-1)*var(popp)/(npop*mean(popp)*(1-mean(popp)))) 128 | 129 | for (ipopsize in seq_along(popsize)) { 130 | popsize0 <- popsize[ipopsize] 131 | nind <- popsize0*npop 132 | poplabels <- rep(1:npop,each=popsize0) 133 | for (ip in seq(npop)) { 134 | p0 <- popp[ip] 135 | genospop <- matrix(sample(0:2,size=popsize0*nrep,prob=c((1-p0)^2,2*p0*(1-p0),p0^2),replace=TRUE),ncol=nrep) 136 | if(ip==1) genos <- genospop else genos <- rbind(genos,genospop) 137 | } 138 | 139 | aX2 <- rbind(round(genos/2),ceiling(genos/2)) 140 | X2results <- apply(rbind(aX2,matrix(2*popsize0,nrow=npop,ncol=nrep)),MARGIN=2,chisq.adj,y=rep(poplabels,2)) 141 | FstAct <- X2results/(2*popsize0*npop) 142 | cat("mean FstAct = ",mean(FstAct),"\n") 143 | FststarAll <- list() 144 | for (idepth in seq_along(meandepth)) { 145 | depth0 <- meandepth[idepth] 146 | cat("mean depth=",depth0,"\n") 147 | depth = matrix(rpois(popsize0*nrep*npop,depth0),ncol = nrep) 148 | Acounts <- matrix(rbinom(popsize0*nrep*npop,depth,genos/2),ncol=nrep) 149 | genon <- trunc(2*Acounts/depth-1)+1 150 | Fststar <- Fst.GBS(populations=poplabels, SNPtest = TRUE) 151 | FststarAll[[idepth]] <- Fststar 152 | depth[depth>0] <- Inf 153 | Fstunadj <- Fst.GBS(populations=poplabels, SNPtest = TRUE) 154 | cat("Adj. sddiff, MAE, power at",testlevel,":",sd(Fststar$Fst-FstAct,na.rm=TRUE), 155 | mean(abs(Fststar$Fst-FstAct),na.rm=TRUE), 156 | mean(Fststar$pvalue0] <- Inf 225 | Fstunadj <- Fst.GBS(populations=poplabels, SNPtest = TRUE) 226 | cat("propn with p<0.05",mean(Fstunadj$pvalue<0.05,na.rm=TRUE),"\n") 227 | results <- rbind(results,cbind.data.frame(popsize0,1:nrep,FstAct,depth0,Fststar$Fst,Fstunadj$Fst)) 228 | } 229 | } 230 | 231 | 232 | #----------------- 233 | # sim for optimal depth Sim4 234 | set.seed(20241002) #date redone 235 | popp <- c(0.2, 0.4, 0.6, 0.8) 236 | npop <- length(popp) 237 | nrep <- 1000 238 | nsnps <- nrep 239 | readsperpop <- 200 # 200 reads per population 240 | meandepth <- c(0.1,0.25, 0.5, 1,2,4,10) 241 | resultsopt <- data.frame(popsize=integer(0),rep=integer(0),FstAct=double(0), depth=double(0), Fststar=double(0), 242 | Fstunadj=double(0),pstar=double(0),punadj=double(0)) 243 | for (idepth in seq_along(meandepth)) { 244 | depth0 <- meandepth[idepth] 245 | popsize0 <- readsperpop / depth0 246 | nind <- popsize0*npop 247 | poplabels <- rep(1:npop,each=popsize0) 248 | for (ip in seq(npop)) { 249 | p0 <- popp[ip] 250 | genospop <- matrix(sample(0:2,size=popsize0*nrep,prob=c((1-p0)^2,2*p0*(1-p0),p0^2),replace=TRUE),ncol=nrep) 251 | if(ip==1) genos <- genospop else genos <- rbind(genos,genospop) 252 | } 253 | aX2 <- rbind(round(genos/2),ceiling(genos/2)) 254 | X2results <- apply(rbind(aX2,matrix(2*popsize0,nrow=npop,ncol=nrep)),MARGIN=2,chisq.adj,y=rep(poplabels,2)) 255 | FstAct <- X2results/(2*popsize0*npop) 256 | depth = matrix(rpois(popsize0*nrep*npop,depth0),ncol = nrep) 257 | Acounts <- matrix(rbinom(popsize0*nrep*npop,depth,genos/2),ncol=nrep) 258 | genon <- trunc(2*Acounts/depth-1)+1 259 | Fststar <- Fst.GBS(populations=poplabels,SNPtest = TRUE) 260 | depth[depth>0] <- Inf 261 | Fstunadj <- Fst.GBS(populations=poplabels,SNPtest = TRUE) 262 | resultsopt <- rbind(resultsopt,cbind.data.frame(popsize0,1:nrep,FstAct,depth0, 263 | Fststar=Fststar$Fst,Fstunadj=Fstunadj$Fst,pstar=Fststar$pvalue,punadj=Fstunadj$pvalue)) 264 | } 265 | 266 | depthsdopt <- aggregate(resultsopt$Fststar,list(resultsopt$depth0),sd) 267 | #depthsdoptunadj <- aggregate(resultsopt$Fstunadj,list(resultsopt$depth0),sd) 268 | aggregate(resultsopt$Fststar,list(resultsopt$depth0),mean) 269 | powerstar <- resultsopt$pstar < 1e-20 270 | depthpowopt <- aggregate(powerstar,list(resultsopt$depth0),mean, na.rm=TRUE) 271 | 272 | jpegj("Figure5.jpg",height=14) 273 | par(mar = par("mar") + c(0,1,0,2)) 274 | plot(depthsdopt,pch=16,xlab="Depth",ylab=expression(sd(F[ST]^'*')),cex.lab=1.2) 275 | lines(depthsdopt,lwd=2) 276 | par(new = TRUE) 277 | plot(depthpowopt,pch=16,col="blue",axes=FALSE,xlab="",ylab="",bty="n") 278 | lines(depthpowopt,lwd=2,col="blue") 279 | axis(side=4,at=seq(0,1,0.2), col.axis="blue") 280 | topticks <- c(0.1,2,4,10) 281 | axis(side=3,at=topticks, labels = readsperpop /topticks ) 282 | mtext(expression(paste("Power (",alpha,"=",10^-20,")")),line=3, side=4, col="blue",cex=1.2) 283 | mtext("Number of individuals / subpopulation",line=2, side=3,cex=1.2) 284 | dev.off() 285 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KGD 2 | **K**inship (genetic relatedness) using **G**BS (genotyping-by-sequencing) with **D**epth adjustment 3 | 4 | R code for the analysis of genotyping-by-sequencing (GBS) data, primarily to construct a genomic relationship matrix for the genotyped individuals. The code can be used on its own, or incorporated into other R programs. There are QC tools (primarily graphical output), relationship estimation tools, pedigree verification tools and pedigree ‘mix and match’ tools. The latter two operations require additional input information about the samples genotyped. 5 | 6 | The manual describing the use of the code is **KGDManual.pdf** in the top directory of this repository. 7 | 8 | This project was supported by the Ministry of Business, Innovation and Employment via its funding of the “Genomics for Production & Security in a Biological Economy” programme (Contract ID C10X1306). 9 | 10 | Please cite 11 | 12 | Dodds, K G, McEwan, J C, Brauning, R, Anderson, R A, Van Stijn, T C, Kristjánsson, T and Clarke, S M (2015) Construction of relatedness matrices using genotyping-by-sequencing data. *BMC Genomics* **16**, 1047. [DOI](https://doi.org/10.1186/s12864-015-2252-3) [GoogleScholar citations](https://scholar.google.com/scholar?hl=en&cites=10840046335958418721) 13 | 14 | for parentage assignment 15 | 16 | Dodds, K G, McEwan, J C, Brauning, R, Van Stijn, T C, Rowe, S J, McEwan, K M and Clarke, S M (2019) Exclusion and genomic relatedness methods for assignment of parentage using genotyping-by-sequencing data. *G3: Genes, Genomes, Genetics* **9**, 3239-3247. [DOI](https://doi.org/10.1534/g3.119.400501) [GoogleScholar citations](https://scholar.google.com/scholar?hl=en&cites=16774971615445565137) 17 | 18 | for population differences (Fst functions) 19 | 20 | Dodds, K G, McEwan, J C, Brauning, R, & Clarke, S M (2025). Assessing population allele frequency differences using low-depth sequencing data. _Journal of the Royal Society of New Zealand_, (early online). [DOI](https://doi.org/10.1080/03036758.2025.2500999) 21 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "inputs": { 5 | "systems": "systems" 6 | }, 7 | "locked": { 8 | "lastModified": 1731533236, 9 | "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", 10 | "owner": "numtide", 11 | "repo": "flake-utils", 12 | "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", 13 | "type": "github" 14 | }, 15 | "original": { 16 | "owner": "numtide", 17 | "repo": "flake-utils", 18 | "type": "github" 19 | } 20 | }, 21 | "nixpkgs": { 22 | "locked": { 23 | "lastModified": 1731797254, 24 | "narHash": "sha256-df3dJApLPhd11AlueuoN0Q4fHo/hagP75LlM5K1sz9g=", 25 | "owner": "NixOS", 26 | "repo": "nixpkgs", 27 | "rev": "e8c38b73aeb218e27163376a2d617e61a2ad9b59", 28 | "type": "github" 29 | }, 30 | "original": { 31 | "owner": "NixOS", 32 | "ref": "nixos-24.05", 33 | "repo": "nixpkgs", 34 | "type": "github" 35 | } 36 | }, 37 | "root": { 38 | "inputs": { 39 | "flake-utils": "flake-utils", 40 | "nixpkgs": "nixpkgs" 41 | } 42 | }, 43 | "systems": { 44 | "locked": { 45 | "lastModified": 1681028828, 46 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 47 | "owner": "nix-systems", 48 | "repo": "default", 49 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 50 | "type": "github" 51 | }, 52 | "original": { 53 | "owner": "nix-systems", 54 | "repo": "default", 55 | "type": "github" 56 | } 57 | } 58 | }, 59 | "root": "root", 60 | "version": 7 61 | } 62 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "Naive Nix packaging for KGD"; 3 | inputs = { 4 | nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.05"; 5 | 6 | flake-utils.url = "github:numtide/flake-utils"; 7 | }; 8 | 9 | # KGD is not a first-class R package, merely a collection of R source files 10 | # to be source'd into the caller's environment. 11 | # 12 | # The Nix package enables these R sources to be used directly from the Nix store. 13 | 14 | outputs = { self, nixpkgs, flake-utils }: 15 | flake-utils.lib.eachDefaultSystem 16 | (system: 17 | let 18 | pkgs = import nixpkgs { 19 | inherit system; 20 | }; 21 | 22 | # we need a recent Rcpp with this fix: 23 | # https://github.com/RcppCore/Rcpp/pull/1346 24 | recent-Rcpp = pkgs.rPackages.buildRPackage { 25 | name = "Rcpp"; 26 | 27 | version = "1.0.13.6"; 28 | 29 | src = pkgs.fetchFromGitHub { 30 | owner = "RcppCore"; 31 | repo = "Rcpp"; 32 | rev = "83e640b55aeaeba8a746d0da6152cabe8af41154"; 33 | hash = "sha256-x0s9BRIAxxZmNFXRYwtpmT5asD4+mhIUTqwYsaPOgi4="; 34 | }; 35 | }; 36 | 37 | # TODO: it should be possible to simply override the Rcpp dependency for RcppArmadillo in nixpkgs, 38 | # but I didn't work out how to do that 39 | RcppArmadillo-with-recent-Rcpp = pkgs.rPackages.buildRPackage { 40 | name = "RcppArmadillo"; 41 | 42 | version = "0.12.8.1.0"; 43 | 44 | src = pkgs.fetchFromGitHub { 45 | owner = "RcppCore"; 46 | repo = "RcppArmadillo"; 47 | rev = "8abe7be9fc4dd7c1d2b02ed200707232d6fd1f09"; # 0.12.8.1.0 48 | hash = "sha256-+Li4ln/4ZyBY+I8S8X4uSmFaG1D3q5UJOJJB5pLRubo="; 49 | }; 50 | 51 | propagatedBuildInputs = [ recent-Rcpp ]; 52 | }; 53 | 54 | 55 | KDG-R = pkgs.rWrapper.override 56 | { 57 | packages = with pkgs.rPackages; [ 58 | RcppArmadillo-with-recent-Rcpp 59 | data_table 60 | R_utils 61 | plotly 62 | heatmaply 63 | parallelDist 64 | MethComp 65 | MASS 66 | ]; 67 | }; 68 | 69 | KDG-src = pkgs.stdenv.mkDerivation { 70 | pname = "KDG-src"; 71 | version = "1.2.2"; 72 | 73 | src = ./.; 74 | 75 | buildInputs = [ KDG-R ]; 76 | 77 | propagatedBuildInputs = [ KDG-R ]; 78 | 79 | nativeBuildInputs = [ pkgs.dos2unix ]; 80 | 81 | installPhase = '' 82 | mkdir $out 83 | runHook preInstall 84 | cp GBS-Chip-Gmatrix.R GBSPedAssign.R GBS-PopGen.R GBSRun.R GBS-Rcpp-functions.cpp $out 85 | chmod 755 $out/GBSRun.R 86 | dos2unix $out/* 87 | runHook postInstall 88 | ''; 89 | 90 | postFixup = '' 91 | substituteInPlace $out/GBSRun.R --replace '' $out 92 | ''; 93 | }; 94 | in 95 | { 96 | packages = { 97 | src = KDG-src; 98 | }; 99 | }); 100 | } 101 | -------------------------------------------------------------------------------- /vcf2ra.py: -------------------------------------------------------------------------------- 1 | #input: 2 | # infile.vcf 3 | # vcf file, tab delimited 4 | # 5 | #output: 6 | # infile.vcf.ra.tab 7 | # RA (ReferenceAlternative) file, tab-delimited with columns, CHROM, POS, SAMPLES 8 | # CHROM POS 999220 999204 9 | # 1 415 0,0 0,0 10 | # 1 443 1,0 9,0 11 | # 1 448 0,0 0,0 12 | # 13 | #processing: 14 | # indels are removed and reported in infile.vcf.indel 15 | # SNP that are more than biallelic are removed and reported in infile.vcf.polyallele 16 | # all redundant sites are removed and reported in infile.vcf.posred 17 | # ./. is translated into 0,0 18 | 19 | import sys, os 20 | 21 | if len(sys.argv) != 2: 22 | sys.exit('Usage: python %s vcf_file\n' % sys.argv[0]) 23 | 24 | if not os.path.exists(sys.argv[1]): 25 | sys.exit('ERROR: vcf_file \'%s\' was not found!\n' % sys.argv[1]) 26 | 27 | infile = sys.argv[1] 28 | infh = open(infile) 29 | 30 | outfile = infile + '.ra.tab' 31 | ofh = open(outfile, 'w') 32 | 33 | indelfile = infile + '.indel' #indels 34 | indelfh = open(indelfile, 'w') 35 | indels = 0 #rows with indels 36 | 37 | polyallelefile = infile + '.polyallele' #sites that are more than biallelic 38 | polyallelefh = open(polyallelefile, 'w') 39 | polyallele = 0 #polyallelic rows 40 | 41 | posredfile = infile + '.posred' #sites with redundant positions 42 | posredfh = open(posredfile, 'w') 43 | posred = 0 #rows with redundant positions 44 | 45 | #scan input file for redundant positions 46 | ######################################## 47 | 48 | print('Scanning SNP positions') 49 | line = infh.readline() 50 | pos_seen = {} 51 | 52 | while line: 53 | line = line.strip() 54 | if line.startswith('#'): 55 | pass 56 | else: 57 | line = line.split('\t') 58 | chrom = line[0] 59 | if not chrom in pos_seen: 60 | pos_seen[chrom] = {} 61 | pos = line[1] 62 | if not pos in pos_seen[chrom]: 63 | pos_seen[chrom][pos] = 0 64 | pos_seen[chrom][pos] += 1 65 | line = infh.readline() 66 | infh.close() 67 | 68 | counter = 0 69 | for i in pos_seen: 70 | for j in pos_seen[i]: 71 | if pos_seen[i][j] > 1: 72 | counter += 1 73 | 74 | print('\tFound %s redundant positions\n' %counter) 75 | 76 | #filter input file for indels, polyallelic SNPs, redundant positions 77 | ##################################################################### 78 | 79 | infh = open(infile) 80 | 81 | headerlist = ['CHROM', 'POS'] 82 | line = infh.readline() 83 | 84 | snp_counter = 0 85 | line_counter = 0 86 | 87 | #initialize various options for depth related fields 88 | ad_pos = '' 89 | ro_pos = '' 90 | ao_pos = '' 91 | dp4_pos = '' 92 | 93 | while line: 94 | line = line.strip() 95 | outlist = [] 96 | annotlist = [] 97 | empty_genotypes = ['./.', '.,.', '.', '.|.'] 98 | if line.startswith('##'): 99 | pass 100 | elif line.startswith('#CHROM'): 101 | line = line.split('\t') 102 | headerlist += line[9:] 103 | ofh.write('%s\n' %('\t'.join(headerlist))) 104 | print('Found %s samples' %(len(headerlist) - 2)) 105 | for i in headerlist: 106 | if ' ' in i: 107 | print('WARN: spaces in sample names are discouraged %s' %i) 108 | else: 109 | pass 110 | else: 111 | line_counter += 1 112 | line = line.split('\t') 113 | chrom = line[0] 114 | outlist.append(chrom) 115 | annotlist.append(chrom) 116 | pos = line[1] 117 | if pos_seen[chrom][pos] > 1: #filter out redundant positions 118 | posredfh.write('%s\n' %('\t'.join(line))) 119 | posred += 1 120 | else: 121 | outlist.append(pos) 122 | annotlist.append(pos) 123 | ref = line[3] 124 | alt = line[4] 125 | if ref == '.' or alt == '.': #filter out indels 126 | indelfh.write('%s\n' %('\t'.join(line))) 127 | indels += 1 128 | elif alt.count(',') > 0: #filter sites that are more than biallelic 129 | polyallelefh.write('%s\n' %('\t'.join(line))) 130 | polyallele += 1 131 | elif len(ref) > 1 or len(alt) > 1: 132 | indelfh.write('%s\n' %('\t'.join(line))) 133 | indels += 1 134 | else: 135 | if line_counter == 1: #only look at first SNP to determine depth field 136 | format = line[8].split(':') 137 | if "AD" in format: 138 | ad_pos = format.index('AD') 139 | print('Using AD field for depth information') 140 | elif "RO" and "AO" in format: 141 | ro_pos = format.index('RO') 142 | ao_pos = format.index('AO') 143 | print('Using RO and AO fields for depth information') 144 | elif "DP4" in format: 145 | dp4_pos = format.index('DP4') 146 | print('Using DP4 field for depth information') 147 | else: 148 | print("\nERROR: We can't use this vcf file. AD (Allelic Depth), or RO (Reference allele observation count) and AO (Alternate allele observation count) information, or DP4 is needed.\n") 149 | sys.exit() 150 | 151 | for i in line[9:]: 152 | if i in empty_genotypes: #translate uncovered to 0,0 153 | outlist.append('0,0') 154 | else: 155 | i = i.split(':') 156 | if ad_pos or ad_pos==0: 157 | if i[ad_pos] in empty_genotypes: 158 | outlist.append('0,0') 159 | else: 160 | outlist.append(i[ad_pos]) 161 | elif (ro_pos and ao_pos) or (ro_pos == 0 and ao_pos==0): 162 | ad = str(i[ro_pos]) + ',' + str(i[ao_pos]) 163 | outlist.append(ad) 164 | elif (dp4_pos) or dp4_pos==0: 165 | counts = i[dp4_pos].split(',') 166 | allele1 = int(counts[0]) + int(counts[1]) 167 | allele2 = int(counts[2]) + int(counts[3]) 168 | ad = str(allele1) + ',' + str(allele2) 169 | outlist.append(ad) 170 | else: 171 | ##Should never really get here, but if AD, AO and RO are all null, it will break the script 172 | print("\nERROR: Can't find either an Allele Depth (AD) or RO (Reference allele observation count) and AO (Alternate allele observation count) or DP4 at this position.\n") 173 | sys.exit() 174 | ofh.write('%s\n' %('\t'.join(outlist))) 175 | snp_counter += 1 176 | line = infh.readline() 177 | 178 | infh.close() 179 | ofh.close() 180 | 181 | print('\n') 182 | print('%12s SNPs written to %s' %(snp_counter, outfile)) 183 | print('%12s SNPs (rows) ignored because of indels and written to %s' %(indels, indelfile)) 184 | print('%12s SNPs (rows) ignored because of more than two alleles and written to %s' %(polyallele, polyallelefile)) 185 | print('%12s SNPs (rows) ignored because of redundant genomic positions and written to %s' %(posred, posredfile)) 186 | 187 | --------------------------------------------------------------------------------