├── src ├── tools.c ├── README.md ├── Makefile ├── treespace.c ├── paml.h └── baseml.c ├── R ├── pri10s.jpg ├── R.Rproj └── analysis.R ├── doc └── phylogenomic-dating.pdf ├── data ├── 10s-nocal.tree ├── 10s.tree └── 330s.tree ├── .gitignore ├── gH └── mcmctree-outBV.ctl ├── README.md ├── prior └── mcmctree-pr.ctl └── mcmc └── mcmctree.ctl /src/tools.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mariodosreis/divtime/HEAD/src/tools.c -------------------------------------------------------------------------------- /R/pri10s.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mariodosreis/divtime/HEAD/R/pri10s.jpg -------------------------------------------------------------------------------- /doc/phylogenomic-dating.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mariodosreis/divtime/HEAD/doc/phylogenomic-dating.pdf -------------------------------------------------------------------------------- /data/10s-nocal.tree: -------------------------------------------------------------------------------- 1 | (Tree_shrew,((Bushbaby,Mouse_lemur),(Tarsier,(Marmoset,(Rhesus,(Orangutan,(Gorilla,(Chimp,Human)))))))); 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | gH/rst* 2 | gH/tmp* 3 | R/.Rhistory 4 | R/many.tree 5 | gH/2base.t 6 | */SeedUsed 7 | gH/in.basemlg 8 | gH/lnf 9 | gH/out* 10 | gH/rates 11 | gH/rub 12 | */mcmc*.txt 13 | */out*.txt 14 | */FigTree*.tre 15 | */*.BV 16 | .Rproj.user 17 | -------------------------------------------------------------------------------- /R/R.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: 
Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /src/README.md: -------------------------------------------------------------------------------- 1 | This is the source code for MCMCTree and BASEML version 4.9e, which were used to 2 | prepare the tutorial. 3 | 4 | MCMCTree and BASEML are part of the PAML package available from bit.ly/ziheng-paml. 5 | Please refer to this for the latest version of the programs. 6 | -------------------------------------------------------------------------------- /data/10s.tree: -------------------------------------------------------------------------------- 1 | 10 1 2 | 3 | (Tree_shrew,((Bushbaby,Mouse_lemur)'B(0.37,0.58,0.01,0.10)',(Tarsier,(Marmoset,(Rhesus,(Orangutan,(Gorilla,(Chimp,Human)'B(0.075,0.10,0.01,0.20)')'B(0.10,0.132,0.01,0.20)')'B(0.112,0.28,0.01,0.10)')'B(0.25,0.337,0.01,0.10)')'ST(0.4754,0.0632,0.98,22.85)'))'S2N(0.698,0.65,0.0365,-3400,0.6502,0.1375,11409)')'G(36,36.9)'; 4 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | PRGS = baseml mcmctree 2 | CC = cc # cc, gcc, cl 3 | 4 | CFLAGS = -O3 5 | 6 | LIBS = -lm # -lM 7 | 8 | all : $(PRGS) 9 | 10 | baseml : baseml.c tools.c treesub.c treespace.c paml.h 11 | $(CC) $(CFLAGS) -o $@ baseml.c tools.c $(LIBS) 12 | 13 | mcmctree : mcmctree.c tools.c treesub.c treespace.c paml.h 14 | $(CC) $(CFLAGS) -o $@ mcmctree.c tools.c $(LIBS) 15 | $(CC) $(CFLAGS) -o infinitesites -D INFINITESITES mcmctree.c tools.c $(LIBS) 16 | -------------------------------------------------------------------------------- /gH/mcmctree-outBV.ctl: -------------------------------------------------------------------------------- 1 | seqfile = 
../data/10s.phys 2 | treefile = ../data/10s.tree 3 | 4 | ndata = 2 5 | seqtype = 0 * 0: nucleotides; 1:codons; 2:AAs 6 | usedata = 3 * 0: no data (prior); 1:exact likelihood; 7 | * 2: approximate likelihood; 3:out.BV (in.BV) 8 | clock = 2 * 1: global clock; 2: independent rates; 3: correlated rates 9 | 10 | model = 4 * 0:JC69, 1:K80, 2:F81, 3:F84, 4:HKY85 11 | alpha = 0.5 * alpha for gamma rates at sites 12 | ncatG = 5 * No. categories in discrete gamma 13 | 14 | cleandata = 0 * remove sites with ambiguity data (1:yes, 0:no)? 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bayesian Molecular Clock Dating Tutorial Using Genome-Scale Data 2 | 3 | This repository contains the necessary files to run the tutorial 4 | on divergence time estimation using genome-scale data. 5 | 6 | File description: 7 | 8 | `data/10s.phys`: Alignment of protein-coding genes of 10 primate species in two partitions. 9 | 10 | `data/10s.tree` and `data/10s-nocal.tree`: Tree of 10 primate species with and without fossil calibrations respectively. 11 | 12 | `data/330s.phys`: Alignment of 1st and 2nd sites of mitochondrial protein-coding genes of 330 primate species in one partition. 13 | 14 | `data/330s.tree`: Tree of 330 primate species. 15 | 16 | `doc/phylogenomic-dating.pdf`: Book chapter with tutorial. 17 | 18 | `gH/mcmctree-outBV.ctl`: Control file for MCMCTree necessary to set up calculation of the gradient and Hessian for approximate likelihood calculation. 19 | 20 | `mcmc/mcmctree.ctl`: Control file for MCMCTree to perform MCMC sampling from the posterior distribution of divergence times and rates using the approximate likelihood method. 21 | 22 | `prior/mcmctree-pr.ctl`: Control file for MCMCTree to perform MCMC sampling from the prior distribution. 23 | 24 | `R/analysis.R`: Examples of how to summarise the MCMC output and MCMC diagnostics. 
25 | 26 | `src/`: Source code of MCMCTree v4.9e. 27 | 28 | A pretty densitree of the 10 species analysed in this tutorial: 29 | 30 | ![](/R/pri10s.jpg) 31 | -------------------------------------------------------------------------------- /prior/mcmctree-pr.ctl: -------------------------------------------------------------------------------- 1 | seed = -1 2 | seqfile = ../data/10s.phys 3 | treefile = ../data/10s.tree 4 | mcmcfile = mcmc.txt 5 | outfile = out.txt 6 | 7 | ndata = 2 8 | seqtype = 0 * 0: nucleotides; 1:codons; 2:AAs 9 | usedata = 0 * 0: no data (prior); 1:exact likelihood; 2:approximate likelihood; 3:out.BV (in.BV) 10 | clock = 2 * 1: global clock; 2: independent rates; 3: correlated rates 11 | RootAge = '<1.0' * safe constraint on root age, used if no fossil for root. 12 | 13 | model = 4 * 0:JC69, 1:K80, 2:F81, 3:F84, 4:HKY85 14 | alpha = 0.5 * alpha for gamma rates at sites 15 | ncatG = 5 * No. categories in discrete gamma 16 | 17 | cleandata = 0 * remove sites with ambiguity data (1:yes, 0:no)? 18 | 19 | BDparas = 1 1 0 * birth, death, sampling 20 | kappa_gamma = 6 2 * gamma prior for kappa 21 | alpha_gamma = 1 1 * gamma prior for alpha 22 | 23 | rgene_gamma = 2 40 1 * gammaDir prior for rate for genes 24 | sigma2_gamma = 1 10 1 * gammaDir prior for sigma^2 (for clock=2 or 3) 25 | 26 | print = 1 * 0: no mcmc sample; 1: everything except branch rates 2: everything 27 | burnin = 20000 28 | sampfreq = 100 29 | nsample = 20000 30 | 31 | *** Note: Make your window wider (100 columns) before running the program. 
32 | -------------------------------------------------------------------------------- /mcmc/mcmctree.ctl: -------------------------------------------------------------------------------- 1 | seed = -1 2 | seqfile = ../data/10s.phys 3 | treefile = ../data/10s.tree 4 | mcmcfile = mcmc.txt 5 | outfile = out.txt 6 | 7 | ndata = 2 8 | seqtype = 0 * 0: nucleotides; 1:codons; 2:AAs 9 | usedata = 2 * 0: no data (prior); 1:exact likelihood; 10 | * 2:approximate likelihood; 3:out.BV (in.BV) 11 | clock = 2 * 1: global clock; 2: independent rates; 3: correlated rates 12 | RootAge = '<1.0' * safe constraint on root age, used if no fossil for root. 13 | 14 | model = 4 * 0:JC69, 1:K80, 2:F81, 3:F84, 4:HKY85 15 | alpha = 0.5 * alpha for gamma rates at sites 16 | ncatG = 5 * No. categories in discrete gamma 17 | 18 | cleandata = 0 * remove sites with ambiguity data (1:yes, 0:no)? 19 | 20 | BDparas = 1 1 0 * birth, death, sampling 21 | kappa_gamma = 6 2 * gamma prior for kappa 22 | alpha_gamma = 1 1 * gamma prior for alpha 23 | 24 | rgene_gamma = 2 40 1 * gammaDir prior for rate for genes 25 | sigma2_gamma = 1 10 1 * gammaDir prior for sigma^2 (for clock=2 or 3) 26 | 27 | print = 1 * 0: no mcmc sample; 1: everything except branch rates 2: everything 28 | burnin = 20000 29 | sampfreq = 100 30 | nsample = 20000 31 | 32 | *** Note: Make your window wider (100 columns) before running the program. 
33 | -------------------------------------------------------------------------------- /R/analysis.R: -------------------------------------------------------------------------------- 1 | # Make sure R's working directory is divtime/R 2 | 3 | rm(list=ls()) # clean up the workspace 4 | 5 | # ############################################### 6 | # POSTERIOR: 7 | # ############################################### 8 | # read in MCMC trace files 9 | mcmc1 <- read.table("../mcmc/mcmc1.txt", head=TRUE) 10 | mcmc2 <- read.table("../mcmc/mcmc2.txt", head=TRUE) 11 | 12 | # each data frame contains 15 columns: 13 | # MCMC generation number, 9 node ages (divergence times), 2 mean mutation rates, 14 | # 2 rate drift coefficients, and sample log-likelihood values 15 | names(mcmc1) 16 | # [1] "Gen" "t_n11" "t_n12" "t_n13" "t_n14" "t_n15" "t_n16" "t_n17" 17 | # [9] "t_n18" "t_n19" "mu1" "mu2" "sigma2_1" "sigma2_2" "lnL" 18 | 19 | # to check for convergence of the MCMC runs, we calculate the posterior 20 | # means of times for each run, and plot them against each other 21 | t.mean1 <- apply(mcmc1[,2:10], 2, mean) * 100 22 | t.mean2 <- apply(mcmc2[,2:10], 2, mean) * 100 23 | # good convergence is indicated when the points fall on the y = x line. 24 | 25 | par(mfrow=c(2,2)) 26 | # posterior times for run 1 vs run 2: 27 | plot(t.mean1, t.mean2, main="a) Posterior times, r 1 vs. r 2"); abline(0, 1) 28 | # notice that ancient times (t_n11 and t_n12) have small ESS 29 | # trace plots are useful to visualise the MCMC and split problems 30 | plot(mcmc1$t_n19, ty='l', main="b) trace of t_n19") 31 | plot(mcmc1$t_n11, ty='l', main="c) trace of t_n11") 32 | plot(density(mcmc1$t_n11), main="d) histogram of t_n11, r 1 vs. 
r2") 33 | lines(density(mcmc2$t_n11), lty=2) 34 | 35 | # we can calculate the effective sample sizes (ESS) of the parameters 36 | # (you need to have the coda package installed for this to work) 37 | mean.mcmc <- apply(mcmc1[,-1], 2, mean) 38 | ess.mcmc <- apply(mcmc1[,-1], 2, coda::effectiveSize) 39 | var.mcmc <- apply(mcmc1[,-1], 2, var) 40 | se.mcmc <- sqrt(var.mcmc / ess.mcmc) 41 | cbind(mean.mcmc, ess.mcmc, var.mcmc, se.mcmc) 42 | 43 | # oldpar <- par() 44 | # mai <- oldpar$mai; mai[3] <- .35; par(mai = mai) 45 | # mai <- oldpar$mai; mai[1] <- .35; mai[2] <- .35; mai[3] <- .45; par(mai = mai) 46 | 47 | # If you have the ape and bppr packages installed, you can make a densitree plot 48 | # To get bppr, try: 49 | # devtools::install_github("dosreislab/bppr") 50 | par(mfrow=c(1,1)) 51 | #pri10s.tree <- ape::read.tree("../data/10s.tree") # does not work with some ape versions 52 | pri10s.tree <- ape::read.tree("../data/10s-nocal.tree") 53 | bppr::mcmc2densitree(pri10s.tree, mcmc1 * 100, "t_", thin=0.05, col="blue", alpha=0.01, pfrac=0.2) 54 | title(xlab="Divergence time (Ma)") 55 | 56 | # Some phylogenetic programs such as BEAST or MrBayes output a list of trees in 57 | # Newick format, instead of a table. 
If you want, you can convert MCMCtree's 58 | # MCMC output into a list of Newick trees as with BEAST or MrBayes 59 | pri10s.tree$node.label <- NULL # remove node labels (optional) 60 | mcmc.trees <- bppr::mcmc2multiphylo(pri10s.tree, mcmc1, "t_", thin=0.05) 61 | # You can write the trees to a file (note this file can be potentially very big) 62 | ape::write.tree(mcmc.trees, file="many.tree") 63 | 64 | # ############################################### 65 | # PRIOR: 66 | # ############################################### 67 | mcmc1.p <- read.table("../prior/mcmc1.txt", head=TRUE) 68 | mcmc2.p <- read.table("../prior/mcmc2.txt", head=TRUE) 69 | 70 | p.mean1 <- apply(mcmc1.p[,2:10], 2, mean) * 100 71 | p.mean2 <- apply(mcmc2.p[,2:10], 2, mean) * 100 72 | plot(p.mean1, p.mean2); abline(0, 1) 73 | 74 | # par(mai=c(.4,.5,.4,.2)) # this changes the margin sizes 75 | par(mfcol=c(3,3)) 76 | for(i in 9:1) { 77 | dpr <- density(mcmc1.p[,i+1], adj=.1) # prior 78 | dPr <- density(mcmc1[,i+1], adj=.1) # Posterior 79 | xl <- range(c(dpr$x, dPr$x)) 80 | yl <- range(c(dpr$y, dPr$y)) 81 | plot(dpr, main=paste("t_n",i+10,sep=""), xlab="", ylab="", las=1, xlim=xl, ylim=yl, col="darkgrey") 82 | lines(dPr, col="black") 83 | } 84 | 85 | -------------------------------------------------------------------------------- /data/330s.tree: -------------------------------------------------------------------------------- 1 | 330 1 2 | 3 | ((((((((((((((((Macaca_ochreata, Macaca_brunnescens), Macaca_maura), Macaca_hecki), ((Macaca_tonkeana, Macaca_nigra), Macaca_nigrescens)), (((Macaca_siberu, Macaca_leonina), Macaca_pagensis), (Macaca_silenus, Macaca_nemestrina))), (((((Macaca_thibetana, Macaca_assamensis), Macaca_sinica), (Macaca_munzala, Macaca_radiata)), Macaca_arctoides), (((Macaca_fuscata, Macaca_mulatta), Macaca_cyclopis), Macaca_fascicularis))), Macaca_sylvanus), (((((((Rungwecebus_kipunji, Papio_ursinus), Papio_cynocephalus), Papio_anubis), Papio_hamadryas), Papio_papio), 
((Lophocebus_albigena, Lophocebus_aterrimus), Theropithecus_gelada)), (((Cercocebus_galeritus, Cercocebus_agilis), Cercocebus_torquatus), (Mandrillus_leucophaeus, Mandrillus_sphinx)))), ((((((Cercopithecus_cephus, Cercopithecus_ascanius), (Cercopithecus_mitis, Cercopithecus_nictitans)), (Cercopithecus_mona, Cercopithecus_neglectus)), Cercopithecus_hamlyni), ((((Chlorocebus_aethiops, Chlorocebus_tantalus), Chlorocebus_pygerythrus), Chlorocebus_sabaeus), (Cercopithecus_lhoesti, Erythrocebus_patas))), Allenopithecus_nigroviridis)), ((((((((Trachypithecus_auratus, Trachypithecus_germaini), Trachypithecus_cristatus), ((Trachypithecus_barbei, Trachypithecus_obscurus), Trachypithecus_phayrei)), ((Trachypithecus_francoisi, Trachypithecus_poliocephalus), Trachypithecus_laotum)), ((((Semnopithecus_hector, Semnopithecus_entellus), Trachypithecus_johnii), (Trachypithecus_pileatus, Trachypithecus_geei)), (Semnopithecus_priam, Trachypithecus_vetulus))), (((Pygathrix_nemaeus, Pygathrix_cinerea), (Simias_concolor, Nasalis_larvatus)), (((Rhinopithecus_avunculus, Rhinopithecus_roxellana), Rhinopithecus_bieti), Rhinopithecus_brelichi))), ((((((((Presbytis_bicolor, Presbytis_rubicunda), Presbytis_melalophos), Presbytis_sumatranu), ((Presbytis_fredericae, Presbytis_comata), Presbytis_mitrata)), (Presbytis_potenziani, Presbytis_siberu)), ((Presbytis_frontata, Presbytis_chrysomelas), Presbytis_hosei)), Presbytis_femoralis), Presbytis_thomasi)), (((((((Piliocolobus_tephrosceles, Piliocolobus_rufomitratus), Piliocolobus_foai), (Piliocolobus_kirkii, Piliocolobus_gordonorum)), ((Piliocolobus_preussi, Piliocolobus_pennantii), Piliocolobus_tholloni)), Piliocolobus_badius), Procolobus_verus), ((((Colobus_vellerosus, Colobus_polykomos), Colobus_guereza), Colobus_angolensis), Colobus_satanas)))), ((((((((((Hylobates_funereus, Hylobates_muelleri), Hylobates_abbotti), (Hylobates_agilis, Hylobates_albibarbis)), (Hylobates_klossii, Hylobates_moloch)), Hylobates_lar), Hylobates_pileatus), 
Symphalangus_syndactylus), ((((Nomascus_leucogenys, Nomascus_siki), Nomascus_gabriellae), Nomascus_concolor), (Nomascus_hainanus, Nomascus_nasutus))), (Hoolock_hoolock, Hoolock_leuconedys)), ((((Pan_troglodytes, Pan_paniscus), Homo_sapiens), Gorilla_gorilla), (Pongo_abelii, Pongo_pygmaeus)))), (((((((((((((Callithrix_mauesi, Callithrix_humeralifera), Callithrix_emiliae), Callithrix_argentata), Callithrix_pygmaea), ((((Callithrix_penicillata, Callithrix_jacchus), Callithrix_kuhlii), Callithrix_geoffroyi), Callithrix_aurita)), Callithrix_saterei), Callimico_goeldii), ((Leontopithecus_chrysopygus, Leontopithecus_rosalia), Leontopithecus_chrysomelas)), ((((Saguinus_martinsi, Saguinus_midas), (Saguinus_oedipus, Saguinus_geoffroyi)), (Saguinus_labiatus, Saguinus_mystax)), (((Saguinus_tripartitus, Saguinus_fuscicollis), (Saguinus_nigricollis, Saguinus_graellsi)), Saguinus_melanoleucus))), ((((Aotus_azarae, Aotus_infulatus), Aotus_nigriceps), Aotus_nancymaae), (((Aotus_lemurinus, Aotus_griseimembra), Aotus_trivirgatus), Aotus_vociferans))), ((((Cebus_olivaceus, Cebus_albifrons), Cebus_capucinus), ((Cebus_apella, Cebus_libidinosus), Cebus_xanthosternos)), (((Saimiri_sciureus, Saimiri_oerstedii), Saimiri_ustus), Saimiri_boliviensis))), (((((Ateles_belzebuth, Ateles_geoffroyi), Ateles_fusciceps), ((Ateles_marginatus, Ateles_chamek), Ateles_paniscus)), ((((Lagothrix_lagotricha, Lagothrix_poeppigii), Lagothrix_cana), Lagothrix_lugens), (Brachyteles_arachnoides, Brachyteles_hypoxanthus))), (((((((Alouatta_macconnelli, Alouatta_seniculus), Alouatta_nigerrima), Alouatta_sara), Alouatta_caraya), Alouatta_belzebul), ((Alouatta_coibensis, Alouatta_palliata), Alouatta_pigra)), Alouatta_guariba))), (((((Callicebus_moloch, Callicebus_hoffmannsi), Callicebus_donacophilus), Callicebus_personatus), (Callicebus_lugens, Callicebus_torquatus)), (((((Cacajao_ayresi, Cacajao_hosomi), Cacajao_melanocephalus), Cacajao_calvus), ((Chiropotes_chiropotes, Chiropotes_israelita), Chiropotes_utahicki)), 
(Pithecia_monachus, Pithecia_irrorata))))), (((Tarsius_wallacei, Tarsius_lariang), Tarsius_dentatus), (Tarsius_bancanus, Tarsius_syrichta))), (((((((((((((((((Microcebus_mamiratra, Microcebus_margotmarshae), (Microcebus_arnholdi, Microcebus_sambiranensis)), Microcebus_rufus), (((Microcebus_myoxinus, Microcebus_berthae), Microcebus_lehilahytsara), Microcebus_mittermeieri)), ((((Microcebus_jollyae, Microcebus_sp_nova3Louis06), Microcebus_sp_nova1Louis06), ((Microcebus_spDWW2010aMarolambo, Microcebus_gerpi), Microcebus_spDWW2010aManantant)), Microcebus_simmonsi)), Microcebus_tavaratra), (((Microcebus_ravelobensis, Microcebus_bongolavensis), Microcebus_danfossi), (Microcebus_macarthurii, Microcebus_sp_nova3Radespiel08))), (Microcebus_murinus, Microcebus_griseorufus)), (Mirza_zaza, Mirza_coquereli)), Allocebus_trichotis), (((Cheirogaleus_medius, Cheirogaleus_major), Cheirogaleus_crossleyi), Cheirogaleus_sibreei)), Phaner_pallescens), ((((((((Lepilemur_sahamalazensis, Lepilemur_ahmansoni), Lepilemur_mittermeieri), Lepilemur_dorsalis), ((Lepilemur_milanoii, Lepilemur_tymerlachsoni), Lepilemur_ankaranensis)), Lepilemur_septentrionalis), (((Lepilemur_grewcockorum, Lepilemur_otto), Lepilemur_edwardsi), Lepilemur_microdon)), ((((Lepilemur_hubbardorum, Lepilemur_ruficaudatus), Lepilemur_randrianasoloi), Lepilemur_aeeclis), (Lepilemur_petteri, Lepilemur_leucopus))), (((((Lepilemur_betsileo, Lepilemur_jamesorum), Lepilemur_mustelinus), Lepilemur_fleuretae), Lepilemur_wrighti), Lepilemur_seali))), ((((((Avahi_ramanantsoavani, Avahi_meridionalis), (Avahi_betsilio, Avahi_peyrierasi)), Avahi_laniger), ((Avahi_unicolor, Avahi_cleesei), Avahi_occidentalis)), (((Propithecus_tattersalli, Propithecus_coquereli), Propithecus_verreauxi), ((Propithecus_edwardsi, Propithecus_diadema), Propithecus_perrieri))), Indri_indri)), (((((((((Eulemur_sanfordi, Eulemur_albifrons), Eulemur_fulvus), Eulemur_rufifrons), ((Eulemur_collaris, Eulemur_cinereiceps), Eulemur_rufus)), Eulemur_mongoz), 
((Eulemur_flavifrons, Eulemur_macaco), Eulemur_coronatus)), Eulemur_rubriventer), (((((Hapalemur_griseus, Hapalemur_alaotrensis), Hapalemur_occidentalis), Hapalemur_aureus), Prolemur_simus), Lemur_catta)), (Varecia_rubra, Varecia_variegata))), Daubentonia_madagascariensis), ((((((((Galago_senegalensis, Galago_gallarum), Galago_moholi), Galago_matschiei), (Galago_zanzibaricus, Galago_granti)), ((Otolemur_garnettii, Otolemur_crassicaudatus), (Galago_alleni, Galago_gabonensis))), Galago_demidoff), Euoticus_elegantulus), ((((((Nycticebus_bengalensis, Nycticebus_coucang), Nycticebus_javanicus), Nycticebus_menagensis), Nycticebus_pygmaeus), Loris_tardigradus), (Arctocebus_calabarensis, Perodicticus_potto))))), (Galeopterus_variegatus, Cynocephalus_volans)), (Tupaia_glis, Tupaia_belangeri)); 4 | -------------------------------------------------------------------------------- /src/treespace.c: -------------------------------------------------------------------------------- 1 | /* treespace.c 2 | collection of tree perturbation routines 3 | */ 4 | 5 | #include "paml.h" 6 | 7 | int MakeTreeIb (int ns, int Ib[], int rooted) 8 | { 9 | /* construct tree from Ib[] using the algorithm of adding species 10 | Ib[k] marks the branch to which the (k+3)_th species (or the root) 11 | is added. 
Ib[k] should be in the range [0,k+3] 12 | */ 13 | int i,j,k, is,it; 14 | 15 | tree.nbranch=3; 16 | for (i=0; i<3; i++) { tree.branches[i][0]=3; tree.branches[i][1]=i; } 17 | for (k=0; k=is) tree.branches[i][j]+=2; 22 | it=tree.branches[Ib[k]][1]; 23 | tree.branches[Ib[k]][1]=is+1; 24 | tree.branches[tree.nbranch][0]=is+1; 25 | tree.branches[tree.nbranch++][1]=it; 26 | tree.branches[tree.nbranch][0]=is+1; 27 | tree.branches[tree.nbranch++][1]=is; 28 | } 29 | tree.root=tree.branches[0][0]; 30 | BranchToNode (); 31 | 32 | if (rooted) { 33 | it=tree.branches[Ib[k]][0]; 34 | tree.branches[Ib[k]][0]=tree.branches[tree.nbranch][0]=2*com.ns-2; 35 | tree.branches[tree.nbranch][1]=it; 36 | for (; it!=tree.root; it=nodes[it].father) { 37 | tree.branches[nodes[it].ibranch][0]=it; 38 | tree.branches[nodes[it].ibranch][1]=nodes[it].father; 39 | } 40 | tree.root=2*com.ns-2; tree.nbranch++; 41 | BranchToNode (); 42 | } 43 | return (0); 44 | } 45 | 46 | int GetTreeI (int itree, int ns, int rooted) 47 | { 48 | /* get the i_th tree. Trees are ordered according to the algorithm of 49 | adding species. 50 | returns a random tree if itree==-1, in which case ns can be large 51 | */ 52 | int i, M[NS-2], nM=ns-3+rooted, Ib[NS-2]; 53 | 54 | for (i=0; i3) { 61 | FOR (i, nM) printf ("%5d ", M[i]); FPN (F0); 62 | FOR (i, nM) printf ("%5d ", Ib[i]); FPN (F0); 63 | } 64 | */ 65 | MakeTreeIb (ns, Ib, rooted); 66 | return (0); 67 | } 68 | 69 | 70 | 71 | int CountLHsTree (void) 72 | { 73 | /* This counts the number of labeled histories for a given rooted tree. 
74 | */ 75 | int i,k, nLH, nLR[NS-1][2], change, *sons, j; 76 | double y=0; 77 | 78 | for(i=com.ns; i=com.ns; i--) { 84 | sons = nodes[i].sons; 85 | for(j=0; j<2; j++) { 86 | if(nLR[i-com.ns][j] != -1) continue; 87 | if(sons[j] < com.ns) { 88 | nLR[i-com.ns][j] = 0; 89 | change = 1; 90 | } 91 | else if(nLR[sons[j]-com.ns][0] != -1 && nLR[sons[j]-com.ns][1] != -1) { 92 | nLR[i-com.ns][j] = nLR[sons[j]-com.ns][0] + nLR[sons[j]-com.ns][1] + 1; 93 | change = 1; 94 | } 95 | } 96 | } 97 | if(!change) break; 98 | } 99 | for(i=0,nLH=1; i=0; i--) { 143 | if (Ib[i]<2*i+3) break; 144 | if (i==0) { 145 | finish=1; 146 | break; 147 | } 148 | Ib[i]=0; Ib[i-1]++; 149 | } 150 | if (finish) break; 151 | } 152 | FPN(fout); 153 | 154 | return (0); 155 | } 156 | 157 | int GetIofTree (int rooted, int keeptree, double space[]) 158 | { 159 | /* Get the index of tree. 160 | tree.branches[] are destroyed for reconstruction, 161 | and some branches are reversed which may affect nodes[] also. 162 | Both are restored before return if keeptree=1. 163 | Works with binary trees only. 164 | bA[nbranch*(ns-2)] 165 | */ 166 | int M[NS-2], nM=com.ns-3+rooted; 167 | int i,j,k,is,it, b=0,a1,a2,Ib[NS-1], nid=tree.nnode-com.ns; 168 | char ns2=(char)(com.ns-2), *bA=(char*)space; /* bA[b*ns2+j]: ancestors on branch b */ 169 | struct TREEB tree0=tree; 170 | 171 | if (tree.nnode-com.ns!=com.ns-1-!rooted) error2 ("GetIofTree"); 172 | if (com.ns>15) error2("ns too large in GetIofTree"); 173 | 174 | /* find new root. 175 | Ib[]: No. 
of times inner nodes are visited on paths 1-2, 1-3, 2-3 */ 176 | for (i=0; ib becomes b->a */ 243 | 244 | if (newroot==oldroot) return; 245 | for (b=newroot,a=nodes[b].father; b!=oldroot; b=a,a=nodes[b].father) { 246 | tree.branches[nodes[b].ibranch][0]=b; 247 | tree.branches[nodes[b].ibranch][1]=a; 248 | #if (BASEML || CODEML) 249 | if(a>=com.ns /* && com.method==1 */) com.oldconP[a]=0; /* update the node */ 250 | #endif 251 | } 252 | 253 | tree.root=newroot; 254 | BranchToNode (); 255 | for (b=oldroot,a=nodes[b].father; b!=newroot; b=a,a=nodes[b].father) { 256 | nodes[b].branch = nodes[a].branch; 257 | nodes[b].label = nodes[a].label; 258 | } 259 | nodes[newroot].branch = -1; 260 | nodes[newroot].label = -1; 261 | 262 | #if (CODEML) 263 | /* omega's are moved in updateconP for NSbranchsites models */ 264 | if(com.model && com.NSsites==0) { 265 | for (b=oldroot,a=nodes[b].father; b!=newroot; b=a,a=nodes[b].father) 266 | nodes[b].omega = nodes[a].omega; 267 | nodes[newroot].omega = -1; 268 | } 269 | #endif 270 | #if (BASEML) 271 | if(com.nhomo==2) { 272 | for (b=oldroot,a=nodes[b].father; b!=newroot; b=a,a=nodes[b].father) 273 | nodes[b].pkappa = nodes[a].pkappa; 274 | nodes[newroot].pkappa = NULL; 275 | } 276 | #endif 277 | 278 | } 279 | 280 | 281 | 282 | int NeighborNNI (int i_tree) 283 | { 284 | /* get the i_tree'th neighboring tree of tree by the nearest neighbor 285 | interchange (NNI), each tree has 2*(# internal branches) neighbors. 286 | works with rooted and unrooted binary trees. 287 | 288 | Gives the ip_th neighbor for interior branch ib. 289 | Involved branches are a..b, a..c, b..d, 290 | with a..b to be the internal branch. 
291 | swap c with d, with d to be the ip_th son of b 292 | */ 293 | int i, a,b,c,d, ib=i_tree/2, ip=i_tree%2; 294 | 295 | if (tree.nbranch!=com.ns*2-2-(nodes[tree.root].nson==3)) 296 | error2 ("err NeighborNNI: multificating tree."); 297 | 298 | /* locate a,b,c,d */ 299 | for (i=0,a=0; i=com.ns && a++==ib) break; 301 | ib=i; 302 | a=tree.branches[ib][0]; b=tree.branches[ib][1]; 303 | c=nodes[a].sons[0]; if(c==b) c=nodes[a].sons[1]; 304 | d=nodes[b].sons[ip]; 305 | 306 | /* swap nodes c and d */ 307 | tree.branches[nodes[c].ibranch][1]=d; 308 | tree.branches[nodes[d].ibranch][1]=c; 309 | BranchToNode (); 310 | return (0); 311 | } 312 | 313 | int GetLHistoryI (int iLH) 314 | { 315 | /* Get the ILH_th labelled history. This function is rather similar to 316 | GetTreeI which returns the I_th rooted or unrooted tree topology. 317 | The labeled history is recorded in the numbering of nodes: 318 | node # increases as the node gets older: 319 | node d corresponds to time 2*ns-2-d; tree.root=ns*2-2; 320 | t0=1 > t1 > t2 > ... > t[ns-2] 321 | k ranges from 0 to i(i-1)/2 and indexes the pair (s1 & s2, with s1=2; i--) { 332 | k=it%(i*(i-1)/2); it/=(i*(i-1)/2); 333 | s2=(int)(sqrt(1.+8*k)-1)/2+1; s1=k-s2*(s2-1)/2; /* s1=s2 || s1<0) printf("\nijk%6d%6d%6d", s1, s2, k); 336 | 337 | nodes[nodea[s1]].father=nodes[nodea[s2]].father=inode; 338 | nodes[inode].nson=2; 339 | nodes[inode].sons[0]=nodea[s1]; 340 | nodes[inode].sons[1]=nodea[s2]; 341 | nodea[s1]=inode; nodea[s2]=nodea[i-1]; 342 | inode++; 343 | } 344 | tree.root=inode-1; 345 | NodeToBranch(); 346 | return (0); 347 | } 348 | 349 | int GetIofLHistory (void) 350 | { 351 | /* Get the index of the labelled history (rooted tree with nodes ordered 352 | according to time). 353 | Numbering of nodes: node # increases as the node gets older: 354 | node d corresponds to time 2*ns-2-d; tree.root=ns*2-2; 355 | t0=1 > t1 > t2 > ... 
> t[ns-2] 356 | */ 357 | int index, i,j,k[NS+1], inode,nnode, nodea[NS], s[2]; 358 | 359 | if (nodes[tree.root].nson!=2 || tree.nnode!=com.ns*2-1 360 | || tree.root!=com.ns*2-2) error2("IofLH"); 361 | for (i=0; i=com.ns) 392 | FOR (k, com.ns-1) 393 | if (tree.branches[i][j]==LHistory[k]) 394 | { tree.branches[i][j]=com.ns*2-2-k; break; } 395 | BranchToNode(); 396 | 397 | return (0); 398 | } 399 | -------------------------------------------------------------------------------- /src/paml.h: -------------------------------------------------------------------------------- 1 | /* paml.h 2 | */ 3 | 4 | #if (!defined PAML_H) 5 | #define PAML_H 6 | 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #define square(a) ((a)*(a)) 19 | #define FOR(i,n) for(i=0; i(b)?(a):(b)) 24 | #define swap2(a,b,y) { y=a; a=b; b=y; } 25 | #define Pi 3.1415926535897932384626433832795 26 | 27 | #define beep putchar('\a') 28 | #define spaceming2(n) ((n)*((n)*2+9+2)*sizeof(double)) 29 | 30 | int ReadSeq (FILE *fout, FILE *fseq, int cleandata, int locus); 31 | int ScanFastaFile(FILE *f, int *ns, int *ls, int *aligned); 32 | void EncodeSeqs (void); 33 | void SetMapAmbiguity(void); 34 | void ReadPatternFreq (FILE* fout, char* fpattf); 35 | int Initialize (FILE *fout); 36 | int MoveCodonSeq (int ns, int ls, char *z[]); 37 | int PatternWeight (void); 38 | int PatternWeightJC69like (void); 39 | int PatternWeightSimple (void); 40 | int Site2Pattern (FILE *fout); 41 | 42 | double getRoot(double (*f)(double), double (*df)(double), double initVal); 43 | int f_and_x(double x[], double f[], int n, int fromx, int LastItem); 44 | void bigexp(double lnx, double *a, double *b); 45 | void SetSeed (int seed, int PrintSeed); 46 | double rndu (void); 47 | void rndu_vector (double r[], int n); 48 | void randorder(int order[], int n, int space[]); 49 | #define rnduab(a,b) ((a)+((b)-(a))*rndu()) 50 | double reflect(double x, double 
a, double b); 51 | #define rndexp(mean) (-(mean)*log(rndu())) 52 | double rnduM0V1 (void); 53 | double rndNormal(void); 54 | int rndBinomial(int n, double p); 55 | double rndBox(void); 56 | double rndAirplane(void); 57 | double rndStrawhat(void); 58 | double rndBactrian(void); 59 | double rndBactrianTriangle(void); 60 | double rndBactrianLaplace(void); 61 | double rndTriangle(void); 62 | double rndLaplace (void); 63 | double rndCauchy (void); 64 | double rndt2 (void); 65 | double rndt4 (void); 66 | double rndlogt2 (double loc, double s); 67 | double rndlogistic (void); 68 | double rndloglogistic (double loc, double s); 69 | double rndgamma(double alpha); 70 | double rndbeta(double p, double q); 71 | int rndpoisson(double m); 72 | int rndNegBinomial(double shape, double mean); 73 | int SampleCat (double P[], int n, double space[]); 74 | int MultiNomialAliasSetTable (int ncat, double prob[], double F[], int L[], double space[]); 75 | int MultiNomialAlias (int n, int ncat, double F[], int L[], int nobs[]); 76 | int MultiNomial2 (int n, int ncat, double prob[], int nobs[], double space[]); 77 | int DiscreteBeta (double freq[], double x[], double p, double q, int K, int UseMedian); 78 | int DiscreteGamma (double freqK[], double rK[], double alpha, double beta, int K, int UseMedian); 79 | int AutodGamma (double Mmat[], double freqK[], double rK[], double *rho1, double alfa, double rho, int K); 80 | 81 | double QuantileChi2 (double prob, double v); 82 | #define QuantileGamma(prob,alpha,beta) QuantileChi2(prob,2.0*(alpha))/(2.0*(beta)) 83 | double PDFGamma(double x, double alpha, double beta); 84 | #define CDFGamma(x,alpha,beta) IncompleteGamma((beta)*(x),alpha,LnGamma(alpha)) 85 | double logPriorRatioGamma(double xnew, double xold, double a, double b); 86 | double PDFinvGamma(double x, double alpha, double beta); 87 | #define CDFinvGamma(x,alpha,beta) (1-CDFGamma(1/(x),alpha,beta)) 88 | #define CDFChi2(x,v) CDFGamma(x,(v)/2.0,0.5) 89 | double PDFBeta(double x, double p, 
double q); 90 | double CDFBeta(double x, double p, double q, double lnbeta); 91 | double QuantileBeta(double prob, double p, double q, double lnbeta); 92 | double Quantile(double(*cdf)(double x,double par[]), double p, double x, double par[], double xb[2]); 93 | double QuantileNormal (double prob); 94 | double PDFNormal (double x, double mu, double sigma2); 95 | double logPDFNormal(double x, double mu, double sigma2); 96 | double CDFNormal (double x); 97 | double logCDFNormal (double x); 98 | double PDFCauchy (double x, double m, double sigma); 99 | double PDFloglogistic (double x, double loc, double s); 100 | double PDFlogt2 (double x, double loc, double s); 101 | double PDFt2 (double x, double m, double s); 102 | double PDFt4 (double x, double m, double s); 103 | double PDFt (double x, double loc, double scale, double df, double lnConst); 104 | double CDFt (double x, double loc, double scale, double df, double lnbeta); 105 | double PDFSkewT (double x, double loc, double scale, double shape, double df); 106 | double PDFSkewN (double x, double loc, double scale, double shape); 107 | double logPDFSkewN(double x, double loc, double scale, double shape); 108 | 109 | int StirlingS2(int n, int k); 110 | double lnStirlingS2(int n, int k); 111 | double LnGamma(double alpha); 112 | #define LnBeta(p,q) (LnGamma(p) + LnGamma(q) - LnGamma(p+q)) 113 | double DFGamma(double x, double alpha, double beta); 114 | double IncompleteGamma (double x, double alpha, double ln_gamma_alpha); 115 | #define CDFBinormal(h,k,r) LBinormal(-(h),-(k),r) /* CDF for bivariate normal */ 116 | double LBinormal (double h, double k, double r); 117 | double logLBinormal (double h, double k, double r); 118 | double probBinomial (int n, int k, double p); 119 | double probBetaBinomial (int n, int k, double p, double q); 120 | double factorial (int n); 121 | double Binomial(double n, int k, double *scale); 122 | int BinomialK(double alpha, int n, double C[], double S[]); 123 | int GaussLegendreRule(const 
double **x, const double **w, int order); 124 | int GaussLaguerreRule(const double **x, const double **w, int order); 125 | double NIntegrateGaussLegendre(double(*fun)(double x), double a, double b, int order); 126 | 127 | int ScatterPlot (int n, int nseries, int yLorR[], double x[], double y[], 128 | int nrow, int ncol, int ForE); 129 | void rainbowRGB (double temperature, int *R, int *G, int *B); 130 | void GetIndexTernary(int *ix, int *iy, double *x, double *y, int itriangle, int K); 131 | 132 | int CodeChara (char b, int seqtype); 133 | int dnamaker (char z[], int ls, double pi[]); 134 | int picksite (char z[], int ls, int begin, int gap, char buffer[]); 135 | int transform (char z[], int ls, int direction, int seqtype); 136 | int RemoveIndel(void); 137 | int f_mono_di (FILE *fout, char z[], int ls, int iring, double fb1[], double fb2[], double CondP[]); 138 | int PickExtreme (FILE *fout, char z[], int ls, int iring, int lfrag, int ffrag[]); 139 | 140 | int print1seq (FILE*fout, unsigned char *z, int ls, int pose[]); 141 | void printSeqs(FILE *fout, int *pose, char keep[], int format); 142 | int printPatterns(FILE *fout); 143 | void printSeqsMgenes (void); 144 | int printsma (FILE*fout, char*spname[], unsigned char*z[], int ns, int l, int lline, int gap, int seqtype, 145 | int transformed, int simple, int pose[]); 146 | int printsmaCodon (FILE *fout, unsigned char * z[], int ns, int ls, int lline, int simple); 147 | int zztox ( int n31, int l, char z1[], char z2[], double *x ); 148 | int testXMat (double x[]); 149 | double SeqDivergence (double x[], int model, double alpha, double *kapa); 150 | int symtest (double freq[], int n, int nobs, double space[], double *chisym, 151 | double* chihom); 152 | int dSdNNG1986(char *z1, char *z2, int lc, int icode, int transfed, double *dS, double *dN, double *Ssites, double *Nsites); 153 | int difcodonNG(char codon1[], char codon2[], double *SynSite,double *AsynSite, 154 | double *SynDif, double *AsynDif, int transfed, int 
icode); 155 | int difcodonLWL85 (char codon1[], char codon2[], double sites[3], double sdiff[3], 156 | double vdiff[3], int transfed, int icode); 157 | int testTransP (double P[], int n); 158 | double testDetailedBalance (double P[], double pi[], int n); 159 | void pijJC69 (double pij[2], double t); 160 | int PMatK80 (double P[], double t, double kapa); 161 | int PMatT92 (double P[], double t, double kappa, double pGC); 162 | int PMatTN93 (double P[], double a1t, double a2t, double bt, double pi[]); 163 | int PMatUVRoot (double P[],double t,int n,double U[],double V[],double Root[]); 164 | int PMatCijk (double PMat[], double t); 165 | int PMatQRev(double P[], double pi[], double t, int n, double space[]); 166 | int EvolveHKY85 (char source[], char target[], int ls, double t, 167 | double rates[], double pi[], double kapa, int isHKY85); 168 | double DistanceIJ (int is, int js, int model, double alpha, double *kappa); 169 | int DistanceMatNuc (FILE *fout, FILE*f2base, int model, double alpha); 170 | int EigenQREVbase (FILE* fout, double kappa[], double pi[], 171 | int *nR, double Root[], double Cijk[]); 172 | int DistanceMatNG86 (FILE *fout, FILE*fds, FILE*fdn, FILE*dt, double alpha); 173 | int setmark_61_64 (void); 174 | 175 | int BootstrapSeq (char* seqfilename); 176 | int rell(FILE*flnf, FILE*fout, int ntree); 177 | int print1site (FILE*fout, int h); 178 | int MultipleGenes (FILE* fout, FILE*fpair[], double space[]); 179 | int lfunRates (FILE* fout, double x[], int np); 180 | int AncestralSeqs (FILE *fout, double x[]); 181 | void ListAncestSeq(FILE *fout, char *zanc); 182 | int ChangesSites(FILE*fout, int coding, char *zanc); 183 | 184 | int NucListall(char b, int *nb, int ib[4]); 185 | char *getcodon(char codon[], int icodon); 186 | char *getAAstr(char *AAstr, int iaa); 187 | int Codon2AA(char codon[3], char aa[3], int icode, int *iaa); 188 | int DNA2protein(char dna[], char protein[], int lc, int icode); 189 | int printcu (FILE *f1, double fcodon[], int icode); 
/* paml.h prototypes, one declaration per line.
   Declarations only: none of these functions is defined in this header.
   NOTE(review): the group headings are inferred from identifiers and
   parameter lists alone -- confirm against the definitions. */

/* codon-usage tables and frequency conversions */
int printcums(FILE *fout, int ns, double fcodons[], int code);
int QtoPi(double Q[], double pi[], int n, double *space);
int PtoPi(double P[], double pi[], int n, double *space);
int PtoX(double P1[], double P2[], double pi[], double X[]);

/* timing, strings, error exit, indexing/searching, file helpers */
void  starttimer(void);
char *printtime(char timestr[]);
void  sleep2(int wait);
char *strc(int n, int c);
int   printdouble(FILE *fout, double a);
void  strcase(char *str, int direction);
void  error2(char *message);
int   indexing(double x[], int n, int index[], int descending, int space[]);
int   binarysearch(const void *key, const void *base, size_t n, size_t size,
                   int (*compare)(const void *, const void *), int *found);
FILE *gfopen(char *filename, char *mode);
int   appendfile(FILE *fout, char *filename);

/* operations on double vectors of length n */
int    zero(double x[], int n);
double sum(double x[], int n);
int    fillxc(double x[], double c, int n);
int    xtoy(double x[], double y[], int n);
int    abyx(double a, double x[], int n);
int    axtoy(double a, double x[], double y[], int n);
int    axbytoz(double a, double x[], double b, double y[], double z[], int n);
int    identity(double x[], int n);
double distance(double x[], double y[], int n);
double innerp(double x[], double y[], int n);
double norm(double x[], int n);

/* matrices stored in flat double arrays */
double Det3x3(double x[3*3]);
int    matby(double a[], double b[], double c[], int n, int m, int k);
int    matbytransposed(double a[], double b_transposed[], double c[], int n, int m, int k);
int    matIout(FILE *fout, int x[], int n, int m);
int    matout(FILE *file, double x[], int n, int m);
int    matout2(FILE *fout, double x[], int n, int m, int wid, int deci);
int    mattransp1(double x[], int n);
int    mattransp2(double x[], double y[], int n, int m);
int    matinv(double x[], int n, int m, double space[]);
int    matexp(double A[], int n, int nTaylorTerms, int nSquares, double space[]);
#ifdef USE_GSL
int    matexpGSL(double A[], int n, double space[]);
#endif
int    matsqrt(double A[], int n, double work[]);
int    CholeskyDecomp(double A[], int n, double L[]);
int    eigenQREV(double Q[], double pi[], int n,
                 double Root[], double U[], double V[], double spacesqrtpi[]);
int    eigenRealSym(double A[], int n, double Root[], double work[]);

/* summary statistics, qsort-style comparators, text-table scanning */
int    MeanVar(double x[], int n, double *mean, double *var);
int    variance(double x[], int n, int nx, double mx[], double vx[]);
int    correl(double x[], double y[], int n, double *mx, double *my,
              double *vxx, double *vxy, double *vyy, double *r);
int    comparefloat(const void *a, const void *b);
int    comparedouble(const void *a, const void *b);
double Eff_IntegratedCorrelationTime(double x[], int n, double *mx, double *varx, double *rho1);
int    HPDinterval(double x[], int n, double HPD[2], double alpha);
int    DescriptiveStatistics(FILE *fout, char infile[], int nbin, int propternary, int SkipColumns);
int    DescriptiveStatisticsSimple(FILE *fout, char infile[], int SkipColumns);
int    splitline(char line[], int fields[]);
int    scanfile(FILE *fin, int *nrecords, int *nx, int *HasHeader, char line[], int ifields[]);

/* numerical derivatives and line searches; fun/testx are caller-supplied callbacks */
double bound(int nx, double x0[], double p[], double x[],
             int (*testx)(double x[], int nx));
int    gradient(int n, double x[], double f0, double g[],
                double (*fun)(double x[], int n), double space[], int Central);
int    Hessian(int nx, double x[], double f, double g[], double H[],
               double (*fun)(double x[], int n), double space[]);
int    HessianSKT2004(double xmle[], double lnLm, double g[], double H[]);

int    H_end(double x0[], double x1[], double f0, double f1, double e1, double e2, int n);
double LineSearch(double (*fun)(double x), double *f, double *x0, double xb[2], double step, double e);
261 | double LineSearch2 (double(*fun)(double x[],int n), double *f, double x0[], 262 | double p[], double h, double limit, double e, double space[], int n); 263 | 264 | void xtoFreq(double x[], double freq[], int n); 265 | 266 | 267 | int SetxBound (int np, double xb[][2]); 268 | int ming1 (FILE *fout, double *f, double (* fun)(double x[], int n), 269 | int (*dfun) (double x[], double *f, double dx[], int n), 270 | int (*testx) (double x[], int n), 271 | double x0[], double space[], double e, int n); 272 | int ming2 (FILE *fout, double *f, double (*fun)(double x[], int n), 273 | int (*dfun)(double x[], double *f, double dx[], int n), 274 | double x[], double xb[][2], double space[], double e, int n); 275 | int minB (FILE*fout, double *lnL,double x[],double xb[][2],double e, double space[]); 276 | int minB2 (FILE*fout, double *lnL,double x[],double xb[][2],double e, double space[]); 277 | 278 | 279 | int Newton (FILE *fout, double *f, double (* fun)(double x[], int n), 280 | int (* ddfun) (double x[], double *fx, double dx[], double ddx[], int n), 281 | int (*testx) (double x[], int n), 282 | double x0[], double space[], double e, int n); 283 | 284 | int nls2 (FILE *fout, double *sx, double * x0, int nx, 285 | int (* fun)(double x[], double y[], int nx, int ny), 286 | int (* jacobi)(double x[], double J[], int nx, int ny), 287 | int (*testx) (double x[], int nx), 288 | int ny, double e); 289 | 290 | int ResetFinetuneSteps(FILE *fout, double Pjump[], double finetune[], int nsteps); 291 | 292 | /* tree structure functions in treesub.c */ 293 | void NodeToBranch (void); 294 | void BranchToNode (void); 295 | void ClearNode (int inode); 296 | int ReadTreeN (FILE *ftree, int *haslength, int *haslabel, int copyname, int popline); 297 | int ReadTreeB (FILE *ftree, int popline); 298 | int OutTreeN (FILE *fout, int spnames, int printopt); 299 | int OutTreeB (FILE *fout); 300 | int DeRoot (void); 301 | int GetSubTreeN (int keep[], int space[]); 302 | void printtree (int 
timebranches); 303 | void PointconPnodes (void); 304 | int SetBranch (double x[]); 305 | int DistanceMat (FILE *fout, int ischeme, double alfa, double *kapa); 306 | int LSDistance (double * ss, double x[], int (*testx)(double x[],int np)); 307 | 308 | int StepwiseAdditionMP (double space[]); 309 | double MPScoreStepwiseAddition (int is, double space[], int save); 310 | int AddSpecies (int species, int ib); 311 | int StepwiseAddition (FILE* fout, double space[]); 312 | int readx(double x[], int *fromfile); 313 | double TreeScore(double x[], double space[]); 314 | 315 | int PopEmptyLines (FILE* fseq, int lline, char line[]); 316 | int hasbase (char *str); 317 | int blankline (char *str); 318 | 319 | void BranchLengthBD(int rooted, double birth, double death, double sample, 320 | double mut); 321 | int RandomLHistory (int rooted, double space[]); 322 | 323 | void Tree2Partition (char partition[]); 324 | int Partition2Tree (char splits[], int lsplit, int ns, int nsplit, double label[]); 325 | void CladeSupport (FILE *fout, char treef[], int getSnames, char mastertreef[], int pick1tree); 326 | int GetNSfromTreeFile(FILE *ftree, int *ns, int *ntree); 327 | int NSameBranch (char partition1[],char partition2[], int nib1,int nib2, int IBsame[]); 328 | 329 | int QTN93 (int model, double Q[], double kappa1, double kappa2, double pi[]); 330 | int RootTN93 (int ischeme, double kapa1, double kapa2, double pi[], double *scalefactor, double Root[]); 331 | int EigenTN93 (int ischeme, double kapa1, double kapa2, double pi[], int *nR, double Root[], double Cijk[]); 332 | 333 | int DownStatesOneNode (int ison, int father); 334 | int DownStates (int inode); 335 | int PathwayMP (FILE *fout, double space[]); 336 | double MPScore (double space[]); 337 | double RemoveMPNinfSites (double *nsiteNinf); 338 | int MarkStopCodons(void); 339 | 340 | int MPInformSites (void); 341 | int CollapsNode (int inode, double x[]); 342 | 343 | int MakeTreeIb (int ns, int Ib[], int rooted); 344 | int 
GetTreeI (int itree, int ns, int rooted); 345 | double CountTrees (int ns, int rooted); 346 | double CountLHs (int ns); 347 | int CountLHsTree (void); 348 | int ListTrees (FILE* fout, int ns, int rooted); 349 | int GetIofTree (int rooted, int keeptree, double space[]); 350 | void ReRootTree (int newroot); 351 | int NeighborNNI (int i_tree); 352 | int GetLHistoryI (int iLH); 353 | int GetIofLHistory (void); 354 | int CountLHistory(char LHistories[], double space[]); 355 | int ReorderNodes (char LHistory[]); 356 | 357 | int GetSubSeqs(int nsnew); 358 | 359 | /* functions for evolving sequences */ 360 | int GenerateSeq (void); 361 | int Rates4Sites (double rates[],double alfa,int ncatG,int ls, int cdf, 362 | double space[]); 363 | void Evolve (int inode); 364 | void EvolveJC (int inode); 365 | 366 | 367 | int ReadTreeSeqs(FILE*fout); 368 | int UseLocus (int locus, int copyconP, int setmodel, int setSeqName); 369 | int GetGtree(int locus); 370 | int printGtree(int printBlength); 371 | 372 | void copySptree(void); 373 | void printSptree(void); 374 | 375 | 376 | enum {BASEseq=0, CODONseq, AAseq, CODON2AAseq, BINARYseq, BASE5seq} SeqTypes; 377 | 378 | enum {PrBranch=1, PrNodeNum=2, PrLabel=4, PrNodeStr=8, PrAge=16, PrOmega=32} OutTreeOptions; 379 | 380 | 381 | /* use mean (0; default) for discrete gamma instead of median (1) */ 382 | #define DGammaUseMedian 0 383 | 384 | 385 | #define FAST_RANDOM_NUMBER 386 | 387 | #define mBactrian 0.95 388 | #define sBactrian sqrt(1-mBactrian*mBactrian) 389 | #define aBox 0.5 390 | #define bBox (sqrt(12 - 3*aBox*aBox) - aBox) / 2 391 | #define aAirplane 1.0 392 | #define aStrawhat 1.0 393 | 394 | #define MAXNFIELDS 320000 395 | 396 | #define PAML_RELEASE 0 397 | 398 | #define FullSeqNames 0 /* 1: numbers at the beginning of sequence name are part of name */ 399 | 400 | #define pamlVerStr "paml version 4.9d, February 2017" 401 | 402 | #endif 403 | -------------------------------------------------------------------------------- 
/src/baseml.c: -------------------------------------------------------------------------------- 1 | /* baseml.c 2 | Maximum likelihood parameter estimation e aligned DNA (RNA) sequences, 3 | combined with phylogenetic tree estimation. 4 | Copyright, Ziheng YANG, July 1992 onwards 5 | 6 | cc -o baseml -fast baseml.c tools.c -lm 7 | cl -O2 baseml.c tools.c 8 | baseml 9 | */ 10 | 11 | #include "paml.h" 12 | 13 | #define NS 7000 14 | #define NBRANCH (NS*2-2) 15 | #define NNODE (NS*2-1) 16 | #define MAXNSONS 200 17 | #define NGENE 500 18 | #define LSPNAME 50 19 | #define NCODE 5 20 | #define NCATG 100 21 | 22 | #define NP (NBRANCH+NGENE+11) 23 | /* 24 | #define NP (NBRANCH+3*NNODE+NGENE+11+NCATG*NCATG-1) 25 | */ 26 | extern int noisy, NFunCall, NEigenQ, NPMatUVRoot, *ancestor, GeneticCode[][64]; 27 | extern double Small_Diff, *SeqDistance; 28 | 29 | int Forestry (FILE *fout); 30 | void DetailOutput(FILE *fout, double x[], double var[]); 31 | int GetOptions(char *ctlf); 32 | int GetInitials(double x[], int *fromfile); 33 | int GetInitialsTime(double x[]); 34 | int SetxInitials(int np, double x[], double xb[][2]); 35 | int SetParameters(double x[]); 36 | int SetPGene(int igene, int _pi, int _UVRoot, int _alpha, double x[]); 37 | int SetPSiteClass(int iclass, double x[]); 38 | int testx(double x[], int np); 39 | double GetBranchRate(int igene, int ibrate, double x[], int *ix); 40 | int GetPMatBranch (double Pt[], double x[], double t, int inode); 41 | int ConditionalPNode (int inode, int igene, double x[]); 42 | 43 | int TransformxBack(double x[]); 44 | int AdHocRateSmoothing (FILE*fout, double x[NS*3], double xb[NS*3][2], double space[]); 45 | void DatingHeteroData(FILE* fout); 46 | 47 | int TestModel (FILE *fout, double x[], int nsep, double space[]); 48 | int OldDistributions (int inode, double AncientFreqs[]); 49 | int SubData(int argc, char *argv[]); 50 | int GroupDistances(); 51 | 52 | 53 | struct CommonInfo { 54 | unsigned char *z[NS]; 55 | char *spname[NS], 
seqf[512],outf[512],treef[512], cleandata; 56 | char oldconP[NNODE]; /* update conP for nodes to save computation (0 yes; 1 no) */ 57 | int seqtype, ns, ls, ngene, posG[NGENE+1], lgene[NGENE], *pose, npatt, readpattern; 58 | int np, ntime, nrgene, nrate, nalpha, npi, nhomo, ncatG, ncode, Mgene; 59 | size_t sspace, sconP; 60 | int fix_kappa, fix_rgene, fix_alpha, fix_rho, nparK, fix_blength; 61 | int clock, model, getSE, runmode, print, verbose, ndata, bootstrap; 62 | int icode, coding, method; 63 | int nbtype; 64 | double *fpatt, kappa, alpha, rho, rgene[NGENE], pi[4],piG[NGENE][4], TipDate, TipDate_TimeUnit; 65 | double freqK[NCATG], rK[NCATG], MK[NCATG*NCATG]; 66 | double (*plfun)(double x[],int np), *conP, *fhK, *space; 67 | int conPSiteClass; /* is conP memory allocated for each class? */ 68 | int NnodeScale; 69 | char *nodeScale; /* nScale[ns-1] for interior nodes */ 70 | double *nodeScaleF; /* nScaleF[npatt] for scale factors */ 71 | int fix_omega; 72 | double omega; 73 | } com; 74 | struct TREEB { 75 | int nbranch, nnode, root, branches[NBRANCH][2]; 76 | double lnL; 77 | } tree; 78 | struct TREEN { 79 | int father, nson, sons[MAXNSONS], ibranch, ipop; 80 | double branch, age, *pkappa, pi[4], *conP, label; 81 | char *nodeStr, fossil; 82 | } *nodes, **gnodes, nodes_t[2*NS-1]; 83 | 84 | 85 | /* for sptree.nodes[].fossil: lower, upper, bounds, gamma, inverse-gamma */ 86 | enum {LOWER_F=1, UPPER_F, BOUND_F} FOSSIL_FLAGS; 87 | char *fossils[]={" ", "L", "U", "B"}; 88 | 89 | struct SPECIESTREE { 90 | int nbranch, nnode, root, nspecies, nfossil; 91 | struct TREESPN { 92 | char name[LSPNAME+1], fossil, usefossil; /* fossil: 0, 1, 2, 3 */ 93 | int father, nson, sons[2]; 94 | double age, pfossil[7]; /* lower and upper bounds or alpha & beta */ 95 | double *lnrates; /* log rates for loci */ 96 | } nodes[2*NS-1]; 97 | } sptree; 98 | /* all trees are binary & rooted, with ancestors unknown. 
*/ 99 | 100 | struct DATA { /* locus-specific data and tree information */ 101 | int ns[NGENE], ls[NGENE], npatt[NGENE], ngene, lgene[NGENE]; 102 | int root[NGENE+1], BlengthMethod, fix_nu, nbrate[NGENE]; 103 | char *z[NGENE][NS], cleandata[NGENE]; 104 | double *fpatt[NGENE], lnpT, lnpR, lnpDi[NGENE]; 105 | double Qfactor[NGENE], pi[NGENE][NCODE], nu[NGENE]; 106 | double rgene[NGENE], kappa[NGENE], alpha[NGENE], omega[1]; 107 | int NnodeScale[NGENE]; 108 | char *nodeScale[NGENE]; /* nScale[data.ns[locus]-1] for interior nodes */ 109 | } data; 110 | 111 | int nR=NCODE, LASTROUND, BayesEB; 112 | double PMat[NCODE*NCODE], Cijk[NCODE*NCODE*NCODE], Root[NCODE]; 113 | int StepMatrix[NCODE*NCODE]; 114 | 115 | 116 | FILE *frub, *flnf, *frst, *frst1, *frst2=NULL, *finitials=NULL; 117 | char *ratef="rates"; 118 | char *models[]={"JC69","K80","F81","F84","HKY85","T92","TN93","REV","UNREST", "REVu","UNRESTu"}; 119 | enum {JC69, K80, F81, F84, HKY85, T92, TN93, REV, UNREST, REVu, UNRESTu} MODELS; 120 | char *clockstr[]={"", "Global clock", "Local clock", "ClockCombined"}; 121 | enum {GlobalClock=1, LocalClock, ClockCombined} ClockModels; 122 | 123 | double _rateSite=1; /* rate for a site */ 124 | int N_PMatUVRoot=0; 125 | 126 | 127 | #define BASEML 1 128 | #include "treesub.c" 129 | #include "treespace.c" 130 | 131 | int main (int argc, char *argv[]) 132 | { 133 | FILE *fout, *fseq=NULL, *fpair[6]; 134 | char pairfs[1][32]={"2base.t"}; 135 | char rstf[512]="rst", ctlf[512]="baseml.ctl", timestr[64]; 136 | char *Mgenestr[]={"diff. rate", "separate data", "diff. rate & pi", 137 | "diff. rate & kappa", "diff. 
rate & pi & kappa"}; 138 | int getdistance=1, i, idata; 139 | size_t s2=0; 140 | 141 | if(argc>2 && !strcmp(argv[argc-1], "--stdout-no-buf")) 142 | setvbuf(stdout, NULL, _IONBF, 0); 143 | 144 | starttimer(); 145 | SetSeed(-1, 0); 146 | 147 | com.ndata = 1; 148 | com.cleandata = 0; noisy = 0; com.runmode = 0; 149 | 150 | com.clock = 0; 151 | com.fix_rgene = 0; /* 0: estimate rate factors for genes */ 152 | com.nhomo = 0; 153 | com.getSE = 0; /* 1: want S.E.s of estimates; 0: don't want them */ 154 | 155 | com.seqtype = 0; com.model = 0; 156 | com.fix_kappa = 0; com.kappa = 5; 157 | com.fix_alpha = 1; com.alpha = 0.; com.ncatG = 4; /* alpha=0 := inf */ 158 | com.fix_rho = 1; com.rho = 0; com.nparK = 0; 159 | com.ncode = 4; com.icode = 0; 160 | com.print = 0; com.verbose = 1; com.fix_blength = 0; 161 | com.method = 0; com.space = NULL; 162 | 163 | printf("BASEML in %s\n", pamlVerStr); 164 | frub=gfopen("rub","w"); frst=gfopen(rstf,"w"); frst1=gfopen("rst1","w"); 165 | 166 | if (argc>1) strncpy(ctlf, argv[1], 95); 167 | GetOptions(ctlf); 168 | if(com.runmode != -2) finitials=fopen("in.baseml","r"); 169 | if(com.getSE == 2) frst2 = fopen("rst2","w"); 170 | 171 | fprintf(frst, "Supplemental results for BASEML\n\nseqf: %s\ntreef: %s\n", 172 | com.seqf, com.treef); 173 | fout = gfopen(com.outf, "w"); 174 | fpair[0]=(FILE*)gfopen(pairfs[0],"w"); 175 | 176 | /* for stepwise addition, com.sspace should be calculated using com.ns. 
*/ 177 | com.sspace = 1000000*sizeof(double); 178 | if((com.space = (double*)malloc(com.sspace)) == NULL) 179 | error2("oom space"); 180 | 181 | if(com.clock==5 || com.clock==6) 182 | DatingHeteroData(fout); 183 | 184 | if((fseq=fopen (com.seqf,"r"))==NULL) { 185 | printf ("\n\nSequence file %s not found!\n", com.seqf); 186 | exit (-1); 187 | } 188 | 189 | for (idata=0; idata1) { 191 | printf("\nData set %4d\n", idata+1); 192 | fprintf(fout, "\n\nData set %4d\n", idata+1); 193 | 194 | fprintf(frst1, "%4d", idata+1); 195 | } 196 | if(idata) GetOptions(ctlf); /* com.cleandata is read from here again. */ 197 | ReadSeq ((com.verbose?fout:NULL), fseq, com.cleandata, 0); 198 | SetMapAmbiguity(); 199 | 200 | /* AllPatterns(fout); */ 201 | 202 | if (com.rho && com.readpattern) error2("rho doesn't work with readpattern."); 203 | if(com.ndata==1) fclose(fseq); 204 | i = (com.ns*2-1)*sizeof(struct TREEN); 205 | if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom"); 206 | 207 | if(com.coding) { 208 | if(com.ls%3!=0 || (com.ngene!=1 && com.ngene!=3)) 209 | error2("this is not a coding sequence. 
Remove icode?"); 210 | } 211 | 212 | if(com.Mgene && com.ngene==1) error2 ("option Mgene for 1 gene?"); 213 | if(com.ngene>1 && com.nhomo) error2("nhomo for mutliple genes?"); 214 | if(com.nalpha && (!com.alpha || com.ngene==1 || com.fix_alpha)) 215 | error2("Malpha"); 216 | if(com.nalpha>1 && com.rho) error2("Malpha or rho"); 217 | if(com.alpha==0) com.nalpha = 0; 218 | else com.nalpha = (com.nalpha?com.ngene:!com.fix_alpha); 219 | if(com.Mgene==1) com.nalpha = !com.fix_alpha; 220 | 221 | if(com.ngene==1) com.Mgene = 0; 222 | if((com.nhomo==1 && com.ngene>1) || (com.Mgene>1 && com.nhomo>=1)) 223 | error2("nhomo does not work with Mgene options"); 224 | 225 | if((com.Mgene>=2 && com.model==JC69) || (com.Mgene>=3 && com.model==F81) 226 | || ((com.Mgene==2 || com.Mgene==4) && com.model==K80) 227 | || (com.Mgene>1 && com.nhomo>1) 228 | || (com.Mgene>=2 && (com.model==UNREST||com.model==UNRESTu))) 229 | error2 ("model || Mgene"); 230 | fprintf(fout,"\nBASEML (in %s) %s %s ", pamlVerStr, com.seqf, models[com.model]); 231 | if(com.clock) fprintf(fout," %s ",clockstr[com.clock]); 232 | if (!com.nparK && com.alpha && com.rho) fprintf (fout, " Auto-"); 233 | if (com.alpha) fprintf (fout,"dGamma (ncatG=%d)", com.ncatG); 234 | if (com.nalpha>1) fprintf (fout,"(%d gamma)", com.nalpha); 235 | if (com.ngene>1) 236 | fprintf (fout, " (%d genes: %s) ", com.ngene, Mgenestr[com.Mgene]); 237 | if (com.nhomo>1) 238 | fprintf(fout,"\nNonhomo:%2d fix_kappa%2d\n",com.nhomo, com.fix_kappa); 239 | if (com.nparK && com.ncatG>6) 240 | printf("\a\n%d rate categories for nonparametric model?\n", com.ncatG); 241 | if (com.nparK) fprintf(fout,"\nnparK:%4d K:%4d\n",com.nparK, com.ncatG); 242 | 243 | if(getdistance) { 244 | i = com.ns*(com.ns-1)/2; 245 | SeqDistance = (double*)realloc(SeqDistance,i*sizeof(double)); 246 | ancestor = (int*)realloc(ancestor, i*sizeof(int)); 247 | if(SeqDistance==NULL||ancestor==NULL) error2("oom distance&ancestor"); 248 | } 249 | InitializeBaseAA(fout); 250 | 251 | 
if(com.Mgene==3) 252 | for(i=0; i=com.ns) error2("nothing to do"); 304 | 305 | for(is=0; isSeqDistance[i*(i-1)/2+j]) d = SeqDistance[i*(i-1)/2+j]; 327 | } 328 | if(dminmax=4) Perturbation(fout, (com.runmode==4), com.space); 356 | 357 | FPN(frst); if((idata+1)%10==0) fflush(frst); 358 | if(com.ndata>1 && com.runmode) { 359 | fprintf(frst1, "\t"); 360 | OutTreeN(frst1, 1, 0); 361 | } 362 | FPN(frst1); fflush(frst1); 363 | free(nodes); 364 | printf("\nTime used: %s\n", printtime(timestr)); 365 | 366 | } /* for(idata) */ 367 | if(com.ndata>1 && fseq) fclose(fseq); 368 | free(com.space); 369 | fclose(fout); fclose(frst); fclose(fpair[0]); 370 | if(finitials) { fclose(finitials); finitials=NULL; } 371 | 372 | return (0); 373 | } 374 | 375 | 376 | /* x[]: t[ntime], rgene[ngene-1], kappa[nbranch], pi[nnode*3], 377 | { alpha, rho || rK[], fK[] || rK[], MK[] } 378 | */ 379 | 380 | FILE *ftree; 381 | void PrintBestTree(FILE *fout, FILE *ftree, int btree); 382 | 383 | int Forestry (FILE *fout) 384 | { 385 | static int times=0; 386 | int status=0, inbasemlg=0, i,j=0, itree=0,ntree, np,np0,iteration=1; 387 | int pauptree=0, btree=0, haslength; 388 | double x[NP], xcom[NP-NBRANCH], lnL,lnL0=0,lnLbest=0, e=1e-7, nchange=-1; 389 | double xb[NP][2], tl=0, *g=NULL, *H=NULL; 390 | FILE *finbasemlg=NULL, *frate=NULL; 391 | 392 | ftree = gfopen(com.treef,"r"); 393 | GetTreeFileType(ftree, &ntree, &pauptree, 0); 394 | if(com.alpha) 395 | frate = gfopen(ratef,"w"); 396 | if (com.alpha && com.rho==0 && com.nhomo==0 && com.nparK==0 && com.ns<15) { 397 | inbasemlg=1; finbasemlg=gfopen("in.basemlg","w"); 398 | } 399 | flnf = gfopen("lnf","w+"); 400 | fprintf(flnf,"%6d %6d %6d\n", ntree, com.ls, com.npatt); 401 | 402 | for(itree=0; ntree==-1||itree0 && !haslength) com.fix_blength=0; 414 | if (times++==0 && com.fix_blength>0 && haslength) { 415 | if(com.clock) puts("\nBranch lengths in tree are ignored"); 416 | else { 417 | if(com.fix_blength==2) puts("\nBranch lengths in tree are fixed."); 418 
| else if(com.fix_blength==1) 419 | puts("\nBranch lengths in tree used as initials."); 420 | if(com.fix_blength==1) { 421 | FOR(i,tree.nnode) 422 | if((x[nodes[i].ibranch]=nodes[i].branch)<0) 423 | x[nodes[i].ibranch]=1e-5; 424 | } 425 | } 426 | } 427 | 428 | if(com.cleandata) 429 | nchange=MPScore (com.space); 430 | if(noisy&&com.ns<99) 431 | { OutTreeN(F0,0,0); printf(" MP score: %.2f\n",nchange);} 432 | OutTreeN(fout,0,0); fprintf(fout," MP score: %.2f",nchange); 433 | if(!com.clock && com.model<=REV && com.nhomo<=2 434 | && nodes[tree.root].nson<=2 && com.ns>2){ 435 | puts("\nThis is a rooted tree, without clock. Check."); 436 | if(com.verbose) fputs("\nThis is a rooted tree. Please check!",fout); 437 | } 438 | 439 | fflush(fout); fflush(flnf); 440 | 441 | GetInitials(x, &i); 442 | 443 | if(i==-1) iteration=0; 444 | 445 | if((np=com.np)>NP) error2("raise NP and recompile"); 446 | 447 | if(com.sspace < spaceming2(np)) { 448 | com.sspace = spaceming2(np); 449 | if((com.space=(double*)realloc(com.space,com.sspace))==NULL) 450 | error2("oom space"); 451 | } 452 | 453 | if(itree) { np0 = np; } 454 | if(itree && !finitials && (com.nhomo==0 || com.nhomo==2)) 455 | for (i=0; i2 && com.ns<50) { OutTreeB(F0); FPN(F0); matout(F0,x,1,np); } 463 | printf("\nlnL0 = %12.6f\n",-lnL); 464 | } 465 | 466 | if (iteration && np) { 467 | SetxBound (np, xb); 468 | SetxInitials (np, x, xb); /* start within the feasible region */ 469 | if(com.method==1) 470 | j = minB(noisy>2?frub:NULL, &lnL,x,xb, e, com.space); 471 | else if(com.method==3) 472 | j = minB2(noisy>2?frub:NULL, &lnL,x,xb, e, com.space); 473 | else 474 | j = ming2(noisy>2?frub:NULL,&lnL,com.plfun,NULL,x,xb, com.space,e,np); 475 | 476 | if (j==-1 || lnL<=0 || lnL>1e7) status = -1; 477 | else status = 0; 478 | } 479 | 480 | if (itree==0) { lnL0=lnLbest=lnL; btree=0; } 481 | else if (lnL=2) TransformxBack(x); 491 | } 492 | if(com.clock) { /* move times into x[] */ 493 | for(i=0,j=!nodes[tree.root].fossil; i=2 && com.clock==0 
&& nodes[tree.root].nson==3) { /* g & H */ 518 | fprintf(frst2,"\n %d\n\n", com.ns); 519 | OutTreeN(frst2, 1, 1); fprintf(frst2,"\n\n"); 520 | for(i=0; i0.0004 && fabs(g[i])<0.005) g[i] = 0; 522 | for(i=0; i0. ? sqrt(H[i*np+i]) : -1)); 535 | FPN(fout); 536 | } 537 | 538 | /* if(com.clock) SetBranch(x); */ 539 | /* GroupDistances(); */ 540 | 541 | if(com.nbtype>1) 542 | fprintf(fout,"\nWarning: branch rates are not yet applied in tree length and branch lengths"); 543 | if(AbsoluteRate) { 544 | fprintf(fout,"\nNote: mutation rate is not applied to tree length. Tree has times, for TreeView & FigTree\n"); 545 | for(i=0; i1?" (1st gene)":"")); 553 | } 554 | FPN(fout); OutTreeN(fout,1,0); FPN(fout); 555 | FPN(fout); OutTreeN(fout,1,1); FPN(fout); 556 | if(com.clock) { 557 | FPN(fout); OutTreeN(fout,1,PrNodeNum); FPN(fout); 558 | } 559 | 560 | if(com.TipDate) { /* scale back the times in nodes[].branch */ 561 | for(i=0; i=2 **/ 591 | if((com.nhomo<=2 && com.nparK==0 && com.model<=REV && com.rho==0) 592 | || (com.nhomo>2 && com.alpha==0 && com.Mgene==0)) 593 | AncestralSeqs(frst, x); 594 | } 595 | } /* for (itree) */ 596 | 597 | if(ntree>1) { 598 | fprintf(frst1, "\t%d\t", btree+1); 599 | PrintBestTree(frst1, ftree, btree); 600 | } 601 | if(finbasemlg) fclose(finbasemlg); if (frate) fclose(frate); 602 | fclose(ftree); 603 | 604 | if(ntree==-1) ntree=itree; 605 | if(ntree>1) { rewind(flnf); rell(flnf,fout,ntree); } 606 | fclose(flnf); 607 | 608 | return(0); 609 | } 610 | 611 | 612 | void PrintBestTree(FILE *fout, FILE *ftree, int btree) 613 | { 614 | int itree, ntree, i,j; 615 | 616 | rewind(ftree); 617 | GetTreeFileType(ftree,&ntree,&i,0); 618 | for(itree=0; ntree==-1||itree=3) { 677 | if(!com.fix_kappa && (com.model==F84 || com.model==HKY85)) 678 | fprintf (fout, "kappa or abcde under %s (in order of branches):", models[com.model]); 679 | else 680 | fprintf (fout, "kappa or abcde under %s:", models[com.model]); 681 | for(i=0; i2 && com.model<=TN93) { 711 | 
eigenTN93(com.model, *nodes[inode].pkappa, *(nodes[inode].pkappa+1), nodes[inode].pi, &nR, Root, Cijk); 712 | QTN93(com.model, Q, *nodes[inode].pkappa, *(nodes[inode].pkappa+1), nodes[inode].pi); 713 | } 714 | else if (com.nhomo>2 && com.model==REV) 715 | eigenQREVbase(NULL, Q, nodes[inode].pkappa, nodes[inode].pi, &nR, Root, Cijk); 716 | 717 | for(i=0; i=2) { 769 | for(i=0; i=3) k+=com.ngene*nkappa[com.model]; 780 | else k+=nkappa[com.model]; 781 | } 782 | else { 783 | if (com.model1) 800 | DiscreteGamma(com.freqK,com.rK,com.alpha,com.alpha,com.ncatG,DGammaUseMedian); 801 | fprintf(fout, "\nrate: "); FOR(i,com.ncatG) fprintf(fout," %8.5f",com.rK[i]); 802 | fprintf(fout, "\nfreq: "); FOR(i,com.ncatG) fprintf(fout," %8.5f",com.freqK[i]); 803 | FPN(fout); 804 | } 805 | if (!com.fix_rho) { 806 | fprintf (fout, "rho for the auto-discrete-gamma model: %9.5f",x[k]); 807 | FPN(fout); 808 | } 809 | if (com.nparK>=1 && com.nalpha<=1) { 810 | fprintf(fout, "\nrate:"); FOR(i,com.ncatG) fprintf(fout," %8.5f",com.rK[i]); 811 | fprintf(fout, "\nfreq:"); FOR(i,com.ncatG) fprintf(fout," %8.5f",com.freqK[i]); 812 | FPN(fout); 813 | } 814 | if (com.rho || (com.nparK>=3 && com.nalpha<=1)) { 815 | fprintf (fout, "transition probabilities between rate categories:\n"); 816 | for(i=0;i5) || (com.model==UNRESTu && com.nrate>11)) 835 | error2(errstr); 836 | for(i=0; i3 || b2<0 || b2>3) error2("bases out of range."); 847 | if(b1==b2 || StepMatrix[b1*n+b2]>0) { 848 | printf("pair %c%c already specified.\n", BASEs[b1], BASEs[b2]); 849 | } 850 | if(com.model==REVu) StepMatrix[b1*n+b2] = StepMatrix[b2*n+b1] = i+1; 851 | else StepMatrix[b1*n+b2] = i+1; 852 | } 853 | printf("rate %d: %d pairs\n", i+1,k); 854 | } 855 | for(i=0; i=9) 890 | printf ("\n%3d %15s | %-20s %6.2f", iopt+1,optstr[iopt],opt,t); 891 | switch (iopt) { 892 | case ( 0): sscanf(pline+1, "%s", com.seqf); break; 893 | case ( 1): sscanf(pline+1, "%s", com.outf); break; 894 | case ( 2): sscanf(pline+1, "%s", com.treef); break; 895 | 
case ( 3): noisy=(int)t; break; 896 | case ( 4): com.cleandata=(char)t; break; 897 | case ( 5): com.verbose=(int)t; break; 898 | case ( 6): com.runmode=(int)t; break; 899 | case ( 7): com.method=(int)t; break; 900 | case ( 8): com.clock=(int)t; break; 901 | case ( 9): 902 | sscanf(pline+1, "%lf%lf", &com.TipDate, &com.TipDate_TimeUnit); 903 | break; 904 | case (10): com.fix_rgene=(int)t; break; 905 | case (11): com.Mgene=(int)t; break; 906 | case (12): com.nhomo=(int)t; break; 907 | case (13): com.getSE=(int)t; break; 908 | case (14): com.print=(int)t; break; 909 | case (15): com.model=(int)t; 910 | if(com.model>UNREST) GetStepMatrix(line); break; 911 | case (16): com.fix_kappa=(int)t; break; 912 | case (17): 913 | com.kappa=t; 914 | if(com.fix_kappa && (com.clock==5 || com.clock==6) 915 | && com.model!=0 && com.model!=2) { 916 | ng = splitline (++pline, markgenes); 917 | for(j=0; j=1) { 969 | com.fix_alpha = com.fix_rho=1; 970 | if(com.alpha==0) com.alpha = 0.5; /* used to generate rK as initial values */ 971 | if(com.nparK<=2) com.rho = 0; 972 | else com.rho = 0.4; 973 | if(com.nhomo>=1) 974 | error2("nhomo & nparK"); 975 | } 976 | if(com.model!=F84 && com.kappa<=0) error2("init kappa.."); 977 | if(!com.fix_alpha && com.alpha<=0) error2("init alpha.."); 978 | if(!com.fix_rho && com.rho==0) { com.rho=0.001; puts("init rho reset"); } 979 | 980 | if (com.alpha) 981 | { if (com.ncatG<2 || com.ncatG>NCATG) error2 ("ncatG"); } 982 | else if (com.ncatG>1) com.ncatG=1; 983 | 984 | if(com.method &&(com.clock||com.rho)) 985 | { com.method=0; puts("Iteration method reset: method = 0"); } 986 | if(com.method && (com.model==UNREST||com.model==UNRESTu)) 987 | error2("I did not implemented method = 1 for UNREST. 
Use method = 0"); 988 | 989 | if(com.model>UNREST && com.Mgene>2) 990 | error2("u models don't work with Mgene"); 991 | 992 | if (com.nhomo>5) 993 | error2("nhomo"); 994 | else if (com.nhomo==2) { 995 | if (com.model!=K80 && com.model!=F84 && com.model!=HKY85) error2("nhomo"); 996 | } 997 | else if (com.nhomo>2 && !(com.model>=F84 && com.model<=REV)) error2("nhomo & model"); 998 | else if (com.nhomo>2 && com.method) error2("nhomo & method."); 999 | else { 1000 | if (com.nhomo==1 && !(com.model>=F81 && com.model<=REV) && com.model!=REVu) 1001 | error2("nhomo=1 and model"); 1002 | } 1003 | 1004 | if(com.nhomo>=2 && com.clock==2) error2("clock=2 & nhomo imcompatible"); 1005 | 1006 | if(com.clock>=5 && com.model>=TN93) error2("model & clock imcompatible"); 1007 | 1008 | if (com.nhomo>1 && com.runmode>0) error2("nhomo incompatible with runmode"); 1009 | if (com.runmode==-2) error2("runmode = -2 not implemented in baseml."); 1010 | if (com.clock && com.runmode>2) error2("runmode & clock?"); 1011 | if (com.runmode) com.fix_blength=0; 1012 | if (com.runmode==3 && (com.npi || com.nparK)) 1013 | error2("runmode incompatible with nparK or nhomo."); 1014 | 1015 | if (com.model==JC69 || com.model==K80 || com.model==UNREST) 1016 | if (com.nhomo!=2) com.nhomo=0; 1017 | if (com.model==JC69 || com.model==F81) { com.fix_kappa=1; com.kappa=1; } 1018 | if ((com.model==TN93 || com.model==REV || com.model==REVu) && com.nhomo<=2) 1019 | com.fix_kappa=0; 1020 | if (com.seqtype==5 && com.model!=REVu) 1021 | error2("RNA-editing model requires REVu"); 1022 | 1023 | if (com.nparK==3) { 1024 | puts("\n\nnparK==3, double stochastic, may not work. 
Use nparK=4?\n"); 1025 | getchar(); 1026 | } 1027 | com.fix_omega=1; com.omega=0; 1028 | if(com.ndata<=0) com.ndata=1; 1029 | if(com.bootstrap && com.ndata!=1) error2("ndata=1 for bootstrap."); 1030 | 1031 | return (0); 1032 | } 1033 | 1034 | 1035 | int GetInitials (double x[], int *fromfile) 1036 | { 1037 | /* This calculates com.np, com.ntime, com.npi, com.nrate etc., and gets 1038 | initial values for ML iteration. It also calculates some of the 1039 | statistics (such as eigen values and vectors) that won't change during 1040 | the likelihood iteration. However, think about whether this is worthwhile. 1041 | The readfile option defeats this effort right now as the x[] is read in 1042 | after such calculations. Some of the effort is duplicated in 1043 | SetParameters(). Needs more careful thinking. 1044 | */ 1045 | int i,j,k, K=com.ncatG, n31pi=(com.model==T92?1:3); 1046 | int nkappa[] = {0, 1, 0, 1, 1, 1, 2, 5, 11, -1, -1}, nkappasets; 1047 | size_t sconP_new=(size_t)(tree.nnode-com.ns)*com.ncode*com.npatt*com.ncatG*sizeof(double); 1048 | double t=-1; 1049 | 1050 | NFunCall = NPMatUVRoot = NEigenQ = 0; 1051 | if(com.clock==ClockCombined && com.ngene<=1) 1052 | error2("Combined clock model requires mutliple genes."); 1053 | GetInitialsTimes (x); 1054 | 1055 | com.plfun = lfunAdG; 1056 | if (com.alpha==0 && com.nparK==0) 1057 | com.plfun = lfun; 1058 | else if ((com.alpha && com.rho==0) || com.nparK==1 || com.nparK==2) 1059 | com.plfun = lfundG; 1060 | 1061 | if(com.clock && com.fix_blength==-1) 1062 | com.fix_blength = 0; 1063 | 1064 | if(com.method && com.fix_blength!=2 && com.plfun==lfundG) { 1065 | com.conPSiteClass = 1; 1066 | if(com.sconP < sconP_new) { 1067 | com.sconP = sconP_new; 1068 | printf("\n%9lu bytes for conP, adjusted\n", com.sconP); 1069 | if((com.conP = (double*)realloc(com.conP, com.sconP))==NULL) 1070 | error2("oom conP"); 1071 | } 1072 | } 1073 | InitializeNodeScale(); 1074 | 1075 | com.nrgene = (!com.fix_rgene)*(com.ngene-1); 1076 | 
for(j=0; j=3) 1084 | com.nrate *= com.ngene; 1085 | } 1086 | } 1087 | switch (com.nhomo) { 1088 | case (0): com.npi = 0; break; /* given 1 pi */ 1089 | case (1): com.npi = 1; break; /* solve 1 pi */ 1090 | case (2): com.npi = 0; com.nrate=tree.nbranch; break; /* b kappa's */ 1091 | case (3): 1092 | com.npi = com.ns + (tree.root>=com.ns) + (tree.nnode>com.ns+1); 1093 | com.nrate = (!com.fix_kappa && (com.model==F84 || com.model==HKY85) ? tree.nbranch : nkappa[com.model]); 1094 | for(i=0; i=com.ns) 1097 | nodes[tree.root].label = com.ns+1; 1098 | break; /* ns+2 pi */ 1099 | case (4): 1100 | com.npi = tree.nnode; /* nnode pi */ 1101 | com.nrate = nkappa[com.model]*(com.fix_kappa ? 1 : tree.nbranch); 1102 | break; 1103 | case (5): 1104 | /* com.nrate = (!com.fix_kappa && (com.model==F84 || com.model==HKY85) ? tree.nbranch : nkappa[com.model]); */ 1105 | com.nrate = nkappa[com.model]*(com.fix_kappa ? 1 : tree.nbranch); 1106 | for(i=0,com.npi=0; i com.npi) 1109 | com.npi = j+1; 1110 | if(i!=tree.root && (j<0 || j>tree.nnode-1)) 1111 | puts("node label in tree."); 1112 | } 1113 | if(tree.root>=com.ns) 1114 | nodes[tree.root].label = com.npi++; 1115 | 1116 | printf("%d sets of frequency parameters\n",com.npi); 1117 | break; /* user-specified pi */ 1118 | } 1119 | 1120 | if (com.model<=TN93 && com.Mgene<=1 && com.nhomo==0) 1121 | eigenTN93 (com.model,com.kappa,com.kappa,com.pi,&nR,Root,Cijk); 1122 | if (com.model==REV || com.model==UNREST) 1123 | for(j=0; j<(com.Mgene>=3?com.ngene:1); j++) { 1124 | k = com.ntime + com.nrgene + j*(com.model==REV?5:11); 1125 | for(i=0; i=5) return(0); 1202 | if(com.fix_blength<2) SetBranch(x); 1203 | 1204 | for(i=0; i=3) { 1231 | for (i=0,k=com.ntime+com.nrgene; i1) return (status); 1263 | k = com.ntime+com.nrate+com.nrgene+com.npi*n31pi; 1264 | if (!com.fix_alpha) { 1265 | com.alpha=x[k++]; 1266 | if (com.fix_rho) 1267 | DiscreteGamma (com.freqK,com.rK,com.alpha,com.alpha,K,DGammaUseMedian); 1268 | } 1269 | if (!com.fix_rho) { 1270 | 
com.rho=x[k++]; 1271 | AutodGamma (com.MK, com.freqK, com.rK, &t,com.alpha,com.rho,K); 1272 | } 1273 | if (com.nparK==0) return(status); 1274 | 1275 | /* nparK models */ 1276 | xtoy (x+k, com.rK, K-1); 1277 | 1278 | if (com.nparK==2) { 1279 | if (!LASTROUND) f_and_x(x+k+K-1, com.freqK, K,0,0); 1280 | else xtoy (x+k+K-1, com.freqK, K-1); 1281 | com.freqK[K-1]=1-sum(com.freqK, K-1); 1282 | } 1283 | else if (com.nparK==3) { /* rK & MK (double stochastic matrix) */ 1284 | for (i=0,k+=K-1; i=3)*igene*nr[com.model]; 1316 | double *xcom=x+com.ntime; 1317 | double ka1=xcom[k], ka2=(com.model==TN93?xcom[k+1]:-1); 1318 | 1319 | if(com.Mgene==2 && com.fix_kappa) ka1 = ka2 = com.kappa; 1320 | 1321 | if (_pi) { 1322 | xtoy(com.piG[igene], com.pi, 4); 1323 | } 1324 | if (_UVRoot) { 1325 | if (com.model==K80) com.kappa = ka1; 1326 | else if (com.model<=TN93) 1327 | eigenTN93(com.model, ka1, ka2, com.pi, &nR, Root, Cijk); 1328 | else if (com.model==REV || com.model==REVu) 1329 | eigenQREVbase(NULL, PMat, xcom+k, com.pi, &nR, Root, Cijk); 1330 | } 1331 | if (_alpha) { 1332 | com.alpha = xcom[com.nrgene+com.nrate+com.npi+igene]; /* check?? 
*/ 1333 | DiscreteGamma(com.freqK, com.rK, com.alpha, com.alpha, com.ncatG, DGammaUseMedian); 1334 | } 1335 | return(0); 1336 | } 1337 | 1338 | 1339 | int SetxBound (int np, double xb[][2]) 1340 | { 1341 | /* sets lower and upper bounds for variables during iteration 1342 | */ 1343 | int i, j, k=0, nf=0, n31pi=(com.model==T92?1:3); 1344 | double tb[]={.4e-6, 9999}, tb0=.4e-6, rgeneb[]={1e-4,999}, rateb[]={1e-5,999}; 1345 | double alphab[]={.005, 999}, rhob[]={-0.2, 0.99}, pb[]={.00001,.99999}; 1346 | double fb[]={-19,9}; /* transformed freqs.*/ 1347 | 1348 | SetxBoundTimes (xb); 1349 | for(i=com.ntime;i2 && np<50) { 1368 | printf("\nBounds (np=%d):\n",np); 1369 | FOR(i,np) printf(" %10.6f", xb[i][0]); FPN(F0); 1370 | FOR(i,np) printf(" %10.6f", xb[i][1]); FPN(F0); 1371 | } 1372 | return(0); 1373 | } 1374 | 1375 | int testx (double x[], int np) 1376 | { 1377 | /* This is used for LS branch length estimation by nls2, called only if(clock==0) 1378 | */ 1379 | int i; 1380 | double tb[]={.4e-6, 99}; 1381 | 1382 | FOR (i,com.ntime) 1383 | if (x[i]tb[1]) 1384 | return (-1); 1385 | return (0); 1386 | } 1387 | 1388 | 1389 | 1390 | int ConditionalPNode (int inode, int igene, double x[]) 1391 | { 1392 | int n=com.ncode, i,j,k,h, ison, pos0=com.posG[igene],pos1=com.posG[igene+1]; 1393 | double t; 1394 | 1395 | for (i=0; i0 && !com.oldconP[nodes[inode].sons[i]]) 1397 | ConditionalPNode (nodes[inode].sons[i], igene, x); 1398 | if(inode3) 1454 | printf ("\nt = %.5f in PMatCijk", t); 1455 | if (t<1e-200) { identity (P, n); return(0); } 1456 | 1457 | for (k=1; k0?pij:0); 1462 | } 1463 | return (0); 1464 | } 1465 | 1466 | 1467 | #ifdef UNDEFINED 1468 | 1469 | int CollapsSite (FILE *fout, int nsep, int ns, int *ncat, int SiteCat[]) 1470 | { 1471 | int j,k, it, h, b[NS], ndiff, n1, bit1; 1472 | /* n1: # of 1's ... 
bit1: the 1st nonzero bit */ 1473 | 1474 | *ncat = 5 + (1<<(ns-1))-1 + nsep*11; 1475 | if (fout) fprintf (fout, "\n# cat:%5d # sep:%5d\n\n", *ncat, nsep); 1476 | 1477 | FOR (h, 1<<2*ns) { 1478 | for (j=0,it=h; j1 || com.nhomo || com.alpha || com.nparK || com.model>REV) 1519 | error2 ("Pexp()"); 1520 | SetParameters (x); 1521 | FOR (j, tree.nbranch) 1522 | PMatCijk (Pt[j], nodes[tree.branches[j][1]].branch); 1523 | 1524 | for (h=0; h<(1<<2*com.ns); h++) { 1525 | if (SiteCat[h] == 0) continue; 1526 | for (j=0,it=h; j>=2,j++) ; 1527 | for (isum=0,fh=0; isum1) { 1564 | lmax0 += nobs[h]*log(nobs[h]); 1565 | lnL0 += nobs[h]*log(pexp[h]); 1566 | } 1567 | ef = com.ls*pexp[h]; 1568 | de = square(nobs[h]-ef)/ef; 1569 | X2 += de; 1570 | fprintf (fout, "\nCat #%3d%9.0f%9.2f%9.2f", h, nobs[h], ef,de); 1571 | } 1572 | fprintf (fout, "\n\nlmax0:%12.4f D2:%12.4f X2:%12.4f\n", 1573 | lmax0, 2*(lmax0-lnL0), X2); 1574 | free (SiteCat); 1575 | 1576 | return (0); 1577 | } 1578 | 1579 | 1580 | #endif 1581 | 1582 | 1583 | int OldDistributions (int inode, double AncientFreqs[]) 1584 | { 1585 | /* reconstruct nucleotide frequencies at and down inode for nonhomogeneous models com.nhomo==3 or 4. 1586 | AncientFreqs[tree.nnode*4] 1587 | */ 1588 | int i, n=4; 1589 | double kappa=com.kappa; 1590 | 1591 | if (com.alpha || com.model>REV) { 1592 | puts("OldDistributions() does not run when alpha > 0 or model >= TN93"); 1593 | return(-1); 1594 | } 1595 | if(inode==tree.root) 1596 | xtoy (nodes[inode].pi, AncientFreqs+inode*n, n); 1597 | else { 1598 | if (com.nhomo>2 && com.model<=TN93) 1599 | eigenTN93(com.model, *nodes[inode].pkappa, *(nodes[inode].pkappa+1), nodes[inode].pi, &nR, Root, Cijk); 1600 | else if (com.nhomo>2 && com.model==REV) 1601 | eigenQREVbase(NULL, PMat, nodes[inode].pkappa, nodes[inode].pi, &nR, Root, Cijk); 1602 | 1603 | PMatCijk (PMat, nodes[inode].branch); 1604 | matby (AncientFreqs+nodes[inode].father*n, PMat, AncientFreqs+inode*n, 1, n, n); 1605 | } 1606 | for(i=0; i