├── ACKNOWLEDGEMENTS ├── AUTHORS ├── COPYING ├── ChangeLog ├── FUTURES ├── INSTALL ├── Makefile ├── Makefile.mac ├── NEWS ├── PROBLEMS ├── README.md ├── algo.h ├── align.c ├── align.h ├── align_lib ├── Makefile ├── Makefile.mac ├── aa2nt.c ├── aa2nt_local.c ├── align.c ├── align.h ├── common_align.c ├── gen_nt_sm.c ├── pwalign.c ├── pwalign_main.c ├── seq.c ├── seq.h ├── splice.c ├── splice_bound.c ├── splice_local.c ├── table.c ├── table.h ├── transalign.pl ├── transaln.c └── version.h ├── aln_mask.c ├── backtrans.cc ├── best.c ├── best.h ├── brent.c ├── bs_nj.c ├── common ├── common.h ├── hash_char.h ├── hash_com.h ├── hash_misc.h └── mem.c ├── compare.cc ├── cpp_utils.cc ├── cpp_utils.h ├── cut_tree.c ├── doxygen.conf ├── eps.h ├── est_len.c ├── examples ├── ex1.nucl.mfa ├── ex1.nucl.nhx ├── ex2.nucl.mfa └── ex2.nucl.nhx ├── filter.c ├── flcallback.cc ├── flglobal.cc ├── flglobal.h ├── flnjtree.cc ├── flnjtree_ui.fl ├── flworkspace.cc ├── lost.c ├── main.c ├── make_ng86.c ├── make_ng86.h ├── mmerge.cc ├── ng86_ds.c ├── nhx_output.cc ├── nj.c ├── nj2.c ├── order.c ├── ortho.c ├── output.c ├── parser.l ├── parser.y ├── phyml.c ├── phyml.h ├── phyml ├── Makefile ├── Makefile.mac ├── bionj.c ├── bionj.h ├── eigen.c ├── eigen.h ├── free.c ├── free.h ├── lh3_addon.cc ├── lh3_addon.h ├── lh3_spec.cc ├── main.c ├── ml.c ├── ml.h ├── models.c ├── models.h ├── optimiz.c ├── optimiz.h ├── options.c ├── options.h ├── simu.c ├── simu.h ├── utilities.c └── utilities.h ├── pre_cons.c ├── prob_dist.c ├── read.c ├── read_aln.cc ├── reroot.c ├── scripts ├── benchmark.pl ├── get_part.pl ├── goTree.pl └── tree_wrapper.pl ├── sdi.c ├── simulate.c ├── spec-ens.nh ├── spec-enstax.nh ├── spec-taxon_id.nh ├── spec.c ├── spec.nh ├── subtree.c ├── task.c ├── timeout.c ├── tree.h ├── tree_plot.c ├── tree_plot.h ├── treebest.texi ├── trimpoor.cc ├── utils.h ├── utils1.c └── utils2.c /ACKNOWLEDGEMENTS: -------------------------------------------------------------------------------- 1 | 2 | 
The following people have contributed in some way to the phylotree project. As we have 3 | not asked for their permission when making the list, we did not write their 4 | e-mail addresses here. Nonetheless, we really appreciate their generous 5 | contributions. 6 | 7 | Lachlan James Coin 8 | For his beneficial discussions on constrained Neighbour-joining and various 9 | tree-building algorithms. 10 | 11 | Richard Durbin 12 | For his sparkling idea about the TreeFam project and good instruction on tree- 13 | cutting and the constrained method. Without him, there would never have been TreeBeST. 14 | 15 | Li Ruiqiang, Jean-Karim Heriche and Avril Coghlan 16 | For their beneficial discussion and encouragement all the time. 17 | 18 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | 2 | Li Heng, lh3@sanger.ac.uk, was the original author and chief programmer of 3 | treebest. 4 | 5 | Du Wenfeng, duwf@genomics.org.cn, was the author of the dn/ds calculator. His 6 | creative idea accelerated the calculation speed by far. 7 | 8 | Guan Liang, guanl@genomics.org.cn, was the author of nh2pic, a wonderful 9 | NHX->FIGURE converter. Although I do not use his code any longer, I still 10 | appreciate his contribution to TreeBeST. 11 | 12 | Liu Tao, liutao@genomics.org.cn, was the author of goTree.pl, a wrapper for 13 | the famous `puzzle' and `phyml' tree-builders. 
14 | 15 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------ 2 | r274 | lh3lh3 | 2007-10-19 09:44:45 +0100 (Fri, 19 Oct 2007) | 3 lines 3 | 4 | * 1.9.1-4 5 | * rename project name as treebest 6 | 7 | ------------------------------------------------------------------------ 8 | r273 | lh3lh3 | 2007-10-19 09:17:05 +0100 (Fri, 19 Oct 2007) | 2 lines 9 | 10 | rename njtree to treebest 11 | 12 | ------------------------------------------------------------------------ 13 | r204 | lh3lh3 | 2007-01-29 10:04:07 +0000 (Mon, 29 Jan 2007) | 2 lines 14 | 15 | there is a bug in this tree. 16 | 17 | ------------------------------------------------------------------------ 18 | r197 | lh3lh3 | 2007-01-23 13:04:47 +0000 (Tue, 23 Jan 2007) | 2 lines 19 | 20 | add species trees for Ensembl 21 | 22 | ------------------------------------------------------------------------ 23 | r188 | lh3lh3 | 2007-01-18 10:53:36 +0000 (Thu, 18 Jan 2007) | 2 lines 24 | 25 | Makefiles for building universal binaries on Intel-Mac 26 | 27 | ------------------------------------------------------------------------ 28 | r186 | lh3lh3 | 2007-01-17 12:53:37 +0000 (Wed, 17 Jan 2007) | 8 lines 29 | 30 | 31 | - add quiet mode to "njtree best" 32 | - fix a bug in phyml when protein alignemnt is bootstrapped. 33 | - add "fltk-config --post" for flnjtree under Mac OS X 34 | - fix a typo in spec.* 35 | - add taxon_id species tree 36 | 37 | 38 | ------------------------------------------------------------------------ 39 | r163 | lh3lh3 | 2006-12-04 08:53:49 +0000 (Mon, 04 Dec 2006) | 2 lines 40 | 41 | fix a bug when "njtree nj -s spec.nh" is in use. 
42 | 43 | ------------------------------------------------------------------------ 44 | r153 | lh3lh3 | 2006-11-30 13:51:05 +0000 (Thu, 30 Nov 2006) | 2 lines 45 | 46 | my own branch 47 | 48 | ------------------------------------------------------------------------ 49 | r58 | lh3lh3 | 2006-10-26 08:48:56 +0100 (Thu, 26 Oct 2006) | 3 lines 50 | 51 | Add "trimpoor" which is expected to trim sequences that affect the quality of a 52 | gene trees. However, this has not been tested thoroughly. 53 | 54 | ------------------------------------------------------------------------ 55 | r53 | lh3lh3 | 2006-10-25 11:34:40 +0100 (Wed, 25 Oct 2006) | 4 lines 56 | 57 | 58 | - fix a bug when packaging 59 | - release 1.9.1 60 | 61 | ------------------------------------------------------------------------ 62 | r51 | lh3lh3 | 2006-10-24 15:23:38 +0100 (Tue, 24 Oct 2006) | 7 lines 63 | 64 | 65 | - add SIS (species intersection score) for duplication nodes 66 | - :DD=Y now means: Dubious Duplication. Previously it means 67 | "confirmed duplication". 68 | - fix a potential bug, although seemingly it has never happened. 
69 | 70 | 71 | ------------------------------------------------------------------------ 72 | r20 | lh3lh3 | 2006-10-11 09:44:08 +0100 (Wed, 11 Oct 2006) | 2 lines 73 | 74 | Remove some obsolete rules 75 | 76 | ------------------------------------------------------------------------ 77 | r16 | lh3lh3 | 2006-10-10 13:58:05 +0100 (Tue, 10 Oct 2006) | 2 lines 78 | 79 | improve packaging method 80 | 81 | ------------------------------------------------------------------------ 82 | r15 | lh3lh3 | 2006-10-10 13:06:55 +0100 (Tue, 10 Oct 2006) | 2 lines 83 | 84 | change NJTREE version number 85 | 86 | ------------------------------------------------------------------------ 87 | r13 | lh3lh3 | 2006-10-10 11:57:54 +0100 (Tue, 10 Oct 2006) | 3 lines 88 | 89 | create dev branch 90 | 91 | 92 | ------------------------------------------------------------------------ 93 | r12 | lh3lh3 | 2006-10-10 10:48:55 +0100 (Tue, 10 Oct 2006) | 2 lines 94 | 95 | add njtree source codes. 96 | 97 | ------------------------------------------------------------------------ 98 | -------------------------------------------------------------------------------- /FUTURES: -------------------------------------------------------------------------------- 1 | 2 | HIGH Priority: 3 | 4 | 5 | MODERATE Priority: 6 | 7 | * constrained NJ for multifurcated unrooted trees 8 | 9 | 10 | LOW Priority: 11 | 12 | * Fetch TreeFam data via http connection. I am conceiving of some kind of 13 | http library. 14 | * Add more evolutionary models. 15 | 16 | 17 | Useful: 18 | 19 | * Add a graphical multialignment viewer? 20 | 21 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | 2 | Just type `make', `treebest' will be compiled. If `FLTK' package has 3 | been installed, `make fltreebest' will generate the GUI version 4 | `fltreebest'. Please contact Heng Li if you meet any 5 | problems. 6 | 7 | Good luck! 
8 | 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC= gcc 2 | CXX= g++ 3 | CFLAGS= -W -Wall -O2 -fomit-frame-pointer #-pg 4 | CXXFLAGS= $(CFLAGS) 5 | YFLAGS= 6 | DFLAGS= -DUSE_GCC -DHAVE_PHYML -DLH3_ADDON -DYYMAXDEPTH=90000 -DYYINITDEPTH=60000 # for phyml and the newick parser 7 | VERSION= 1.9.2 8 | NJVERSION= -DTR_VERSION="\"$(VERSION)\"" -DTR_BUILD=\"`date +%d%b%Y`\" 9 | LEX= flex 10 | YACC= bison --yacc 11 | FLUID= fluid 12 | FLTKCFG= fltk-config 13 | PROG= treebest 14 | LIBOBJS= lex.yy.o y.tab.o read.o nj.o utils1.o subtree.o sdi.o bs_nj.o \ 15 | reroot.o task.o output.o ortho.o cut_tree.o spec.o utils2.o \ 16 | align.o filter.o aln_mask.o prob_dist.o brent.o ng86_ds.o lost.o \ 17 | tree_plot.o cpp_utils.o read_aln.o pre_cons.o simulate.o compare.o \ 18 | order.o nhx_output.o nj2.o backtrans.o phyml.o mmerge.o est_len.o \ 19 | best.o trimpoor.o 20 | FLOBJS= flnjtree.o flnjtree_ui.o flglobal.o flworkspace.o flcallback.o 21 | INCLUDES= -I. 22 | LIBS= -L. -lphylotree -Lalign_lib -lalign -lm -Lphyml -lphyml 23 | SUBDIRS= . 
align_lib phyml 24 | 25 | .SUFFIXES:.c .o .cc 26 | 27 | .c.o: 28 | $(CC) -c $(DFLAGS) $(CFLAGS) $(INCLUDES) $< -o $@ 29 | 30 | .cc.o: 31 | $(CXX) -c $(DFLAGS) $(CXXFLAGS) $(INCLUDES) $< -o $@ 32 | 33 | all:$(PROG) 34 | 35 | lib-recur all-recur clean-recur install-recur: 36 | @target=`echo $@ | sed s/-recur//`; \ 37 | wdir=`pwd`; \ 38 | list='$(SUBDIRS)'; for subdir in $$list; do \ 39 | cd $$subdir; \ 40 | $(MAKE) CC="$(CC)" CXX="$(CXX)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \ 41 | INCLUDES="$(INCLUDES)" $$target || exit 1; \ 42 | cd $$wdir; \ 43 | done; 44 | 45 | lib:libphylotree.a 46 | 47 | libphylotree.a:$(LIBOBJS) 48 | $(AR) -cru $@ $(LIBOBJS) 49 | 50 | treebest:lib-recur main.o 51 | $(CXX) $(CFLAGS) $(DFLAGS) main.o -o $@ $(LIBS) 52 | 53 | fltreebest:lib-recur $(FLOBJS) 54 | $(CXX) $(CXXFLAGS) $(DFLAGS) -o $@ $(FLOBJS) `$(FLTKCFG) --cxxflags` `$(FLTKCFG) --ldstaticflags` $(LIBS); \ 55 | $(FLTKCFG) --post $@ 56 | 57 | timeout:timeout.o 58 | $(CC) $(CFLAGS) $(DFLAGS) timeout.o -o $@ 59 | 60 | make_ng86:make_ng86.o 61 | $(CC) $(CFLAGS) $(DFLAGS) make_ng86.o -o $@ 62 | 63 | ng86_ds.h:make_ng86 64 | ./make_ng86 $@ 65 | 66 | ng86_ds.o:ng86_ds.h ng86_ds.c 67 | 68 | main.o:main.c 69 | $(CC) -c $(CFLAGS) $(DFLAGS) $(NJVERSION) main.c -o $@ 70 | 71 | tags:*.c *.cc phyml/*.c align_lib/*.c phyml/*.cc 72 | ctags *.c *.cc *.l *.y phyml/*.c phyml/*.cc align_lib/*.c common/*.h common/*.c 73 | 74 | treebest.pdf:treebest.texi 75 | texi2pdf treebest.texi 76 | 77 | y.tab.c y.tab.h:parser.y 78 | $(YACC) -d $(YFLAGS) parser.y 79 | 80 | lex.yy.c:parser.l 81 | $(LEX) parser.l 82 | 83 | order.o:algo.h 84 | 85 | lex.yy.o:lex.yy.c y.tab.h 86 | $(CC) -c $(DFLAGS) $(CFLAGS) lex.yy.c -o $@ 87 | 88 | y.tab.cc:y.tab.c 89 | ln -sf y.tab.c y.tab.cc 90 | 91 | y.tab.o:y.tab.cc 92 | $(CXX) -c $(DFLAGS) $(CFLAGS) y.tab.cc -o $@ 93 | 94 | set: 95 | @if [ `expr match "$(DFLAGS_SPEC)" ".*HAVE_PHYML.*"` -ne 0 ]; then echo 'yes'; fi 96 | 97 | $(FLOBJS):flnjtree_ui.h flglobal.h 98 | flnjtree_ui.cc 
flnjtree_ui.h:flnjtree_ui.fl 99 | $(FLUID) -c flnjtree_ui.fl 100 | 101 | package:lex.yy.c y.tab.c 102 | @(cd ..; mv treebest treebest-$(VERSION); \ 103 | (find treebest-$(VERSION) -type f | grep -v "\.svn" | xargs tar cf -) | gzip > treebest-$(VERSION).tar.gz; \ 104 | mv treebest-$(VERSION) treebest) 105 | 106 | clean: 107 | rm -f gmon.out *.o a.out y.output libphylotree.a *.cp *.fn *.ky *.pg *.tp *.vr *.toc *.aux *.pdf *.log \ 108 | treebest fltreebest timeout nh2pic flnjtree_ui.cc flnjtree_ui.h make_ng86 \ 109 | treebest-*.tar.bz2 ChangeLog.bak _phyml_boot*.txt \ 110 | y.tab.cc tags common/*.o treebest-*.tar.gz *~ 111 | 112 | cleanmore:clean 113 | rm -f ng86_ds.h lex.yy.c y.tab.* 114 | 115 | distclean:clean-recur 116 | rm -f ng86_ds.h lex.yy.c y.tab.* 117 | -------------------------------------------------------------------------------- /Makefile.mac: -------------------------------------------------------------------------------- 1 | CC= gcc 2 | CXX= g++ 3 | CFLAGS= -Wall -O2 -fomit-frame-pointer -arch i386 -arch ppc #-pg 4 | CXXFLAGS= $(CFLAGS) 5 | YFLAGS= 6 | DFLAGS= -DUSE_GCC -DHAVE_PHYML -DLH3_ADDON # for phyml 7 | VERSION= 1.9.2 8 | NJVERSION= -DTR_VERSION="\"$(VERSION)\"" -DTR_BUILD=\"`date +%d%b%Y`\" 9 | LEX= flex 10 | YACC= bison --yacc 11 | FLUID= fluid 12 | FLTKCFG= fltk-config 13 | PROG= treebest 14 | LIBOBJS= lex.yy.o y.tab.o read.o nj.o utils1.o subtree.o sdi.o bs_nj.o \ 15 | reroot.o task.o output.o ortho.o cut_tree.o spec.o utils2.o \ 16 | align.o filter.o aln_mask.o prob_dist.o brent.o ng86_ds.o lost.o \ 17 | tree_plot.o cpp_utils.o read_aln.o pre_cons.o simulate.o compare.o \ 18 | order.o nhx_output.o nj2.o backtrans.o phyml.o mmerge.o est_len.o \ 19 | best.o trimpoor.o 20 | FLOBJS= flnjtree.o flnjtree_ui.o flglobal.o flworkspace.o flcallback.o 21 | INCLUDES= -I. 22 | LIBS= -L. -lphylotree -Lalign_lib -lalign -lm -Lphyml -lphyml 23 | SUBDIRS= . 
align_lib phyml 24 | 25 | .SUFFIXES:.c .o .cc 26 | 27 | .c.o: 28 | $(CC) -c $(DFLAGS) $(CFLAGS) $(INCLUDES) $< -o $@ 29 | 30 | .cc.o: 31 | $(CXX) -c $(DFLAGS) $(CXXFLAGS) $(INCLUDES) $< -o $@ 32 | 33 | all:$(PROG) 34 | 35 | lib-recur all-recur clean-recur install-recur: 36 | @target=`echo $@ | sed s/-recur//`; \ 37 | wdir=`pwd`; \ 38 | list='$(SUBDIRS)'; for subdir in $$list; do \ 39 | cd $$subdir; \ 40 | $(MAKE) CC="$(CC)" CXX="$(CXX)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \ 41 | INCLUDES="$(INCLUDES)" $$target -f Makefile.mac || exit 1; \ 42 | cd $$wdir; \ 43 | done; 44 | 45 | lib:libphylotree.a 46 | 47 | libphylotree.a:$(LIBOBJS) 48 | libtool -static -o $@ $(LIBOBJS) 49 | 50 | treebest:lib-recur main.o 51 | $(CXX) $(CFLAGS) $(DFLAGS) main.o -o $@ $(LIBS) 52 | 53 | fltreebest:lib-recur $(FLOBJS) 54 | $(CXX) $(CXXFLAGS) $(DFLAGS) -o $@ $(FLOBJS) `$(FLTKCFG) --cxxflags` `$(FLTKCFG) --ldstaticflags` $(LIBS); \ 55 | $(FLTKCFG) --post $@ 56 | 57 | timeout:timeout.o 58 | $(CC) $(CFLAGS) $(DFLAGS) timeout.o -o $@ 59 | 60 | make_ng86:make_ng86.o 61 | $(CC) $(CFLAGS) $(DFLAGS) make_ng86.o -o $@ 62 | 63 | ng86_ds.h:make_ng86 64 | ./make_ng86 $@ 65 | 66 | ng86_ds.o:ng86_ds.h ng86_ds.c 67 | 68 | main.o:main.c 69 | $(CC) -c $(CFLAGS) $(DFLAGS) $(NJVERSION) main.c -o $@ 70 | 71 | tags:*.c *.cc phyml/*.c align_lib/*.c phyml/*.cc 72 | ctags *.c *.cc *.l *.y phyml/*.c phyml/*.cc align_lib/*.c common/*.h common/*.c 73 | 74 | treebest.pdf:treebest.texi 75 | texi2pdf treebest.texi 76 | 77 | y.tab.c y.tab.h:parser.y 78 | $(YACC) -d $(YFLAGS) parser.y 79 | 80 | lex.yy.c:parser.l 81 | $(LEX) parser.l 82 | 83 | order.o:algo.h 84 | 85 | lex.yy.o:lex.yy.c y.tab.h 86 | $(CC) -c $(DFLAGS) $(CFLAGS) lex.yy.c -o $@ 87 | 88 | y.tab.cc:y.tab.c 89 | ln -sf y.tab.c y.tab.cc 90 | 91 | y.tab.o:y.tab.cc 92 | $(CXX) -c $(DFLAGS) $(CFLAGS) y.tab.cc -o $@ 93 | 94 | set: 95 | @if [ `expr match "$(DFLAGS_SPEC)" ".*HAVE_PHYML.*"` -ne 0 ]; then echo 'yes'; fi 96 | 97 | $(FLOBJS):flnjtree_ui.h flglobal.h 98 
| flnjtree_ui.cc flnjtree_ui.h:flnjtree_ui.fl 99 | $(FLUID) -c flnjtree_ui.fl 100 | 101 | package:lex.yy.c y.tab.c 102 | @(cd ..; mv treebest treebest-$(VERSION); \ 103 | (find treebest-$(VERSION) -type f | grep -v "\.svn" | xargs tar cf -) | gzip > treebest-$(VERSION).tar.gz; \ 104 | mv treebest-$(VERSION) treebest) 105 | 106 | clean: 107 | rm -f gmon.out *.o a.out y.output libphylotree.a *.cp *.fn *.ky *.pg *.tp *.vr *.toc *.aux *.pdf *.log \ 108 | treebest fltreebest timeout nh2pic flnjtree_ui.cc flnjtree_ui.h make_ng86 fltreebest-*.tar.bz2 \ 109 | ChangeLog.bak _phyml_boot*.txt \ 110 | y.tab.cc tags common/*.o treebest-*.tar.gz *~ 111 | 112 | cleanmore:clean 113 | rm -f ng86_ds.h lex.yy.c y.tab.* 114 | 115 | distclean:clean-recur 116 | rm -f ng86_ds.h lex.yy.c y.tab.* 117 | -------------------------------------------------------------------------------- /PROBLEMS: -------------------------------------------------------------------------------- 1 | 2 | * Component 'backtrans' has not been thoroughly tested. 3 | 4 | * For branch-based bootstrapping, bootstrap values will still stick to 5 | the nodes instead of branches. When the tree is re-rooted, this will 6 | cause trouble. So I strongly recommend using node-based 7 | bootstrapping, which is the default behaviour. 8 | 9 | * Memory violation still occurs in rare cases, especially in 10 | `fltreebest'. I will fix them one by one. 11 | 12 | * Unrooted constrained NJ now only works with binary constrained trees. 13 | 14 | * Some new functionalities should be added to FLtreeBeST. 
15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | treebest 2 | ======== 3 | 4 | TreeBeST: Tree Building guided by Species Tree (Ensembl Compara modifications) 5 | 6 | This repository holds the necessary changes of [Heng Li's version](https://github.com/lh3/treebest) to run the latest Ensembl Compara pipeline. 7 | Ensembl is **not** the official maintainer of this software. 8 | Pull-requests can still be submitted, but we will only accept them if they 9 | can provide a benefit to Ensembl. 10 | 11 | You can find more documentation on SourceForge: (http://treesoft.sourceforge.net/treebest.shtml) 12 | 13 | The main new features are: 14 | * new `-s` option in `treebest sdi`, to allow a user-defined species tree. This change is from [Albert Vilella](https://sites.google.com/site/avilella/) 15 | * new `T` node-tag in the NHX output: a bit-field listing the input trees that support the node. This is populated by the _mmerge_ algorithm 16 | * new `-I` option in `treebest nj`, to carry on the `T` tags from the input tree 17 | * new `-Z` option in `treebest best`, to redefine the PhyML variable `MIN_DIFF_LK`. It prevents PhyML from crashing during its computation 18 | * new `-X` option in `treebest best`, to give a higher weight to the likelihood that comes from the reconciliation with the species tree (default 1) 19 | * Species-intersection scores are now also reported as floating-point values under the `DCS` node-tag. The value is between 0 and 1, and displayed with 4 decimals. 
20 | * new `-I` option in `treebest best`, to start from the input tree instead of building one 21 | 22 | Other changes include: 23 | * bugfixes / tweaks when processing the filtered alignments (TreeBeST includes a Clustal-score-based MSA-filtering step) 24 | * bugfixes / tweaks when merging the trees 25 | * using `double` instead of `float` for floating-point values 26 | 27 | ## Branches and tags 28 | 29 | There is a single branch (master) where all the development goes. The version number stated in the source code (1.9.2) is not maintained. 30 | 31 | `ensembl_production_XX` tags are used to refer to the version used for the 32 | production of Ensembl version XX. Due to deployment constraints, these tags 33 | may not include the latest changes of the master branch. 34 | Instead, we provide `ensembl_release_candidate_Y` tags, Y starting from 1, for 35 | the "next" version we will deploy in production. 36 | 37 | ## Pre-git history 38 | 39 | Treebest used to be kept in a Subversion repository at 40 | http://sourceforge.net/p/treesoft/code/HEAD/tree/branches/lh3/ but Heng 41 | Li's GitHub repository (the parent of this repository) has collapsed the 42 | whole history into just one commit. 43 | 44 | There is a complete import of the Subversion repository at 45 | https://github.com/muffato/treebest . You can attach it to your checkout to 46 | see the complete pre-git history: 47 | 48 | ``` 49 | git remote add history https://github.com/muffato/treebest 50 | git fetch history 51 | git replace --graft 2a4fe3563e09ff069d319c9987ad4354b984b70f f5bbfb5c2e591ae8a176da960fb0d9edc01f1a96 52 | ``` 53 | This will pretend that the parent of the commit 54 | 2a4fe3563e09ff069d319c9987ad4354b984b70f is 55 | f5bbfb5c2e591ae8a176da960fb0d9edc01f1a96. _replace_ references are 56 | understood by all git tools, incl. `git log`. 
57 | 58 | -------------------------------------------------------------------------------- /algo.h: -------------------------------------------------------------------------------- 1 | #ifndef ALGORITHM_H_ 2 | #define ALGORITHM_H_ 3 | 4 | #ifndef ALGO_EQUAL 5 | #define ALGO_EQUAL(a,b) ((a)=(b)) 6 | #endif /* ALGO_EQUAL */ 7 | #ifndef ALGO_CMP 8 | #define ALGO_CMP(a,b) ((a)<(b)) 9 | #endif /* ALGO_CMP */ 10 | #define ALGO_SWAP(a,b) { ALGO_EQUAL(swap_tmp,a);ALGO_EQUAL(a,b);ALGO_EQUAL(b,swap_tmp); } 11 | 12 | #ifdef ALGO_TYPE 13 | #ifdef ALGO_QSORT 14 | 15 | #include 16 | #include 17 | 18 | typedef struct 19 | { 20 | size_t left,right; 21 | } ALGO_QSortStack; 22 | 23 | static void algo_qsort(ALGO_TYPE a[], size_t n) 24 | { 25 | size_t s, t, i, j, k; 26 | ALGO_QSortStack *top, *stack; 27 | ALGO_TYPE rp, swap_tmp; 28 | 29 | if (n == 0) return; 30 | stack = (ALGO_QSortStack*)malloc(sizeof(ALGO_QSortStack) * (size_t)((sizeof(size_t)*log(n)/M_LN2)+2)); 31 | 32 | top = stack; s = 0; t = n-1; 33 | while (1) { 34 | if (s < t) { 35 | i = s; j = t; k = (i+j)>>1; rp = a[k]; 36 | ALGO_SWAP(a[k], a[t]); 37 | do { 38 | do { ++i; } while (ALGO_CMP(a[i], rp)); 39 | do { --j; } while (j && ALGO_CMP(rp, a[j])); 40 | ALGO_SWAP(a[i], a[j]); 41 | } while (i < j); 42 | ALGO_SWAP(a[i], a[j]); 43 | ALGO_SWAP(a[i], a[t]); 44 | if (i-s > t-i) { 45 | if (i-s > 9) { top->left = s; top->right = i-1; ++top; } 46 | if (t-i > 9) s = i+1; 47 | else s = t; 48 | } else { 49 | if (t-i > 9) { top->left = i+1; top->right = t; ++top; } 50 | if (i-s > 9) t = i-1; 51 | else t = s; 52 | } 53 | } else { 54 | if (top == stack) { 55 | free(stack); 56 | for (i = 1; i < n; ++i) 57 | for (j = i; j > 0 && ALGO_CMP(a[j], a[j-1]); --j) 58 | ALGO_SWAP(a[j], a[j-1]); 59 | return; 60 | } else { --top; s = top->left; t = top->right; } 61 | } 62 | } 63 | } 64 | #endif /* ALGO_QSORT */ 65 | #ifdef ALGO_KSMALL 66 | static ALGO_TYPE algo_ksmall(ALGO_TYPE array[], size_t n, size_t k) 67 | /* Return the kth smallest value in 
array array[0..n-1], The input array will be rearranged 68 | * to have this value in array[k-1], with all smaller elements moved to arr[0..k-2] (in 69 | * arbitrary order) and all larger elements in arr[k..n] (also in arbitrary order) */ 70 | { 71 | ALGO_TYPE *arr, a, swap_tmp; 72 | size_t i, ir, j, l, mid; 73 | 74 | arr = array - 1; 75 | l = 1; 76 | ir = n; 77 | for (;;) { 78 | if (ir <= l + 1) { /* Active partition contains 1 or 2 elements */ 79 | if (ir == l + 1 && ALGO_CMP(arr[ir], arr[l])) /* Case of 2 elements */ 80 | ALGO_SWAP(arr[l], arr[ir]); 81 | return arr[k]; 82 | } else { 83 | mid = (l + ir) >> 1; 84 | ALGO_SWAP(arr[mid], arr[l+1]); 85 | if (ALGO_CMP(arr[ir], arr[l])) ALGO_SWAP(arr[l], arr[ir]); 86 | if (ALGO_CMP(arr[ir], arr[l+1])) ALGO_SWAP(arr[l+1], arr[ir]); 87 | if (ALGO_CMP(arr[l+1], arr[l])) ALGO_SWAP(arr[l], arr[l+1]); 88 | i = l + 1; /* initialize pointers for partitioning */ 89 | j = ir; 90 | a = arr[l+1]; /* partition element */ 91 | for (;;) { /* beginning of innermost loop */ 92 | do ++i; while (ALGO_CMP(arr[i], a)); /* scan up to find element > a */ 93 | do --j; while (ALGO_CMP(a, arr[j])); /* scan down to find element < a */ 94 | if (j < i) break; /* Pointers crossed. Partitioning complete. 
*/ 95 | ALGO_SWAP(arr[i], arr[j]); 96 | } 97 | arr[l+1] = arr[j]; /* insert partitioning element */ 98 | arr[j] = a; 99 | if (j >= k) ir = j - 1; /* Keep active the partition that contains the kth element */ 100 | if (j <= k) l = i; 101 | } 102 | } 103 | } 104 | #endif /* ALGO_KSMALL */ 105 | #ifdef ALGO_HEAP 106 | void algo_heap_adjust(ALGO_TYPE l[], int i, int n) 107 | { 108 | ALGO_TYPE tmp; 109 | int k; 110 | 111 | ALGO_EQUAL(tmp, l[i]); 112 | for (;;) { 113 | k = (i << 1) + 1; 114 | if (k >= n) { 115 | ALGO_EQUAL(l[i], tmp); 116 | return; 117 | } 118 | if (k < n - 1 && ALGO_CMP(l[k+1], l[k])) ++k; 119 | if (ALGO_CMP(l[k], tmp)) { 120 | ALGO_EQUAL(l[i], l[k]); 121 | i = k; 122 | } else { 123 | ALGO_EQUAL(l[i], tmp); 124 | return; 125 | } 126 | } 127 | } 128 | void algo_heap_make(ALGO_TYPE l[], int lsize) 129 | { 130 | int i; 131 | for (i = (lsize >> 1) - 1; i >= 0; --i) 132 | algo_heap_adjust(l, i, lsize); 133 | } 134 | void algo_heap_sort(ALGO_TYPE l[], int lsize) 135 | { 136 | ALGO_TYPE swap_tmp; 137 | int i; 138 | 139 | for (i = lsize - 1; i > 0; --i) { 140 | ALGO_SWAP(l[0], l[i]); 141 | algo_heap_adjust(l, 0, i); 142 | } 143 | } 144 | #endif /* ALGO_HEAP */ 145 | #endif /* ALGO_TYPE */ 146 | 147 | #endif /* ALGORITHM_H_ */ 148 | -------------------------------------------------------------------------------- /align.h: -------------------------------------------------------------------------------- 1 | #ifndef PHYLOTREE_ALIGN_H_ 2 | #define PHYLOTREE_ALIGN_H_ 3 | 4 | #include 5 | #include 6 | #include "tree.h" 7 | 8 | #define QUAL_SCALE 4.0 9 | #define QUAL_MIN_RES 0.3333333 10 | 11 | #define DIST_MM 1 12 | #define DIST_KIMURA 2 13 | #define DIST_JTT 3 14 | #define DIST_DN 4 15 | #define DIST_DS 5 16 | #define DIST_DM 6 17 | #define DIST_NT_MM 7 18 | 19 | #define MA_MIN_DIST 0.00001 20 | #define MA_MAX_DIST 9.0 21 | 22 | typedef struct __lih_MultiAlign 23 | { 24 | int max, n; /* maximum number/number of sequences */ 25 | int len; /* length of the alignment */ 
26 | int is_nucl; /* if true, treat as nucleotide alignment */ 27 | char **name; /* names of the sequences */ 28 | char **seq; /* character-converted sequences */ 29 | char **gene; /* gene names, if presented in MFA file */ 30 | char **comment; /* comment for the gene */ 31 | struct __lih_MultiAlign *ori_nucl; 32 | } MultiAlign; 33 | 34 | typedef struct 35 | { 36 | int n; /* number of sequences */ 37 | int len; /* length of the alignment */ 38 | int *pos_qual; /* quality values on each position */ 39 | double *seq_qual; /* quality values for each sequences */ 40 | } AlignQual; 41 | 42 | typedef struct 43 | { 44 | int type; /* type of distance */ 45 | int is_kimura; /* whether use kimura correction */ 46 | double *mat; /* now only JTT model */ 47 | double *eigen; /* eigen values, only used for JTT */ 48 | double scale; /* for JTT */ 49 | } DistParam; 50 | 51 | #ifdef __cplusplus 52 | extern "C" { 53 | #endif 54 | 55 | /* alloc and free */ 56 | DistParam *ma_alloc_DistParam(int type); 57 | void ma_free_DistParam(DistParam *dp); 58 | 59 | /* fill `mat' from alignment `ma' with `dp' parameters. When `is_rand' is true, the 60 | * alignment will be resampled. It is just a wrapper. Other functions will be called. 
*/ 61 | void ma_cal_dist(Matrix *mat, const MultiAlign *ma, int is_rand, const DistParam *dp); 62 | /* probability distance */ 63 | void ma_cal_prob_dist(Matrix *mat, const MultiAlign *ma, int is_rand, const DistParam *dp); 64 | /* mismatch distance */ 65 | void ma_cal_mm_dist(Matrix *mat, const MultiAlign *ma, int is_rand, int is_kimura, int *); 66 | 67 | /* alloc and free */ 68 | MultiAlign *ma_alloc(); 69 | void ma_free(MultiAlign *ma); 70 | /* read alignment */ 71 | MultiAlign *ma_read(FILE *fp, int is_nucl, int is_aln); 72 | MultiAlign *ma_read_alignment(FILE *fp, int is_nucl); 73 | MultiAlign *ma_read_aln_alignment(FILE *fp, int is_nucl); 74 | 75 | /* alloc and free */ 76 | AlignQual *ma_alloc_AlignQual(const MultiAlign *ma); 77 | void ma_free_AlignQual(AlignQual *aq); 78 | /* calculate alignment quality */ 79 | AlignQual *ma_cal_qual(const MultiAlign *ma); 80 | /* filter a multialignment */ 81 | void ma_filter(MultiAlign *ma, int is_collpase, int cut_off, int is_mask_segment); 82 | 83 | void ma_mask_poor_segment(MultiAlign *ma, MultiAlign *ma_nt); 84 | 85 | void ma_apply_mask(MultiAlign *ma); 86 | 87 | void tr_align_output(FILE *fp, const MultiAlign *ma); 88 | /* output in ALN format */ 89 | void tr_align_aln_output(FILE *fp, const MultiAlign *ma); 90 | /* translate nucleotide alignment to protein alignment */ 91 | MultiAlign *ma_trans_align(const MultiAlign *ma, int is_trans); 92 | 93 | MultiAlign *ma_back_trans(MultiAlign *aa, FILE *fp, double threshold); 94 | 95 | void ma_remove_gap(MultiAlign *ma); 96 | 97 | void tr_attach_geneid(Tree *tree, MultiAlign *ma); 98 | 99 | Matrix *ma_init_small_matrix(MultiAlign *ma); 100 | 101 | #ifdef __cplusplus 102 | } 103 | #endif 104 | #endif 105 | -------------------------------------------------------------------------------- /align_lib/Makefile: -------------------------------------------------------------------------------- 1 | CC= gcc 2 | CFLAGS= -W -O2 -Wall -fomit-frame-pointer 3 | DFLAGS= #-DUSE_KR_ALLOC 4 | 
LIBOBJS= aa2nt.o aa2nt_local.o transaln.o \ 5 | splice.o splice_local.o splice_bound.o \ 6 | common_align.o \ 7 | seq.o table.o align.o \ 8 | pwalign.o 9 | EXEOBJS= pwalign_main.o ../common/mem.o 10 | SMEXEOBJS= make_matrix.o 11 | INCLUDES= -I.. 12 | 13 | .SUFFIXES:.c .o 14 | 15 | .c.o: 16 | $(CC) -c $(CFLAGS) $(DFLAGS) $< -o $@ $(INCLUDES) -I.. 17 | 18 | all:pwalign 19 | 20 | lib:libalign.a 21 | 22 | install: 23 | 24 | libalign.a:$(LIBOBJS) 25 | $(AR) -cru $@ $(LIBOBJS) 26 | 27 | pwalign:libalign.a $(EXEOBJS) 28 | $(CC) $(CFLAGS) $(DFLAGS) $(EXEOBJS) -L. -lalign -o $@ 29 | 30 | make_matrix.o:gen_nt_sm.c 31 | $(CC) -c $(CFLAGS) $(DFLAGS) -DALN_MATRIX_DEBUG gen_nt_sm.c -o $@ $(INCLUDES) 32 | 33 | make_matrix: $(SMEXEOBJS) 34 | $(CC) $(CFLAGS) $(DFLAGS) $(SMEXEOBJS) -o $@ -lm $(INCLUDES) 35 | 36 | clean: 37 | rm -f *.o common/*.o libalign.a pwalign gmon.out a.out make_matrix 38 | -------------------------------------------------------------------------------- /align_lib/Makefile.mac: -------------------------------------------------------------------------------- 1 | CC= gcc 2 | CFLAGS= -g -O2 -Wall 3 | DFLAGS= #-DUSE_KR_ALLOC 4 | LIBOBJS= aa2nt.o aa2nt_local.o transaln.o \ 5 | splice.o splice_local.o splice_bound.o \ 6 | common_align.o \ 7 | seq.o table.o align.o \ 8 | pwalign.o 9 | EXEOBJS= pwalign_main.o ../common/mem.o 10 | SMEXEOBJS= make_matrix.o 11 | INCLUDES= -I.. 12 | 13 | .SUFFIXES:.c .o 14 | 15 | .c.o: 16 | $(CC) -c $(CFLAGS) $(DFLAGS) $< -o $@ $(INCLUDES) -I.. 17 | 18 | all:pwalign 19 | 20 | lib:libalign.a 21 | 22 | install: 23 | 24 | libalign.a:$(LIBOBJS) 25 | libtool -static -o $@ $(LIBOBJS) 26 | 27 | pwalign:libalign.a $(EXEOBJS) 28 | $(CC) $(CFLAGS) $(DFLAGS) $(EXEOBJS) -L. 
-lalign -o $@ 29 | 30 | make_matrix.o:gen_nt_sm.c 31 | $(CC) -c $(CFLAGS) $(DFLAGS) -DALN_MATRIX_DEBUG gen_nt_sm.c -o $@ $(INCLUDES) 32 | 33 | make_matrix: $(SMEXEOBJS) 34 | $(CC) $(CFLAGS) $(DFLAGS) $(SMEXEOBJS) -o $@ -lm $(INCLUDES) 35 | 36 | clean: 37 | rm -f *.o common/*.o libalign.a pwalign gmon.out a.out make_matrix 38 | -------------------------------------------------------------------------------- /align_lib/align.c: -------------------------------------------------------------------------------- 1 | /* 2 | * libalign -- alignment utilities 3 | * 4 | * Copyright (c) 2003-2004, Li Heng 5 | * 6 | * 7 | * This library is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU Lesser General Public 9 | * License as published by the Free Software Foundation; either 10 | * version 2.1 of the License, or (at your option) any later version. 11 | * 12 | * This library is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | * Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public 18 | * License along with this library; if not, write to the Free Software 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 | * 21 | */ 22 | 23 | #include 24 | #include "align.h" 25 | #include "table.h" 26 | #include "seq.h" 27 | 28 | #define ALN_REVERSE4(c) (((c) > 3)? 
c : 4 - c) 29 | AlnParam aln_param_nt2nt = { 10, 2, 2, -1, -1, -1, -1, aln_sm_nt, 16, 75 }; 30 | AlnParam aln_param_rd2rd = { 20, 19, 19, -1, -1, -1, -1, aln_sm_read, 16, 75 }; 31 | AlnParam aln_param_aa2aa = { 12, 2, 2, -1, -1, -1, -1, aln_sm_blosum62, 22, 50 }; 32 | AlnParam aln_param_aa2nt = { 12, 2, 2, 20, -1, -1, -1, aln_sm_blosum62, 22, 50 }; 33 | AlnParam aln_param_splice = { 500, 50, 50, -1, 1500, 2200, -43, aln_sm_hs, 5, -1 }; 34 | AlnParam aln_param_splice2 = { 900, 50, 50, -1, 1900, 2900, -43, aln_sm_hs, 5, -1 }; 35 | /* this parameter is optimized for HLA typing */ 36 | AlnParam aln_param_rd_splice = { 20, 19, 19, -1, 50, 70, -10, aln_sm_read, 16, -1 }; 37 | 38 | AlnAln *aln_init_AlnAln() 39 | { 40 | AlnAln *aa; 41 | aa = (AlnAln*)MYALLOC(sizeof(AlnAln)); 42 | aa->path = 0; 43 | aa->out1 = aa->out2 = aa->outm = 0; 44 | aa->asp = 0; 45 | aa->path_len = 0; 46 | aa->n_sub_path = 0; 47 | return aa; 48 | } 49 | void aln_free_AlnAln(AlnAln *aa) 50 | { 51 | MYFREE(aa->path); 52 | MYFREE(aa->out1); 53 | MYFREE(aa->out2); 54 | MYFREE(aa->outm); 55 | MYFREE(aa->asp); 56 | aa->path = 0; aa->out1 = aa->out2 = aa->outm = 0; 57 | aa->asp = 0; 58 | aa->path_len = 0; 59 | aa->n_sub_path = 0; 60 | MYFREE(aa); 61 | } 62 | void aln_apply_matrix_mean(AlnParam *ap) 63 | { 64 | int i; 65 | 66 | for (i = 0; i != ap->row * ap->row; ++i) 67 | ap->matrix[i] -= ap->matrix_mean; 68 | } 69 | AlnAln *aln_align(char *seq1, char *seq2, AlnParam *ap, int type) 70 | { 71 | AlnAln *aa; 72 | 73 | if (ap->good_splice > 0) 74 | aa = aln_splice(seq1, seq2, ap, type); 75 | else if (ap->row < 20) /* nucleotide */ 76 | aa = aln_common_align(seq1, seq2, ap, type); 77 | else if (ap->frame_shift > 0) 78 | aa = aln_nt2aa(seq1, seq2, ap, type); 79 | else aa = aln_common_align(seq1, seq2, ap, type); 80 | 81 | return aa; 82 | } 83 | uchar *aln_reverse4(uchar *seq, int len) 84 | { 85 | int i; 86 | uchar tmp; 87 | for (i = 0; i != len >> 1; ++i) { 88 | tmp = ALN_REVERSE4(seq[i]); 89 | seq[i] = 
ALN_REVERSE4(seq[len - i - 1]); 90 | seq[len - i - 1] = tmp; 91 | } 92 | if (len & 0x1) seq[len>>1] = ALN_REVERSE4(seq[len>>1]); 93 | return seq; 94 | } 95 | -------------------------------------------------------------------------------- /align_lib/align.h: -------------------------------------------------------------------------------- 1 | /* 2 | * libalign -- alignment utilities 3 | * 4 | * Copyright (c) 2003-2004, Li Heng 5 | * 6 | * 7 | * This library is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU Lesser General Public 9 | * License as published by the Free Software Foundation; either 10 | * version 2.1 of the License, or (at your option) any later version. 11 | * 12 | * This library is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | * Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public 18 | * License along with this library; if not, write to the Free Software 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 | * 21 | */ 22 | 23 | #ifndef ALN_LIB_ALIGN_H_ 24 | #define ALN_LIB_ALIGN_H_ 25 | 26 | #include "common/common.h" 27 | #include "table.h" 28 | 29 | #define FROM_M 0 30 | #define FROM_I 1 31 | #define FROM_D 2 32 | #define FROM_N 3 33 | #define FROM_O 3 34 | #define FROM_T 4 35 | #define FROM_G 5 36 | #define FROM_0 7 37 | 38 | #define ALN_LOCAL_ALIGN 0 39 | #define ALN_GLOBAL_ALIGN 1 40 | #define ALN_BOUND_ALIGN 2 41 | 42 | #define FLAW_INS 0x0100 43 | #define FLAW_DEL 0x0200 44 | #define FLAW_FRAME_SHIFT 0x0400 45 | #define FLAW_NO_HEAD 0x0800 46 | #define FLAW_NO_TAIL 0x1000 47 | 48 | #define MINOR_INF -1073741823 49 | 50 | typedef struct 51 | { 52 | int gap_open; 53 | int gap_ext; 54 | int gap_end; 55 | 56 | int frame_shift; 57 | 58 | int good_splice; 59 | 
int bad_splice; 60 | 61 | int matrix_mean; 62 | int *matrix; 63 | int row; 64 | int band_width; 65 | } AlnParam; 66 | 67 | typedef struct 68 | { 69 | int i, j; 70 | unsigned char ctype; 71 | } path_t; 72 | 73 | typedef struct 74 | { 75 | path_t *path; 76 | int path_len; 77 | } AlnSubPath; 78 | 79 | typedef struct 80 | { 81 | path_t *path; 82 | int path_len; 83 | int score; 84 | 85 | char *out1, *out2; 86 | char *outm; 87 | 88 | int n_sub_path; 89 | AlnSubPath *asp; 90 | } AlnAln; 91 | 92 | typedef int (*AlnCoreFunc)(unsigned char *, int, unsigned char *, int, AlnParam *, path_t *, int *); 93 | 94 | #ifdef __cplusplus 95 | extern "C" { 96 | #endif 97 | 98 | AlnAln *aln_align(char *seq1, char *seq2, AlnParam *ap, int type); 99 | void aln_init_score_array(uchar *seq, int len, int row, int *score_matrix, int **s_array); 100 | void aln_apply_matrix_mean(AlnParam *ap); 101 | 102 | AlnAln *aln_init_AlnAln(); 103 | void aln_free_AlnAln(AlnAln *aa); 104 | 105 | /* canonical local and global alignment with speed enhancement */ 106 | AlnAln *aln_common_align(char *seq1, char *seq2, AlnParam *ap, int type); 107 | /* banded global alignment */ 108 | int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, AlnParam *ap, 109 | path_t *path, int *path_len); 110 | /* smith-waterman alignment */ 111 | int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, AlnParam *ap, 112 | path_t *path, int *path_len); 113 | 114 | /* align amino acid sequence against nucleotide sequence with frame-shift */ 115 | AlnAln *aln_nt2aa(char *seq_nt, char *seq_aa, AlnParam *ap, int type); 116 | void aln_output_segment(char *ont, char *oprot, path_t *path, int path_len, char *locus1, char *locus2); 117 | char *aln_back_trans(char *aln_aa, char *seq_nt_pre, int *flaw_code); 118 | int aln_aa2nt_global_core(unsigned char *prot, int prot_len, unsigned char *nt_trans, int nt_len, 119 | AlnParam *ap, path_t *path, int *path_len); 120 | int 
aln_aa2nt_local_core(unsigned char *prot, int prot_len, unsigned char *nt_trans, int nt_len, 121 | AlnParam *ap, path_t *path, int *path_len); 122 | 123 | /* align cDNA to genomic sequences */ 124 | AlnAln *aln_splice(char *gen, char *est, AlnParam *ap, int type); 125 | int aln_splice_global_core(unsigned char *est, int est_len, unsigned char *gen, int gen_len, AlnParam *ap, 126 | path_t *path, int *path_len); 127 | int aln_splice_local_core(unsigned char *est, int est_len, unsigned char *gen, int gen_len, AlnParam *ap, 128 | path_t *path, int *path_len); 129 | int aln_splice_bound_core(unsigned char *est, int est_len, unsigned char *gen, int gen_len, AlnParam *ap, 130 | int *gen_start, int *gen_stop, int *est_start, int *est_stop); 131 | AlnSubPath *aln_splice_sub_path(path_t *path, int path_len, int *count); 132 | 133 | uchar *aln_reverse4(uchar *seq, int len); 134 | 135 | int pwalign_task(int argc, char *argv[]); 136 | 137 | extern AlnParam aln_param_nt2nt, aln_param_aa2aa, aln_param_aa2nt, aln_param_splice, aln_param_splice2; 138 | extern AlnParam aln_param_rd2rd, aln_param_rd_splice; 139 | 140 | #ifdef __cplusplus 141 | } 142 | #endif 143 | 144 | #endif /* ALIGN_H_ */ 145 | -------------------------------------------------------------------------------- /align_lib/gen_nt_sm.c: -------------------------------------------------------------------------------- 1 | #include 2 | #ifdef ALN_MATRIX_DEBUG 3 | #include 4 | #include 5 | #include 6 | 7 | #endif 8 | 9 | double aln_gen_nt_score_matrix(int *matrix, double t, double gc, double R, double beta) 10 | { 11 | double theta1, theta2; 12 | double u, v, x, y; 13 | double alpha; 14 | double mat[25]; 15 | double theta_tmp[4]; 16 | double tmp, ident; 17 | int i, j; 18 | 19 | theta1 = gc; 20 | theta2 = 1.0 - gc; 21 | alpha = 2.0 * beta * R; 22 | theta_tmp[0] = theta_tmp[3] = theta2 / 2.0; 23 | theta_tmp[1] = theta_tmp[2] = theta1 / 2.0; 24 | u = 0.5 * (1.0 + exp(-2.0 * beta * t) - 2.0 * exp(-(alpha + beta) * t)); 25 | v = 
0.5 * (1.0 - exp(-2.0 * beta * t)); 26 | x = (1.0 - u * theta1 - v) / theta2; 27 | y = (1.0 - u * theta2 - v) / theta1; 28 | #ifdef ALN_MATRIX_DEBUG 29 | printf("(x,y,u,v) (%f,%f,%f,%f)\n", x, y, u, v); 30 | #endif 31 | ident = x * theta2 * theta2 + y * theta1 * theta1; 32 | /* calculate "AGCT" score */ 33 | for (i = 0; i != 25; ++i) 34 | mat[i] = log(2.0 * v); 35 | mat[0 * 5 + 0] = mat[3 * 5 + 3] = log(2.0 * x); 36 | mat[1 * 5 + 1] = mat[2 * 5 + 2] = log(2.0 * y); 37 | mat[0*5+1] = mat[1*5+0] = mat[2*5+3] = mat[3*5+2] = log(2.0 * u); 38 | /* calculate 'N' score */ 39 | for (i = 0; i != 4; ++i) { 40 | for (j = 0, tmp = 0.0; j != 4; ++j) 41 | tmp += mat[i * 5 + j] * theta_tmp[j]; 42 | mat[i * 5 + 4] = mat[4 * 5 + i] = tmp; 43 | } 44 | for (j = 0, tmp = 0.0; j != 4; ++j) 45 | tmp += mat[4 * 5 + j] * theta_tmp[j]; 46 | mat[4 * 5 + 4] = tmp; 47 | /* fill matrix */ 48 | for (i = 0, tmp = 0.0; i != 25; ++i) 49 | if (tmp < mat[i]) tmp = mat[i]; 50 | for (i = 0; i != 25; ++i) 51 | matrix[i] = (int)(mat[i] / tmp * 100.0 + 0.5); 52 | return ident; 53 | } 54 | #ifdef ALN_MATRIX_DEBUG 55 | void usage(const char *prog) 56 | { 57 | fprintf(stderr, "Usage: %s [-t time] [-b beta] [-r R] [-c GC] [-h]\n", prog); 58 | exit(1); 59 | } 60 | int main(int argc, char *argv[]) 61 | { 62 | int matrix[25]; 63 | int i, j, c; 64 | double id; 65 | double R = 1.5; 66 | double gc = 0.52; 67 | double beta = 0.18; 68 | double t = 1.0; 69 | 70 | while ((c = getopt(argc, argv, "r:b:t:c:h")) >= 0) { 71 | switch (c) { 72 | case 'r': R = atof(optarg); break; 73 | case 'b': beta = atof(optarg); break; 74 | case 't': t = atof(optarg); break; 75 | case 'c': gc = atof(optarg); break; 76 | case 'h': usage(argv[0]); break; 77 | } 78 | } 79 | id = aln_gen_nt_score_matrix(matrix, t, gc, R, beta); 80 | printf("\n"); 81 | printf(" A G C T N\n"); 82 | for (i = 0; i != 5; ++i) { 83 | printf("%c", "AGCTN"[i]); 84 | for (j = 0; j != 5; ++j) 85 | printf("%6d", matrix[i * 5 + j]); 86 | printf("\n"); 87 | } 88 | 
printf("\npercent identities = %.1f%%\n\n", id*100.0); 89 | 90 | return 0; 91 | } 92 | #endif 93 | -------------------------------------------------------------------------------- /align_lib/pwalign.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "align.h" 6 | #include "seq.h" 7 | #include "version.h" 8 | 9 | #ifndef MAX_NAME_LEN 10 | #define MAX_NAME_LEN 255 11 | #endif 12 | 13 | 14 | int aln_pair_align(FILE *fp1, FILE *fp2, AlnParam *ap, int type, int misc_flag) 15 | { 16 | seq_t seq1, seq2; 17 | int len1, len2, n; 18 | char name1[MAX_NAME_LEN], name2[MAX_NAME_LEN]; 19 | path_t *pt, *pp; 20 | AlnAln *aa; 21 | 22 | INIT_SEQ(seq1); INIT_SEQ(seq2); 23 | 24 | for (n = 0; ; ++n) { 25 | len1 = read_fasta(fp1, &seq1, name1, 0); 26 | len2 = read_fasta(fp2, &seq2, name2, 0); 27 | if (len1 < 0 || len2 < 0) break; 28 | aa = aln_align((char*)seq1.s, (char*)seq2.s, ap, type); 29 | pp = aa->path; pt = aa->path + aa->path_len - 1; 30 | printf(">%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%d\n", name1, len1, pt->i, pp->i, 31 | name2, len2, pt->j, pp->j, aa->score); 32 | if (aa->out1) printf("%s\n", aa->out1); 33 | if (aa->outm) printf("%s\n", aa->outm); 34 | if (aa->out2) printf("%s\n", aa->out2); 35 | if (type != ALN_BOUND_ALIGN) printf("//\n"); 36 | fflush(stdout); 37 | if (misc_flag) 38 | aln_output_segment((char*)seq1.s, (char*)seq2.s, aa->path, aa->path_len, name1, name2); 39 | aln_free_AlnAln(aa); 40 | } 41 | MYFREE(seq1.s); MYFREE(seq2.s); 42 | return n; 43 | } 44 | static void pwalign_usage(char *prog) 45 | { 46 | fprintf(stderr, "\n"); 47 | fprintf(stderr, "Program : pwalign (PairWise ALIGNment tool)\n"); 48 | fprintf(stderr, "Version : %s, on %s\n", aln_version, aln_date); 49 | fprintf(stderr, "Contact : liheng@genomics.org.cn\n\n"); 50 | fprintf(stderr, "Usage : %s [options] \n\n", prog); 51 | fprintf(stderr, "Options : -f generate full alignment\n"); 52 | fprintf(stderr, " -a do 
not apply matrix mean in local alignment\n"); 53 | fprintf(stderr, " -d just calculate alignment boundaries\n"); 54 | fprintf(stderr, " -o NUM gap open penalty\n"); 55 | fprintf(stderr, " -e NUM gap extension penalty\n"); 56 | fprintf(stderr, " -n NUM gap end penalty for nt2nt or aa2aa\n"); 57 | fprintf(stderr, " -s NUM frame-shift penalty for aa2nt\n"); 58 | fprintf(stderr, " -g NUM good splicing penalty\n"); 59 | fprintf(stderr, " -w NUM band-width\n"); 60 | fprintf(stderr, " -b NUM bad splicing penalty\n"); 61 | fprintf(stderr, " -m output miscellaneous information\n"); 62 | fprintf(stderr, " -h help\n\n"); 63 | exit(1); 64 | } 65 | 66 | int pwalign_task(int argc, char *argv[]) 67 | { 68 | AlnParam ap; 69 | int gap_open, gap_ext, gap_end, fs; 70 | int good_splice, bad_splice, band_width; 71 | int c, type = ALN_LOCAL_ALIGN; 72 | int misc_flag = 0, matrix_mean = 1; 73 | FILE *fp1, *fp2; 74 | 75 | band_width = gap_open = gap_ext = gap_end = fs = good_splice = bad_splice = -1; 76 | while ((c = getopt(argc, argv, "fadhmo:e:n:s:b:g:w:")) >= 0) { 77 | switch (c) { 78 | case 'a': matrix_mean = 0; break; 79 | case 'd': type = ALN_BOUND_ALIGN; break; 80 | case 'f': type = ALN_GLOBAL_ALIGN; break; 81 | case 'o': gap_open = atoi(optarg); break; 82 | case 'e': gap_ext = atoi(optarg); break; 83 | case 'n': gap_end = atoi(optarg); break; 84 | case 'g': good_splice = atoi(optarg); break; 85 | case 'b': bad_splice = atoi(optarg); break; 86 | case 's': fs = atoi(optarg); break; 87 | case 'w': band_width = atoi(optarg); break; 88 | case 'm': misc_flag = 1; break; 89 | case 'h': pwalign_usage(argv[0]); break; 90 | } 91 | } 92 | if (optind + 3 != argc) pwalign_usage(argv[0]); 93 | if (!strcmp(argv[optind], "nt2nt")) { 94 | ap = aln_param_nt2nt; 95 | } else if (!strcmp(argv[optind], "aa2aa")) { 96 | ap = aln_param_aa2aa; 97 | } else if (!strcmp(argv[optind], "nt2aa")) { 98 | ap = aln_param_aa2nt; 99 | } else if (!strcmp(argv[optind], "splice")) { 100 | if (matrix_mean) ap = 
aln_param_splice2; 101 | else ap = aln_param_splice; 102 | } else { 103 | fprintf(stderr, "ERROR: valid types are nt2nt, aa2aa, nt2aa or splice\n"); 104 | exit(2); 105 | } 106 | if (gap_open >= 0) ap.gap_open = gap_open; 107 | if (gap_ext >= 0) ap.gap_ext = gap_ext; 108 | if (gap_end >= 0) ap.gap_end = gap_end; 109 | if (fs >= 0) ap.frame_shift = fs; 110 | if (good_splice >= 0) ap.good_splice = good_splice; 111 | if (bad_splice >= 0) ap.bad_splice = bad_splice; 112 | if (band_width >= 0) ap.band_width = band_width; 113 | /* I forget what this line is for. Now I comment it. 2006-01-16 */ 114 | /* if (matrix_mean != 0 && type != ALN_GLOBAL_ALIGN) aln_apply_matrix_mean(&ap); */ 115 | 116 | fp1 = fopen(argv[optind+1], "r"); 117 | fp2 = fopen(argv[optind+2], "r"); 118 | if (!fp1 || !fp2) { 119 | fprintf(stderr, "ERROR: file open error\n"); 120 | exit(3); 121 | } 122 | aln_pair_align(fp1, fp2, &ap, type, misc_flag); 123 | fclose(fp1); fclose(fp2); 124 | return 0; 125 | } 126 | -------------------------------------------------------------------------------- /align_lib/pwalign_main.c: -------------------------------------------------------------------------------- 1 | #include "align.h" 2 | 3 | int main(int argc, char *argv[]) 4 | { 5 | return pwalign_task(argc, argv); 6 | } 7 | -------------------------------------------------------------------------------- /align_lib/seq.c: -------------------------------------------------------------------------------- 1 | /* 2 | * libalign -- alignment utilities 3 | * 4 | * Copyright (c) 2003-2004, Li Heng 5 | * 6 | * 7 | * This library is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU Lesser General Public 9 | * License as published by the Free Software Foundation; either 10 | * version 2.1 of the License, or (at your option) any later version. 
11 | * 12 | * This library is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | * Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public 18 | * License along with this library; if not, write to the Free Software 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 | * 21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include "seq.h" 27 | 28 | /* Read sequences from file "fp" in FASTA format. Sequence will be saved 29 | * in "seq", sequence ID in "locus", and comment saved in "comment", 30 | * provided "comment != 0". Sequence length will be returned. If -1 is 31 | * returned, no sequence is left in the file. */ 32 | int read_fasta(FILE *fp, seq_t *seq, char *locus, char *comment) 33 | { 34 | int c, l, max; 35 | char *p; 36 | 37 | c = 0; 38 | while (!feof(fp) && fgetc(fp) != '>'); 39 | if (feof(fp)) return -1; 40 | p = locus; 41 | while (!feof(fp) && (c = fgetc(fp)) != ' ' && c != '\t' && c != '\n') 42 | if (c != '\r') *p++ = c; 43 | *p = '\0'; 44 | if (comment) { 45 | p = comment; 46 | if (c != '\n') { 47 | while (!feof(fp) && ((c = fgetc(fp)) == ' ' || c == '\t')); 48 | if (c != '\n') { 49 | *p++ = c; 50 | while (!feof(fp) && (c = fgetc(fp)) != '\n') 51 | if (c != '\r') *p++ = c; 52 | } 53 | } 54 | *p = '\0'; 55 | } else if (c != '\n') while (!feof(fp) && fgetc(fp) != '\n'); 56 | l = 0; max = seq->m; 57 | while (!feof(fp) && (c = fgetc(fp)) != '>') { 58 | if (isalpha(c) || c == '-' || c == '.') { 59 | if (l + 1 >= max) { 60 | max += SEQ_BLOCK_SIZE; 61 | seq->s = (char*)MYREALLOC(seq->s, sizeof(char) * max); 62 | } 63 | seq->s[l++] = (char)c; 64 | } 65 | } 66 | if (c == '>') ungetc(c,fp); 67 | if (l) seq->s[l] = 0; 68 | seq->m = max; seq->l = l; 69 | return l; 70 | } 71 | int read_fasta_str(char *buffer, seq_t 
*seq, char *locus, char *comment, char **ptr) 72 | { 73 | int c, l, max; 74 | char *p; 75 | char *q; 76 | 77 | c = 0; q = buffer; 78 | while (*q && *q++ != '>'); 79 | if (*q == 0) return -1; 80 | p = locus; 81 | while (*q && (c = *q++) != ' ' && c != '\t' && c != '\n') 82 | if (c != '\r') *p++ = c; 83 | *p = '\0'; 84 | if (comment) { 85 | p = comment; 86 | if (c != '\n') { 87 | while (*q && ((c = *q++) == ' ' || c == '\t')); 88 | if (c != '\n') { 89 | *p++ = c; 90 | while (*q && (c = *q++) != '\n') 91 | if (c != '\r') *p++ = c; 92 | } 93 | } 94 | *p = '\0'; 95 | } else if (c != '\n') while (*q && *q++ != '\n'); 96 | l = 0; max = seq->m; 97 | while (*q && (c = *q++) != '>') { 98 | if (isalpha(c) || c == '-' || c == '.') { 99 | if (l + 1 >= max) { 100 | max += SEQ_BLOCK_SIZE; 101 | seq->s = (char*)MYREALLOC(seq->s, sizeof(char) * max); 102 | } 103 | seq->s[l++] = (char)c; 104 | } 105 | } 106 | if (c == '>') --q; 107 | seq->s[l] = 0; 108 | seq->m = max; seq->l = l; 109 | *ptr = q; 110 | return l; 111 | } 112 | /* Read quality from file "fp" in FASTA format. Quality will be saved 113 | * in "seq", sequence ID in "locus", and comment saved in "comment", 114 | * provided "comment != 0". Sequence length will be returned. If -1 is 115 | * returned, no sequence is left in the file. 
*/ 116 | int read_qual(FILE *fp, seq_t *seq, char *locus, char *comment) 117 | { 118 | int c, l, max; 119 | char *p, *q, tmp[8]; 120 | 121 | c = 0; 122 | while (!feof(fp) && fgetc(fp) != '>'); 123 | if (feof(fp)) return -1; 124 | p = locus; 125 | while (!feof(fp) && (c = fgetc(fp)) != ' ' && c != '\t' && c != '\n') 126 | if (c != '\r') *p++ = c; 127 | *p = '\0'; 128 | if (comment) { 129 | p = comment; 130 | if (c != '\n') { 131 | while (!feof(fp) && ((c = fgetc(fp)) == ' ' || c == '\t')); 132 | if (c != '\n') { 133 | *p++ = c; 134 | while (!feof(fp) && (c = fgetc(fp)) != '\n') 135 | if (c != '\r') *p++ = c; 136 | } 137 | } 138 | *p = '\0'; 139 | } 140 | if (c != '\n') while (!feof(fp) && fgetc(fp) != '\n'); 141 | l = 0; max = seq->m; 142 | q = tmp; 143 | while (!feof(fp) && (c = fgetc(fp)) != '>') { 144 | if (isdigit(c)) *q++ = c; 145 | else if (q != tmp && (c == '\t' || c == ' ' || c == '\n')) { 146 | *q = '\0'; 147 | if (l + 1 >= max) { 148 | max += SEQ_BLOCK_SIZE; 149 | seq->s = (char*)MYREALLOC(seq->s, sizeof(char) * max); 150 | } 151 | seq->s[l++] = (char)atoi(tmp); 152 | q = tmp; 153 | } 154 | } 155 | if (c == '>') ungetc(c, fp); 156 | seq->s[l] = 0; 157 | seq->m = max; seq->l = l; 158 | return l; 159 | } 160 | -------------------------------------------------------------------------------- /align_lib/seq.h: -------------------------------------------------------------------------------- 1 | /* 2 | * libalign -- alignment utilities 3 | * 4 | * Copyright (c) 2003-2004, Li Heng 5 | * 6 | * 7 | * This library is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU Lesser General Public 9 | * License as published by the Free Software Foundation; either 10 | * version 2.1 of the License, or (at your option) any later version. 
11 | * 12 | * This library is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | * Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public 18 | * License along with this library; if not, write to the Free Software 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 | * 21 | */ 22 | 23 | #ifndef SEQ_H_ 24 | #define SEQ_H_ 25 | 26 | #include 27 | #include "common/common.h" 28 | 29 | #define SEQ_BLOCK_SIZE 512 30 | #define SEQ_MAX_NAME_LEN 255 31 | 32 | #define INIT_SEQ(seq) (seq).s = 0; (seq).l = (seq).m = 0 33 | 34 | #define CHAR2QUAL(c) \ 35 | ((isdigit(c))? ((c)-'0') : ((islower(c))? ((c)-'a'+10) : ((isupper(c))? ((c)-'A'+36) : 0))) 36 | #define QUAL2CHAR(q) \ 37 | (((q)<10)? ((q)+'0') : (((q)<36)? ((q)-10+'a') : (((q)<62)? ((q)-36+'A') : 'Z'))) 38 | 39 | typedef struct 40 | { 41 | int l, m; /* length and maximum buffer size */ 42 | char *s; /* sequence */ 43 | } seq_t; 44 | 45 | #ifdef __cplusplus 46 | extern "C" { 47 | #endif 48 | 49 | int read_fasta(FILE*, seq_t*, char*, char*); 50 | int read_fasta_str(char *buffer, seq_t*, char*, char*, char **ptr); 51 | int read_qual(FILE*, seq_t*, char*, char*); 52 | 53 | #ifdef __cplusplus 54 | } 55 | #endif 56 | 57 | #endif /* SEQ_H_ */ 58 | -------------------------------------------------------------------------------- /align_lib/table.h: -------------------------------------------------------------------------------- 1 | /* 2 | * libalign -- alignment utilities 3 | * 4 | * Copyright (c) 2003-2004, Li Heng 5 | * 6 | * 7 | * This library is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU Lesser General Public 9 | * License as published by the Free Software Foundation; either 10 | * version 2.1 of the License, or (at your option) any later 
version. 11 | * 12 | * This library is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | * Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public 18 | * License along with this library; if not, write to the Free Software 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 | * 21 | */ 22 | 23 | #ifndef TABLE_H_ 24 | #define TABLE_H_ 25 | 26 | #define ALN_NT4_A 0 27 | #define ALN_NT4_G 1 28 | #define ALN_NT4_C 2 29 | #define ALN_NT4_T 3 30 | #define ALN_NT4_N 4 31 | #define ALN_NT4_GAP 5 32 | 33 | /* char -> 5 (=4+1) nucleotides */ 34 | extern unsigned char aln_nt4_table[256]; 35 | extern char *aln_nt4_rev_table; 36 | 37 | /* char -> 17 (=16+1) nucleotides */ 38 | extern unsigned char aln_nt16_table[256]; 39 | extern char *aln_nt16_rev_table; 40 | extern int aln_nt16_table_aux[16]; 41 | extern unsigned char aln_nt16_table_comp[17]; 42 | 43 | /* char -> 22 (=20+1+1) amino acids */ 44 | extern unsigned char aln_aa_table[256]; 45 | extern char *aln_aa_rev_table; 46 | 47 | /* translation table */ 48 | extern unsigned char aln_trans_table_eu[66]; 49 | extern char *aln_trans_table_eu_char; 50 | 51 | /* BLOSUM62 and BLOSUM45 */ 52 | #define ALN_AA_GAP 22 53 | extern int aln_sm_blosum62[], aln_sm_blosum45[]; 54 | 55 | /* human-mouse score matrix for 4 bases */ 56 | extern int aln_sm_hs[]; 57 | 58 | /* common nucleotide score matrix for 16 bases */ 59 | extern int aln_sm_nt[]; 60 | 61 | /* common read for 16 bases. note that read alignment is quite different from common nucleotide alignment */ 62 | extern int aln_sm_read[]; 63 | 64 | /* fill aln_sm_nt[]. just for development. do not use it, unless you know what you are doing. 
*/ 65 | #ifdef __cplusplus 66 | extern "C" { 67 | #endif 68 | void aln_make_sm_nt(int row, int *score_matrix); 69 | int aln_trans_seq(const char *nt, int len, char *aa, int is_trans); 70 | #ifdef __cplusplus 71 | } 72 | #endif 73 | 74 | #endif /* TABLE_H_ */ 75 | -------------------------------------------------------------------------------- /align_lib/transaln.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "align.h" 4 | 5 | char *aln_back_trans(char *aln_aa, char *seq_nt_pre, int *flaw_code) 6 | { 7 | AlnAln *aA; 8 | char *t, *s, *seq_aa, *aln_nt_pre, *aln_nt, *seq_nt; 9 | path_t *p; 10 | int l, count, flaw, base, tmp; 11 | 12 | seq_aa = (char*)MYALLOC(sizeof(char) * (strlen(aln_aa) + 1)); 13 | seq_nt = (char*)MYALLOC(sizeof(char) * (strlen(seq_nt_pre) + 1)); 14 | 15 | for (s = aln_aa, t = seq_aa; *s; ++s) 16 | if (isalpha(*s)) *t++ = *s; 17 | *t = '\0'; 18 | for (s = seq_nt_pre, t = seq_nt; *s; ++s) 19 | if (isalpha(*s)) *t++ = *s; 20 | *t = '\0'; 21 | aA = aln_nt2aa(seq_nt, seq_aa, &aln_param_aa2nt, 0); 22 | 23 | count = base = 0; flaw = 0; 24 | aln_nt = (char*)MYALLOC(sizeof(char) * (strlen(aln_aa)*3 + 1)); 25 | aln_nt_pre = (char*)MYALLOC(sizeof(char) * (strlen(aln_aa)*3 + 1)); 26 | /* check head */ 27 | p = aA->path + aA->path_len - 1; 28 | s = aln_nt_pre; 29 | if (p->j-1 != 0) { 30 | flaw |= FLAW_NO_HEAD; 31 | for (l = 0; l < p->j-1; ++l) { 32 | *s++ = 'X'; *s++ = 'X'; *s++ = 'X'; 33 | } 34 | base += p->j-1; 35 | } 36 | /* check alignment */ 37 | p = aA->path + aA->path_len - 1; 38 | for (l = 0; p >= aA->path; --p) { 39 | switch (p->ctype) { 40 | case FROM_M: 41 | *s++ = seq_nt[p->i-3]; *s++ = seq_nt[p->i-2]; *s++ = seq_nt[p->i-1]; 42 | if (isalpha(aA->outm[l+2])) ++count; 43 | l += 3; 44 | break; 45 | case FROM_I: *s++ = 'X'; *s++ = 'X'; *s++ = 'X'; flaw |= FLAW_INS; l += 3; break; 46 | case FROM_D: flaw |= FLAW_DEL; l += 3; break; 47 | case FROM_O: flaw |= FLAW_FRAME_SHIFT; l += 
1; break; 48 | case FROM_T: flaw |= FLAW_FRAME_SHIFT; l += 2; break; 49 | } 50 | } 51 | base += aA->path_len; 52 | /* check tail */ 53 | p = aA->path; 54 | tmp = strlen(seq_aa); 55 | if (p->j != tmp) { 56 | flaw |= FLAW_NO_TAIL; 57 | for (l = p->j; l < tmp; ++l) { 58 | *s++ = 'X'; *s++ = 'X'; *s++ = 'X'; 59 | } 60 | base += tmp - p->j; 61 | } 62 | *s = '\0'; 63 | /* make alignment */ 64 | for (l = 0, t = aln_nt, s = aln_aa; *s; ++s) { 65 | if (isalpha(*s)) { 66 | *t++ = aln_nt_pre[l++]; 67 | *t++ = aln_nt_pre[l++]; 68 | *t++ = aln_nt_pre[l++]; 69 | } else { 70 | *t++ = *s; *t++ = *s; *t++ = *s; 71 | } 72 | } 73 | *t = '\0'; 74 | flaw |= (int)(100.0*count/base+0.5); 75 | 76 | aln_free_AlnAln(aA); 77 | MYFREE(seq_nt); MYFREE(seq_aa); MYFREE(aln_nt_pre); 78 | 79 | if (flaw_code) *flaw_code = flaw; 80 | return aln_nt; 81 | } 82 | -------------------------------------------------------------------------------- /align_lib/version.h: -------------------------------------------------------------------------------- 1 | #ifndef VERSION_H_ 2 | #define VERSION_H_ 3 | 4 | static char *aln_version = "0.1.5"; 5 | static char *aln_date = "May 3, 2006"; 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /aln_mask.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "align_lib/table.h" 3 | #include "align.h" 4 | #include "tree.h" 5 | #include "utils.h" 6 | 7 | #define MIN_POOR_LENGTH 10 8 | 9 | Tree *tr_build_tree_from_align(MultiAlign *ma, int n_cons, Tree **cons, int init_cons, Tree *spec, 10 | int dist_type, int is_sdi_root, int is_rand); 11 | 12 | static double *cal_weight(MultiAlign *ma) 13 | { 14 | int i, n; 15 | double *fweight, f, sum; 16 | Tree *p, *root, **node; 17 | 18 | n = ma->n; 19 | fweight = (double*)malloc(sizeof(double) * n); 20 | root = tr_build_tree_from_align(ma, 0, 0, 0, 0, DIST_MM, 0, 0); /* build NJ tree */ 21 | node = tr_stack(root, Tree*); 22 | 
tr_expand_leaf_by_id(root, node); 23 | /* calculate 'order'. Tree::flag should be '0' */ 24 | for (i = 0; i < n; ++i) { 25 | p = node[i]; 26 | while (p) { 27 | ++(p->flag); 28 | p = p->pre; 29 | } 30 | } 31 | /* calculate weight */ 32 | for (i = 0, sum = 0.0; i < n; ++i) { 33 | p = node[i]; 34 | f = 0.0; 35 | while (p->pre) { 36 | f += p->d / p->flag; 37 | p = p->pre; 38 | } 39 | fweight[i] = f; 40 | sum += f; 41 | } 42 | /* re-scale */ 43 | for (i = 0; i < n; ++i) 44 | fweight[i] /= sum; 45 | free(node); 46 | tr_delete_tree(root); 47 | return fweight; 48 | } 49 | static int *cal_profile_score_array(MultiAlign *ma, int k, double *weight) 50 | { 51 | int i, j, n, sum; 52 | int *S, *new_w, *bl_array; 53 | 54 | n = ma->n; 55 | new_w = (int*)malloc(sizeof(int) * n); 56 | S = (int*)malloc(sizeof(int) * (ma->len + 1)); 57 | /* calculate weight excluding sequence k */ 58 | for (i = 0; i < n; ++i) 59 | new_w[i] = (int)(weight[i] / (1.0 - weight[k]) * 1000.0 + 0.5); 60 | for (j = 0; j < ma->len; ++j) { 61 | if (ma->seq[k][j] == ALN_AA_GAP) { 62 | S[j] = 0; 63 | continue; 64 | } 65 | bl_array = aln_sm_blosum62 + (int)ma->seq[k][j] * ALN_AA_GAP; 66 | sum = 0; 67 | for (i = 0; i < n; ++i) { 68 | if (i == k) continue; 69 | if (ma->seq[i][j] != ALN_AA_GAP) 70 | sum += bl_array[(int)ma->seq[i][j]] * new_w[i] + new_w[i]; 71 | } 72 | S[j] = sum; 73 | } 74 | free(new_w); 75 | return S; 76 | } 77 | static void mask_one_seq(MultiAlign *ma, MultiAlign *ma_nt, int k, double *weight) 78 | { 79 | int *S, *F, *B, *start; 80 | int n, i, j, len; 81 | char *seq, *seq_nt; 82 | 83 | n = ma->n; 84 | len = ma->len; 85 | if (ma_nt && (ma_nt->len != len * 3 || ma_nt->n != n)) { 86 | fprintf(stderr, "[mask_one_seq] 'ma_nt' is inconsistent with 'ma'. 
'ma_nt' is skipped.\n"); 87 | ma_nt = 0; 88 | } 89 | F = (int*)malloc(sizeof(int) * (len + 2)); 90 | B = (int*)malloc(sizeof(int) * (len + 2)); 91 | /* calculate F and B */ 92 | S = cal_profile_score_array(ma, k, weight); 93 | --S; 94 | F[0] = 0; 95 | for (j = 1; j <= len; ++j) 96 | F[j] = (F[j-1] + S[j] < 0)? F[j-1] + S[j] : 0; 97 | B[len+1] = 0; 98 | for (j = len; j >= 1; --j) 99 | B[j] = (B[j+1] + S[j] < 0)? B[j+1] + S[j] : 0; 100 | ++S; 101 | free(S); 102 | /* do mask */ 103 | seq = ma->seq[k]; 104 | seq_nt = ma_nt ? ma_nt->seq[k] : 0; 105 | start = (int*)malloc(sizeof(int) * (len + 2)); 106 | start[0] = 0; 107 | for (j = 1; j <= len; ++j) { 108 | if (F[j] < 0 && B[j] < 0) { 109 | start[j] = (start[j-1] == 0)? j : start[j - 1]; 110 | } else { 111 | start[j] = j; 112 | if (start[j-1] > 0 && j - start[j - 1] >= MIN_POOR_LENGTH) { /* do mask in this region */ 113 | int n_poor = 0; 114 | for (i = start[j-1]; i < j; ++i) 115 | if (seq[i-1] != ALN_AA_GAP) ++n_poor; 116 | if (n_poor < MIN_POOR_LENGTH) continue; 117 | for (i = start[j-1]; i < j; ++i) { 118 | if (seq[i-1] != ALN_AA_GAP) { 119 | seq[i-1] = 21; /* X */ 120 | if (seq_nt) seq_nt[i*3-3] = seq_nt[i*3-2] = seq_nt[i*3-1] = 4; 121 | } 122 | } 123 | } 124 | } 125 | } 126 | free(start); free(F); free(B); 127 | } 128 | void ma_mask_poor_segment(MultiAlign *ma, MultiAlign *ma_nt) 129 | { 130 | int k; 131 | double *weight; 132 | assert(ma); 133 | if (ma->n < 3) return; /* do nothing */ 134 | weight = cal_weight(ma); 135 | for (k = 0; k < ma->n; ++k) 136 | mask_one_seq(ma, ma_nt, k, weight); 137 | free(weight); 138 | } 139 | -------------------------------------------------------------------------------- /backtrans.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "align.h" 6 | #include "utils.h" 7 | #include "align_lib/align.h" 8 | #include "align_lib/table.h" 9 | #include "align_lib/seq.h" 10 | #include 
"common/hash_char.h" 11 | 12 | extern "C" { 13 | //char *aln_back_trans(char *aln_aa, char *seq_nt_pre, int *flaw_code); 14 | int ma_backtrans_task(int argc, char *argv[]); 15 | FILE *tr_get_fp(const char *fn); 16 | } 17 | 18 | hash_map_char *ta_read_fa(FILE *fp) 19 | { 20 | hash_map_char *hash; 21 | char name[256]; 22 | seq_t seq; 23 | 24 | assert(fp); 25 | INIT_SEQ(seq); 26 | hash = new hash_map_char; 27 | while ((read_fasta(fp, &seq, name, 0)) >= 0) { 28 | char *s = (char*)malloc(sizeof(char) * (seq.l+1)); 29 | strcpy(s, seq.s); 30 | hash->insert(name, s); 31 | } 32 | MYFREE(seq.s); 33 | return hash; 34 | } 35 | MultiAlign *ma_back_trans_core(MultiAlign *aa, hash_map_char *hash, double threshold) 36 | { 37 | char *aln_aa, *aln_nt_pre, *aln_nt, *seq_nt; 38 | int flaw; 39 | int i, j, k, l; 40 | MultiAlign *nt; 41 | 42 | nt = ma_alloc(); 43 | nt->len = aa->len * 3; 44 | nt->max = aa->n; 45 | nt->is_nucl = 1; 46 | nt->name = (char**)malloc(sizeof(char*) * aa->n); 47 | nt->seq = (char**)malloc(sizeof(char*) * aa->n); 48 | nt->gene = (char**)malloc(sizeof(char*) * aa->n); 49 | nt->comment = (char**)malloc(sizeof(char*) * aa->n); 50 | for (i = 0; i < aa->n; ++i) 51 | nt->name[i] = nt->seq[i] = nt->gene[i] = nt->comment[i] = 0; 52 | aln_aa = (char*)malloc(sizeof(char) * (aa->len + 1)); 53 | for (i = k = 0; i < aa->n; ++i) { 54 | if (hash->find(aa->name[i], &seq_nt)) { 55 | for (j = 0; j < aa->len; ++j) 56 | aln_aa[j] = aln_aa_rev_table[aa->seq[i][j]]; 57 | aln_aa[j] = '\0'; 58 | aln_nt_pre = aln_back_trans(aln_aa, seq_nt, &flaw); 59 | /* fprintf(stderr, "%d\t%-30s\t%d\t%d\t%s\n", i, aa->name[i], nt->len, strlen(aln_nt_pre), aln_nt_pre); */ 60 | if ((flaw & 0xff)/100.0 > threshold) { 61 | if (flaw & 0xff00) 62 | fprintf(stderr, " flaw code %x for sequence %s\n", flaw&0xff00, aa->name[i]); 63 | nt->name[k] = cpystr(aa->name[i]); 64 | if (aa->gene && aa->gene[i]) nt->gene[k] = cpystr(aa->gene[i]); 65 | if (aa->comment && aa->comment[i]) nt->comment[k] = 
cpystr(aa->comment[i]); 66 | aln_nt = nt->seq[k] = (char*)malloc(sizeof(char) * nt->len); 67 | for (l = 0; l < nt->len; ++l) 68 | aln_nt[l] = aln_nt4_table[(int)aln_nt_pre[l]]; 69 | ++k; 70 | } else fprintf(stderr, " poor nt2aa alignment for %s (%d%% < %d%%)\n", 71 | aa->name[i], flaw&0xff, (int)(threshold*100.0+0.5)); 72 | free(aln_nt_pre); 73 | } else fprintf(stderr, " fail to find sequence %s\n", aa->name[i]); 74 | } 75 | nt->n = k; 76 | free(aln_aa); 77 | return nt; 78 | } 79 | MultiAlign *ma_back_trans(MultiAlign *aa, FILE *fp, double threshold) 80 | { 81 | hash_map_char *hash; 82 | hash_map_char::iterator iter; 83 | MultiAlign *ma; 84 | hash = ta_read_fa(fp); 85 | ma = ma_back_trans_core(aa, hash, threshold); 86 | for (iter = hash->begin(); iter < hash->end(); ++iter) 87 | if (isfilled(iter)) MYFREE(iter->val); 88 | delete hash; 89 | return ma; 90 | } 91 | static int ma_backtrans_usage() 92 | { 93 | fprintf(stderr, "Usage: treebest backtrans [-t ] \n"); 94 | return 1; 95 | } 96 | int ma_backtrans_task(int argc, char *argv[]) 97 | { 98 | int c; 99 | FILE *fp_aa, *fp_nt; 100 | double thres = 0.90; 101 | MultiAlign *aa, *nt; 102 | while ((c = getopt(argc, argv, "t:")) >= 0) { 103 | switch (c) { 104 | case 't': thres = atof(optarg); break; 105 | } 106 | } 107 | if (argc-1 <= optind) return ma_backtrans_usage(); 108 | fp_aa = tr_get_fp(argv[optind]); 109 | fp_nt = tr_get_fp(argv[optind+1]); 110 | aa = ma_read_alignment(fp_aa, 0); 111 | nt = ma_back_trans(aa, fp_nt, thres); 112 | tr_align_output(stdout, nt); 113 | fclose(fp_aa); fclose(fp_nt); 114 | ma_free(aa); ma_free(nt); 115 | return 0; 116 | } 117 | -------------------------------------------------------------------------------- /best.h: -------------------------------------------------------------------------------- 1 | #ifndef LH3_BEST_H_ 2 | #define LH3_BEST_H_ 3 | 4 | #include "tree.h" 5 | #include "align.h" 6 | 7 | typedef struct 8 | { 9 | MultiAlign *ma; 10 | Tree *ctree; 11 | Tree *stree; 12 | int 
is_contract_stree; 13 | int is_phyml; 14 | int is_phyml_spec; 15 | int is_phyml_cons; 16 | 17 | /* output related */ 18 | int is_debug; 19 | char *prefix; 20 | char *output_fn; 21 | 22 | /* alignment preprocessing */ 23 | int is_sequenced_only; 24 | int is_collapse_splice; 25 | int is_mask_lss; 26 | int is_quiet; 27 | int qual_thres; 28 | int only_filter; 29 | 30 | /* PHYML related, passed to PhymlConfig */ 31 | int n_cat; 32 | double kappa; 33 | double alpha; 34 | double prob_dup; 35 | double prob_loss_dup; 36 | double prob_loss_spec; 37 | double prob_not_exist; 38 | double lk_scale; 39 | 40 | Tree* is_skip_mmerge; 41 | } BestConfig; 42 | 43 | #ifdef __cplusplus 44 | extern "C" { 45 | #endif 46 | 47 | BestConfig *best_init_option(); 48 | void best_free_option(BestConfig *bo); 49 | int best_task(int argc, char *argv[]); 50 | BestConfig *best_command_line_options(int argc, char *argv[]); 51 | Tree *best_core(BestConfig *bo); 52 | 53 | #ifdef __cplusplus 54 | } 55 | #endif 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /brent.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define ITMAX 100 4 | #define CGOLD 0.3819660 5 | #define ZEPS 1.0e-10 6 | #define SHFT(a,b,c,d) (a)=(b);(b)=(c);(c)=(d); 7 | #define SIGN(a,b) (((b) >= 0.0)? fabs(a) : -fabs(a)) 8 | 9 | /* 10 | * Brent algorithm 11 | */ 12 | double brent(double ax, double bx, double cx, double (*f)(double), double tol, double *xmin) 13 | { 14 | int iter; 15 | double a, b, d, etemp, fu, fv, fw, fx, p, q, r, tol1, tol2, u, v, w, x, xm; 16 | double e; /* step length */ 17 | 18 | d = e = 0.0; 19 | a = (ax < cx)? ax : cx; 20 | b = (ax > cx)? 
ax : cx; 21 | x = w = v = bx; 22 | fw = fv = fx = (*f)(x); 23 | for (iter = 1; iter < ITMAX; ++iter) { 24 | xm = 0.5 * (a + b); 25 | tol2 = 2.0 * (tol1 = tol * fabs(x) + ZEPS); 26 | if (fabs(x - xm) <= tol2 - 0.5 * (b-a)) { 27 | *xmin = x; 28 | return fx; /* done */ 29 | } 30 | if (fabs(e) > tol1) { /* construct a trial parabolic fit */ 31 | r = (x - w) * (fx - fv); 32 | q = (x - v) * (fx - fw); 33 | p = (x - v) * q - (x - w) * r; 34 | q = 2.0 * (q - r); 35 | if (q > 0.0) p = -p; 36 | q = fabs(q); 37 | etemp = e; 38 | e = d; 39 | if (fabs(p) >= fabs(0.5 * q * etemp) || p <= q * (a - x) || p >= q * (b - x)) 40 | /* This condition determine the acceptability of parabolic fit. Here we 41 | * take golden section step into the larger of the two segments. */ 42 | d = CGOLD * (e = (x >= xm)? a - x : b - x); 43 | else { 44 | d = p / q; 45 | u = x + d; 46 | if (u - a < tol2 || b - u < tol2) 47 | d = SIGN(tol1, xm - x); 48 | } 49 | } else d = CGOLD * (e = (x >= xm)? a - x : b - x); 50 | u = (fabs(d) >= tol1)? x + d : x + SIGN(tol1, d); 51 | fu = (*f)(u); 52 | /* This is the one function evaluation per iteration. 
*/ 53 | if (fu <= fx) { 54 | if (u >= x) a = x; 55 | else b = x; 56 | SHFT(v, w, x, u); 57 | SHFT(fv, fw, fx, fu); 58 | } else { 59 | if (u < x) a = u; 60 | else b = u; 61 | if (fu <= fw || w == x) { 62 | v = w; w = u; 63 | fv = fw; fw = fu; 64 | } else if (fu <= fv || v == x || v == w) { 65 | v = u; 66 | fv = fu; 67 | } 68 | } 69 | } 70 | *xmin = x; 71 | return fx; 72 | } 73 | 74 | #ifdef DEBUG 75 | double f(double x) 76 | { 77 | return (x-1) * (x-1); 78 | } 79 | int main() 80 | { 81 | double x, y; 82 | y = brent(-10, -5, 0, f, 1e-5, &x); 83 | printf("x=%f, y=%f\n", x, y); 84 | return 0; 85 | } 86 | #endif 87 | -------------------------------------------------------------------------------- /common/common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H_ 2 | #define COMMON_H_ 3 | 4 | #include 5 | 6 | typedef unsigned char uchar; 7 | 8 | #ifdef USE_KR_ALLOC 9 | #define MYALLOC kr_alloc 10 | #define MYFREE kr_free 11 | #define MYREALLOC kr_realloc 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | void *kr_alloc(size_t); 17 | void *kr_realloc(void*, size_t); 18 | void kr_free(void*); 19 | void kr_check_list(); 20 | size_t kr_size(void*); 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | 25 | #else /* USE_KR_ALLOC */ 26 | #define MYALLOC malloc 27 | #define MYFREE free 28 | #define MYREALLOC realloc 29 | #endif /* USE_KR_ALLOC */ 30 | 31 | #endif /* COMMON_H_ */ 32 | -------------------------------------------------------------------------------- /common/hash_char.h: -------------------------------------------------------------------------------- 1 | #ifndef HASH_CHAR_H_ 2 | #define HASH_CHAR_H_ 3 | 4 | #include 5 | #include 6 | #include "hash_com.h" 7 | 8 | template 9 | struct __lih_ValKeyPairChar 10 | { 11 | ValueType val; 12 | char *key; 13 | bool isempty, isdel; 14 | }; 15 | 16 | template 17 | class hash_map_char:public __lih_hash_base_class 18 | <__lih_ValKeyPairChar > 19 | { 20 | typedef 
__lih_ValKeyPairChar ValKeyStruct; 21 | 22 | inline bool insert_aux(ValKeyStruct *vkp, size_t m, const char *key, ValKeyStruct **p) { 23 | *p = __lih_hash_insert_aux(vkp, m, key); 24 | if ((*p)->isempty) { 25 | char *r = (char*)::malloc(sizeof(char) * (strlen(key) + 1)); 26 | (*p)->key = r; 27 | strcpy((*p)->key, key); 28 | (*p)->isempty = false; 29 | } else if ((*p)->isdel) { 30 | ::free((*p)->key); 31 | (*p)->key = (char*)::malloc(sizeof(char) * (strlen(key) + 1)); 32 | strcpy((*p)->key, key); 33 | (*p)->isdel = false; 34 | } else return true; 35 | return false; 36 | } 37 | public: 38 | hash_map_char(void) {}; 39 | ~hash_map_char(void) { clear(); }; 40 | inline void clear(void) 41 | { 42 | ValKeyStruct *p; 43 | for (p = this->val_key_pair; p < this->val_key_pair + this->curr_m; p++) { 44 | if (!p->isempty) ::free(p->key); 45 | } 46 | __lih_hash_base_class::clear(); 47 | } 48 | inline void resize(size_t m) 49 | { 50 | size_t new_m, new_upper; 51 | new_m = __lih_hash_cal_size(m); 52 | new_upper = int(new_m * __lih_HASH_UPPER + 0.5); 53 | // if count_n is beyond the new upper boundary, return 54 | if (this->count_n >= new_upper) return; 55 | 56 | ValKeyStruct *new_vkp, *p, *q; 57 | new_vkp = (ValKeyStruct*)malloc(new_m * sizeof(ValKeyStruct)); 58 | __lih_hash_clear_aux(new_vkp, new_m); 59 | 60 | for (p = this->val_key_pair; p < this->val_key_pair + this->curr_m; p++) { 61 | if (!p->isempty && !p->isdel) { 62 | insert_aux(new_vkp, new_m, p->key, &q); 63 | q->val = p->val; 64 | } 65 | } 66 | for (p = this->val_key_pair; p < this->val_key_pair + this->curr_m; p++) 67 | if (!p->isempty) ::free(p->key); 68 | ::free(this->val_key_pair); 69 | this->val_key_pair = new_vkp; 70 | this->curr_m = new_m; 71 | this->upper_bound = new_upper; 72 | } 73 | inline bool insert(const char *key, const ValueType &val) 74 | { 75 | ValKeyStruct *p; 76 | if (this->count_n >= this->upper_bound) 77 | resize(this->curr_m + 1); 78 | if (insert_aux(this->val_key_pair, this->curr_m, key, &p)) { 79 
| p->val = val; 80 | return true; 81 | } else { 82 | ++(this->count_n); 83 | p->val = val; 84 | return false; 85 | } 86 | } 87 | inline bool insert(const char *key, ValueType **q) 88 | { 89 | ValKeyStruct *p; 90 | if (this->count_n >= this->upper_bound) 91 | resize(this->curr_m + 1); 92 | if (insert_aux(this->val_key_pair, this->curr_m, key, &p)) { 93 | *q = &(p->val); 94 | return true; 95 | } else { 96 | ++(this->count_n); 97 | *q = &(p->val); 98 | return false; 99 | } 100 | } 101 | inline bool find(const char *key, ValueType *value) const 102 | { 103 | ValKeyStruct *p; 104 | p = __lih_hash_search_aux(this->val_key_pair, this->curr_m, key); 105 | if (p && !p->isempty && !p->isdel) { 106 | *value = p->val; 107 | return true; 108 | } 109 | return false; 110 | } 111 | inline void free() 112 | { 113 | clear(); 114 | __lih_hash_base_class::free(); 115 | } 116 | }; 117 | 118 | struct __lih_KeyStructChar 119 | { 120 | char *key; 121 | bool isempty, isdel; 122 | }; 123 | 124 | class hash_set_char:public __lih_hash_base_class<__lih_KeyStructChar> 125 | { 126 | typedef __lih_KeyStructChar KeyStruct; 127 | 128 | inline bool insert_aux(KeyStruct *vkp, size_t m, const char *key) { 129 | KeyStruct *p; 130 | p = __lih_hash_insert_aux(vkp, m, key); 131 | if (p->isempty) { 132 | p->key = (char*)malloc(sizeof(char) * (strlen(key) + 1)); 133 | strcpy(p->key, key); 134 | p->isempty = false; 135 | } else if (p->isdel) { 136 | ::free(p->key); 137 | p->key = (char*)malloc(sizeof(char) * (strlen(key) + 1)); 138 | strcpy(p->key, key); 139 | p->isdel = false; 140 | } else return true; 141 | return false; 142 | } 143 | public: 144 | hash_set_char(void) {}; 145 | ~hash_set_char(void) { clear(); }; 146 | inline void clear(void) 147 | { 148 | KeyStruct *p; 149 | for (p = val_key_pair; p < val_key_pair + curr_m; p++) { 150 | if (!p->isempty) ::free(p->key); 151 | } 152 | __lih_hash_base_class::clear(); 153 | } 154 | inline void resize(size_t m) 155 | { 156 | size_t new_m, new_upper; 157 | new_m 
= __lih_hash_cal_size(m); 158 | new_upper = int(new_m * __lih_HASH_UPPER + 0.5); 159 | // if count_n is beyond the new upper boundary, return 160 | if (count_n >= new_upper) return; 161 | 162 | KeyStruct *new_vkp, *p; 163 | new_vkp = (KeyStruct*)malloc(new_m * sizeof(KeyStruct)); 164 | __lih_hash_clear_aux(new_vkp, new_m); 165 | 166 | for (p = val_key_pair; p < val_key_pair + curr_m; p++) { 167 | if (!p->isempty) { 168 | if (!p->isdel) { 169 | insert_aux(new_vkp, new_m, p->key); 170 | } 171 | ::free(p->key); 172 | } 173 | } 174 | ::free(val_key_pair); 175 | val_key_pair = new_vkp; 176 | curr_m = new_m; 177 | upper_bound = new_upper; 178 | } 179 | inline bool insert(const char *key) 180 | { 181 | if (count_n >= upper_bound) 182 | resize(curr_m + 1); 183 | if (insert_aux(val_key_pair, curr_m, key)) return true; 184 | count_n++; 185 | return false; 186 | } 187 | inline bool find(const char *key) 188 | { 189 | KeyStruct *p; 190 | p = __lih_hash_search_aux(val_key_pair, curr_m, key); 191 | if (p && !p->isempty && !p->isdel) { 192 | return true; 193 | } 194 | return false; 195 | } 196 | inline bool erase(const char *key) 197 | { 198 | KeyStruct *p; 199 | if ((p = __lih_hash_erase_aux(val_key_pair, curr_m, key))) { 200 | ::free(p->key); 201 | p->key = 0; 202 | --count_n; 203 | return true; 204 | } 205 | return false; 206 | } 207 | }; 208 | 209 | #endif // HASH_CHAR_H_ 210 | -------------------------------------------------------------------------------- /common/hash_com.h: -------------------------------------------------------------------------------- 1 | /* This hash table is a closed hash using double hashing method. 
*/ 2 | #ifndef HASH_COM_H_ 3 | #define HASH_COM_H_ 4 | 5 | #include 6 | #include 7 | 8 | typedef unsigned int bit32_t; 9 | typedef unsigned long long bit64_t; 10 | typedef unsigned short bit16_t; 11 | 12 | const double __lih_HASH_UPPER = 0.70; 13 | const int __lih_HASH_PRIME_SIZE = 30; 14 | 15 | /* kinds of hash functions for string */ 16 | 17 | inline bit32_t __lih_sgi_hash_string(const char* s) 18 | { 19 | bit32_t h = 0; 20 | for ( ; *s; s++) 21 | h = 5 * h + *s; 22 | return h; 23 | } 24 | inline bit32_t __lih_ELF_hash_string(const char *key) 25 | { 26 | bit32_t g, h = 0; 27 | while (*key) { 28 | h = (h << 4) + *key++; 29 | g = h & 0xf0000000ul; 30 | if (g) h ^= g >> 24; 31 | h &= ~g; 32 | } 33 | return h; 34 | } 35 | 36 | // This will be the default hashing function for string. 37 | // Do a web search "g_str_hash X31_HASH" for more information. 38 | 39 | inline bit32_t __lih_X31_hash_string(const char *s) 40 | { 41 | bit32_t h = 0; 42 | for ( ; *s; s++) 43 | h = (h << 5) - h + *s; 44 | return h; 45 | } 46 | 47 | /* kinds of hash functions for bit32_t */ 48 | 49 | inline bit32_t __lih_Knuth_hash_int(bit32_t key) 50 | { 51 | return key * 2654435761ul; 52 | } 53 | // Note that key = 0 will cause a key zero 54 | inline bit32_t __lih_Jenkins_hash_int(bit32_t key) 55 | { 56 | key += (key << 12); 57 | key ^= (key >> 22); 58 | key += (key << 4); 59 | key ^= (key >> 9); 60 | key += (key << 10); 61 | key ^= (key >> 2); 62 | key += (key << 7); 63 | key ^= (key >> 12); 64 | return key; 65 | } 66 | inline bit64_t __lih_Jenkins_hash_64(bit64_t key) 67 | { 68 | key += ~(key << 32); 69 | key ^= (key >> 22); 70 | key += ~(key << 13); 71 | key ^= (key >> 8); 72 | key += (key << 3); 73 | key ^= (key >> 15); 74 | key += ~(key << 27); 75 | key ^= (key >> 31); 76 | return key; 77 | } 78 | 79 | // This will be the default function for bit32_t 80 | 81 | inline bit32_t __lih_Wang_hash_int(bit32_t key) 82 | { 83 | key += ~(key << 15); 84 | key ^= (key >> 10); 85 | key += (key << 3); 86 | 
key ^= (key >> 6); 87 | key += ~(key << 11); 88 | key ^= (key >> 16); 89 | return key; 90 | } 91 | 92 | /* default hash functions for "bit32_t" and "const char*" */ 93 | 94 | inline bit32_t __lih_hash_fun(bit32_t key) 95 | { 96 | #ifndef LIH_HASH_INT 97 | return __lih_Wang_hash_int(key); 98 | #else 99 | return key; 100 | #endif 101 | } 102 | inline bit32_t __lih_hash_fun(const char *key) 103 | { 104 | return __lih_X31_hash_string(key); 105 | } 106 | inline bit32_t __lih_hash_fun(bit64_t key) 107 | { 108 | #ifdef LIH_HASH_INT 109 | return bit32_t(__lih_Jenkins_hash_64(key)); 110 | #else 111 | return bit32_t(key>>16) ^ bit32_t(key); 112 | #endif 113 | } 114 | inline bit32_t __lih_hash_fun(bit16_t key) 115 | { 116 | #ifndef LIH_HASH_INT 117 | return __lih_Wang_hash_int(bit32_t(key)); 118 | #else 119 | return bit32_t(key); 120 | #endif 121 | } 122 | 123 | /* judge equal for "bit32_t" and "const char*" */ 124 | 125 | inline bool __lih_key_equal(bit32_t a, bit32_t b) 126 | { 127 | return a == b; 128 | } 129 | inline bool __lih_key_equal(const char *a, const char *b) 130 | { 131 | return strcmp(a, b) == 0; 132 | } 133 | inline bool __lih_key_equal(bit64_t a, bit64_t b) 134 | { 135 | return a == b; 136 | } 137 | inline bool __lih_key_equal(bit16_t a, bit16_t b) 138 | { 139 | return a == b; 140 | } 141 | 142 | /* prime table */ 143 | 144 | static const bit32_t __lih_prime_list[__lih_HASH_PRIME_SIZE] = 145 | { 146 | 0ul, 3ul, 53ul, 97ul, 193ul, 147 | 389ul, 769ul, 1543ul, 3079ul, 6151ul, 148 | 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 149 | 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 150 | 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, 151 | 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul 152 | }; 153 | 154 | template 155 | inline TYPE *__lih_hash_insert_aux(TYPE *vkp, size_t m, KeyType key) 156 | { 157 | bit32_t inc, k, i, site; 158 | site = m; 159 | k = __lih_hash_fun(key); 160 | i = k % m; 161 | inc = 1 + k % (m - 1); 162 | 
163 | bit32_t last = i; 164 | while (!vkp[i].isempty && !__lih_key_equal(vkp[i].key, key)) { 165 | if (vkp[i].isdel) site = i; 166 | if (i + inc >= m) { 167 | i = i + inc - m; 168 | } else i += inc; 169 | if (i == last) return vkp + site; 170 | } 171 | if (vkp[i].isempty && site != m) return vkp + site; 172 | else return vkp + i; 173 | } 174 | template 175 | inline TYPE *__lih_hash_search_aux(TYPE *vkp, size_t m, KeyType key) 176 | { 177 | if (!m) return 0; 178 | bit32_t inc, k, i; 179 | k = __lih_hash_fun(key); 180 | i = k % m; 181 | inc = 1 + k % (m - 1); 182 | bit32_t last = i; 183 | while (!vkp[i].isempty && !__lih_key_equal(vkp[i].key, key)) { 184 | if (i + inc >= m) { 185 | i = i + inc - m; 186 | } else i += inc; 187 | if (i == last) return 0; 188 | } 189 | return vkp + i; 190 | } 191 | template 192 | inline TYPE *__lih_hash_erase_aux(TYPE *vkp, size_t m, KeyType key) 193 | { 194 | TYPE *p; 195 | p = __lih_hash_search_aux(vkp, m, key); 196 | if (p && !p->isempty) { 197 | if (p->isdel) return NULL; 198 | p->isdel = true; 199 | return p; 200 | } else return 0; 201 | } 202 | template 203 | inline void __lih_hash_clear_aux(TYPE *vkp, size_t m) 204 | { 205 | for (size_t i = 0; i < m; ++i) { 206 | vkp[i].isempty = true; 207 | vkp[i].isdel = false; 208 | } 209 | } 210 | inline size_t __lih_hash_cal_size(size_t m) 211 | { 212 | bit32_t t; 213 | t = __lih_HASH_PRIME_SIZE - 1; 214 | while (__lih_prime_list[t] > m) --t; 215 | return __lih_prime_list[t+1]; 216 | } 217 | 218 | #define isfilled(p) (!(p)->isempty && !(p)->isdel) 219 | #define isempty(p) ((p)->isempty) 220 | 221 | template 222 | class __lih_hash_base_class 223 | { 224 | protected: 225 | size_t curr_m, count_n, upper_bound; 226 | TYPE *val_key_pair; 227 | public: 228 | typedef TYPE* iterator; 229 | __lih_hash_base_class(void) 230 | { 231 | val_key_pair = 0; 232 | curr_m = 0; 233 | count_n = 0; 234 | upper_bound = 0; 235 | } 236 | ~__lih_hash_base_class(void) { ::free(val_key_pair); } 237 | inline void 
clear(void) 238 | { 239 | __lih_hash_clear_aux(val_key_pair, curr_m); 240 | count_n = 0; 241 | } 242 | inline size_t size(void) const { return count_n; }; 243 | inline size_t capacity(void) const { return curr_m; }; 244 | inline void free() 245 | { 246 | ::free(val_key_pair); 247 | val_key_pair = 0; 248 | curr_m = 0; 249 | count_n = 0; 250 | upper_bound = 0; 251 | } 252 | inline iterator begin() { return val_key_pair; } 253 | inline iterator end() { return val_key_pair + curr_m; } 254 | }; 255 | 256 | #endif // HASH_COM_H_ 257 | -------------------------------------------------------------------------------- /common/hash_misc.h: -------------------------------------------------------------------------------- 1 | #ifndef HASH_MISC_H_ 2 | #define HASH_MISC_H_ 3 | 4 | #include "hash_com.h" 5 | 6 | template 7 | struct __lih_ValKeyPairInt 8 | { 9 | ValueType val; 10 | KeyType key; 11 | bool isempty, isdel; 12 | }; 13 | 14 | template 15 | class hash_map_misc:public __lih_hash_base_class<__lih_ValKeyPairInt > 16 | { 17 | typedef __lih_ValKeyPairInt ValKeyStruct; 18 | 19 | inline bool insert_aux(ValKeyStruct *vkp, size_t m, KeyType key, ValKeyStruct *&p) 20 | { 21 | p = __lih_hash_insert_aux(vkp, m, key); 22 | if (p->isempty) { 23 | p->key = key; 24 | p->isempty = false; 25 | } else if (p->isdel) { 26 | p->key = key; 27 | p->isdel = false; 28 | } else return true; 29 | return false; 30 | } 31 | public: 32 | typedef ValueType value_type; 33 | typedef KeyType key_type; 34 | 35 | hash_map_misc(void) {}; 36 | ~hash_map_misc(void) {}; 37 | inline void resize(size_t m) 38 | { 39 | size_t new_m, new_upper; 40 | new_m = __lih_hash_cal_size(m); 41 | new_upper = int(new_m * __lih_HASH_UPPER + 0.5); 42 | // if this->count_n is beyond the new upper boundary, return 43 | if (this->count_n >= new_upper) return; 44 | 45 | ValKeyStruct *new_vkp, *p, *q; 46 | new_vkp = (ValKeyStruct*)malloc(new_m * sizeof(ValKeyStruct)); 47 | __lih_hash_clear_aux(new_vkp, new_m); 48 | 49 | for (p = 
this->val_key_pair; p < this->val_key_pair + this->curr_m; p++) { 50 | if (!p->isempty && !p->isdel) { 51 | insert_aux(new_vkp, new_m, p->key, q); 52 | q->val = p->val; 53 | } 54 | } 55 | ::free(this->val_key_pair); 56 | this->val_key_pair = new_vkp; 57 | this->curr_m = new_m; 58 | this->upper_bound = new_upper; 59 | } 60 | inline bool insert(KeyType key, const ValueType &val) 61 | { 62 | ValKeyStruct *p; 63 | if (this->count_n >= this->upper_bound) 64 | resize(this->curr_m + 1); 65 | if (insert_aux(this->val_key_pair, this->curr_m, key, p)) { 66 | p->val = val; 67 | return true; 68 | } else { 69 | ++(this->count_n); 70 | p->val = val; 71 | return false; 72 | } 73 | } 74 | inline bool fetch_insert(KeyType key, ValueType **r) 75 | { 76 | ValKeyStruct *p; 77 | if (this->count_n >= this->upper_bound) 78 | resize(this->curr_m + 1); 79 | if (insert_aux(this->val_key_pair, this->curr_m, key, p)) { 80 | *r = &(p->val); 81 | return true; 82 | } else { 83 | ++(this->count_n); 84 | *r = &(p->val); 85 | return false; 86 | } 87 | } 88 | inline bool find(KeyType key, ValueType *value) 89 | { 90 | ValKeyStruct *p; 91 | p = __lih_hash_search_aux(this->val_key_pair, this->curr_m, key); 92 | if (p && !p->isempty && !p->isdel) { 93 | *value = p->val; 94 | return true; 95 | } 96 | return false; 97 | } 98 | inline bool erase(KeyType key) 99 | { 100 | if (__lih_hash_erase_aux(this->val_key_pair, this->curr_m, key)) { 101 | --(this->count_n); 102 | return true; 103 | } 104 | return false; 105 | } 106 | inline ValueType &locate(KeyType key) 107 | { 108 | ValKeyStruct *p; 109 | if (this->count_n >= this->upper_bound) resize(this->curr_m + 1); 110 | p = __lih_hash_insert_aux(this->val_key_pair, this->curr_m, key); 111 | if (p->isempty) { 112 | p->key = key; 113 | p->isempty = false; 114 | } else if (p->isdel) { 115 | p->key = key; 116 | p->isdel = false; 117 | } 118 | return p->val; 119 | } 120 | }; 121 | 122 | template 123 | struct __lih_KeyStructInt 124 | { 125 | KeyType key; 126 | bool 
isempty, isdel; 127 | }; 128 | 129 | template 130 | class hash_set_misc : public __lih_hash_base_class<__lih_KeyStructInt > 131 | { 132 | typedef __lih_KeyStructInt KeyStruct; 133 | 134 | inline bool insert_aux(KeyStruct *vkp, size_t m, KeyType key) 135 | { 136 | KeyStruct *p; 137 | p = __lih_hash_insert_aux(vkp, m, key); 138 | if (p->isempty) { 139 | p->key = key; 140 | p->isempty = false; 141 | } else if (p->isdel) { 142 | p->key = key; 143 | p->isdel = false; 144 | } else return true; 145 | return false; 146 | } 147 | public: 148 | typedef KeyType key_type; 149 | 150 | hash_set_misc(void) {}; 151 | ~hash_set_misc(void) {}; 152 | inline void resize(size_t m) 153 | { 154 | size_t new_m, new_upper; 155 | new_m = __lih_hash_cal_size(m); 156 | new_upper = int(new_m * __lih_HASH_UPPER + 0.5); 157 | // if this->count_n is beyond the new upper boundary, return 158 | if (this->count_n >= new_upper) return; 159 | 160 | KeyStruct *new_vkp, *p; 161 | new_vkp = (KeyStruct*)malloc(new_m * sizeof(KeyStruct)); 162 | __lih_hash_clear_aux(new_vkp, new_m); 163 | 164 | for (p = this->val_key_pair; p < this->val_key_pair + this->curr_m; p++) { 165 | if (!p->isempty && !p->isdel) 166 | insert_aux(new_vkp, new_m, p->key); 167 | } 168 | ::free(this->val_key_pair); 169 | this->val_key_pair = new_vkp; 170 | this->curr_m = new_m; 171 | this->upper_bound = new_upper; 172 | } 173 | inline bool insert(KeyType key) 174 | { 175 | if (this->count_n >= this->upper_bound) 176 | resize(this->curr_m + 1); 177 | if (insert_aux(this->val_key_pair, this->curr_m, key)) return true; 178 | ++(this->count_n); 179 | return false; 180 | } 181 | inline bool find(KeyType key) 182 | { 183 | KeyStruct *p; 184 | p = __lih_hash_search_aux(this->val_key_pair, this->curr_m, key); 185 | if (p && !p->isempty && !p->isdel) return true; 186 | return false; 187 | } 188 | inline bool erase(KeyType key) 189 | { 190 | if (__lih_hash_erase_aux(this->val_key_pair, this->curr_m, key)) { 191 | --(this->count_n); 192 | return 
true; 193 | } 194 | return false; 195 | } 196 | }; 197 | #endif // HASH_INT_H_ 198 | -------------------------------------------------------------------------------- /common/mem.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "common.h" 5 | 6 | #ifdef USE_KR_ALLOC 7 | const double KR_1MB = 1048576.0; 8 | 9 | typedef struct _header_ 10 | { 11 | struct _header_ *ptr; /* next free block */ 12 | size_t size; /* size of current free block */ 13 | } HEADER; 14 | 15 | static HEADER base; 16 | static HEADER *allocp = 0; /* the last and also the first free block */ 17 | static size_t kr_total_allocated = 0; 18 | 19 | void fatal_error(const char *s) 20 | { 21 | fprintf(stderr, "MEMORY LEAK: %s\n", s); 22 | exit(1); 23 | } 24 | static HEADER *morecore(size_t nu) 25 | { 26 | extern void kr_check_list(); 27 | extern void kr_free(void*); 28 | size_t rnu; 29 | HEADER *up; 30 | 31 | rnu = (nu + 0xffff) & (~(size_t)(0xffff)); 32 | up = (HEADER*)malloc(rnu * sizeof(HEADER)); 33 | if (!up) { 34 | kr_check_list(); 35 | fprintf(stderr, "%.3f Mbytes requested but not available.\n", rnu * sizeof(HEADER) / KR_1MB); 36 | exit(1); 37 | } 38 | kr_total_allocated += rnu * sizeof(HEADER); 39 | up->size = rnu; 40 | kr_free(up + 1); 41 | return allocp; 42 | } 43 | void kr_free(void *ap) 44 | { 45 | HEADER *p, *q; 46 | 47 | if (!ap) return; 48 | p = (HEADER*)ap - 1; 49 | for (q = allocp; !(p > q && p < q->ptr); q = q->ptr) 50 | if (q >= q->ptr && (p > q || p < q->ptr)) break; 51 | 52 | if (p + p->size == q->ptr) { /* in one continuous huge block, merge */ 53 | p->size += q->ptr->size; 54 | p->ptr = q->ptr->ptr; 55 | } else if (p + p->size > q->ptr && q->ptr >= p) { 56 | fatal_error("cross-linked list (kr_free[1])"); 57 | } else p->ptr = q->ptr; /* separated blocks, do not merge */ 58 | 59 | if (q + q->size == p) { /* in one continuous huge block */ 60 | q->size += p->size; 61 | q->ptr = p->ptr; 62 | } else if 
(q + q->size > p && p >= q) { 63 | fatal_error("cross-linked list (kr_free[2])"); 64 | } else q->ptr = p; /* separated block, do not merge */ 65 | 66 | allocp = q; 67 | } 68 | void *kr_realloc(void *ap, size_t nbytes) 69 | { 70 | extern void *kr_alloc(size_t); 71 | HEADER *p, *q; 72 | size_t nunits; 73 | 74 | if (!ap) return kr_alloc(nbytes); 75 | nunits = 1 + (nbytes + sizeof(HEADER) - 1) / sizeof(HEADER); 76 | p = (HEADER*)ap - 1; 77 | if (p->size >= nunits) return ap; 78 | q = (HEADER*)kr_alloc(nbytes); 79 | memcpy(q, ap, (p->size - 1) * sizeof(HEADER)); 80 | kr_free(ap); 81 | return q; 82 | } 83 | void *kr_alloc(size_t nbytes) 84 | { 85 | HEADER *p, *q; 86 | size_t nunits; 87 | 88 | nunits = 1 + (nbytes + sizeof(HEADER) - 1) / sizeof(HEADER); 89 | if (!(q = allocp)) { 90 | base.ptr = allocp = q = &base; 91 | base.size = 0; 92 | } 93 | for (p = q->ptr;; q = p, p = p->ptr) { 94 | if (p->size >= nunits) { 95 | if (p->size == nunits) q->ptr = p->ptr; 96 | else { 97 | p->size -= nunits; 98 | p += p->size; 99 | p->size = nunits; 100 | } 101 | allocp = q; 102 | return p + 1; 103 | } 104 | if (p == allocp) 105 | if (!(p = morecore(nunits))) return 0; 106 | } 107 | } 108 | void kr_check_list() 109 | { 110 | unsigned n_blocks, n_ends, n_units; 111 | HEADER *p, *q; 112 | 113 | p = allocp; 114 | n_blocks = n_ends = n_units = 0; 115 | do { 116 | q = p->ptr; 117 | n_units += p->size; 118 | if (p + p->size > q && q > p) 119 | fatal_error("cross-linked list (kr_check_list)"); 120 | if (q < p) ++n_ends; 121 | p = q; 122 | ++n_blocks; 123 | } while (p != allocp); 124 | 125 | if (n_ends != 1) 126 | fatal_error("Not single end in the linked list (kr_check_list)"); 127 | fprintf(stderr, "total space: %.3f Mbytes; free space: %.3f Mbytes; fragments: %u\n", 128 | kr_total_allocated / KR_1MB, n_units * sizeof(HEADER) / KR_1MB, n_blocks); 129 | } 130 | size_t kr_size(void *ap) 131 | { 132 | HEADER *p; 133 | p = (HEADER*)ap - 1; 134 | return (p->size - 1) * sizeof(HEADER); 135 | } 136 | 
#endif /* USR_KR_ALLOC */ 137 | -------------------------------------------------------------------------------- /cpp_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef CPP_UTILS_H_ 2 | #define CPP_UTILS_H_ 3 | 4 | #include "tree.h" 5 | #include "utils.h" 6 | #include "align.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | int cpp_check_align_name(MultiAlign *ma); 13 | int cpp_check_list(int m, char **name); 14 | int cpp_attach_id_by_name(Tree *root, int m, char **name); 15 | int cpp_attach_flag_by_name(Tree *root, int m, char **name); 16 | Tree *cpp_subtree(Tree *root, int m, char **name); 17 | Tree *cpp_subtree_aux(const Tree *root); 18 | Tree **cpp_get_spec_links(const Tree *tree, const Tree *spec); 19 | Tree *cpp_shrink_spec(Tree *spec_tree, int m, char **name); 20 | Tree *cpp_shrink_spec_by_tree(Tree *spec_tree, Tree *gene_tree); 21 | Tree *cpp_check_tree(Tree *tree); 22 | void cpp_set_leaf_order(Tree *src, Tree *dst, int def_val); 23 | Tree *cpp_subtree_spec(Tree *root, Tree *spec, int n, char **spec_name); 24 | void cpp_insert_keyval(Tree *t, const char *key, const char *val); 25 | void cpp_get_keyval(Tree *t, const char *key, char **val); 26 | void cpp_copy_nhx_hash(Tree *dst, Tree *src); 27 | void cpp_shrink_ma_by_spec(MultiAlign *ma, Tree *spec); 28 | void tr_dN_dS(Tree *tree, MultiAlign *ma); 29 | Tree *cpp_post_spec_tree(Tree *tree, int is_core); 30 | 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /cut_tree.c: -------------------------------------------------------------------------------- 1 | /*** 2 | * Author: liheng 3 | * Created: unknown 4 | * Last MDF: 2005-01-22 5 | * 6 | * 2005-01-22 liheng 7 | * 8 | * * remove useless parameter bs_thres 9 | */ 10 | #include 11 | #include 12 | #include "tree.h" 13 | #include "utils.h" 14 | 15 | /* bs_thres is useless at present. 
*/ 16 | Tree *tr_cut_tree(Tree *tree, const Tree *spec, int m, char **ingroup, const char *cut) 17 | { 18 | Tree *lca, *cut_ptr, *p, *q, *r; 19 | 20 | assert(cut); 21 | if (m > 0) assert(ingroup); 22 | if (!tree || !spec || !ingroup || m == 0) return tree; 23 | lca = tr_comm_ancester(tree, m, ingroup); 24 | if (lca == 0 || spec(lca) == 0) return tree; 25 | cut_ptr = tr_search_by_name(spec, cut); 26 | if (cut_ptr == 0) return tree; 27 | /* find CUT node */ 28 | for (p = lca; p->pre; p = p->pre) { /* this loop can be improved. */ 29 | r = spec(p->pre); q = cut_ptr; 30 | while (r->ftime != q->ftime) { 31 | if (r->ftime < q->ftime) r = r->pre; 32 | else q = q->pre; 33 | } 34 | if (q != cut_ptr) break; /* cut here */ 35 | } 36 | if (p->pre == 0) p = tree; /* choose the root node as CUT */ 37 | else if (SDIptr(p->pre)->type != 'D') p = p->pre; /* if not duplication, reserve outgroup */ 38 | return p; 39 | } 40 | -------------------------------------------------------------------------------- /eps.h: -------------------------------------------------------------------------------- 1 | #ifndef EPS_H_ 2 | #define EPS_H_ 3 | 4 | #include 5 | 6 | #define EPS FILE 7 | #define EPSPTR FILE * 8 | #define eps_open(s) fopen((s),"w+") 9 | #define eps_close(fp) fclose(fp) 10 | 11 | #define eps_header(fp,x,y,linewidth) { \ 12 | fprintf(fp,"%%!PS-Adobe-3.0 EPSF-3.0\n"); \ 13 | fprintf(fp,"%%%%BoundingBox:"); \ 14 | fprintf(fp," 1 1 %g %g\n\n",(double)(x),(double)(y)); \ 15 | fprintf(fp,"/C { dup 255 and 255 div exch dup -8 bitshift 255 and 255 div 3 1 roll -16 bitshift 255 and 255 div 3 1 roll setrgbcolor } bind def\n"); \ 16 | fprintf(fp,"/L { 4 2 roll moveto lineto } bind def\n"); \ 17 | fprintf(fp,"/LX { dup 4 -1 roll exch moveto lineto } bind def\n"); \ 18 | fprintf(fp,"/LY { dup 4 -1 roll moveto exch lineto } bind def\n"); \ 19 | fprintf(fp,"/LS { 3 1 roll moveto show } bind def\n"); \ 20 | fprintf(fp,"/RS { dup stringwidth pop 4 -1 roll exch sub 3 -1 roll moveto show } bind 
def\n"); \ 21 | fprintf(fp,"/B { 4 copy 3 1 roll exch 6 2 roll 8 -2 roll moveto lineto lineto lineto closepath } bind def\n");\ 22 | fprintf(fp,"%g setlinewidth\n\n",linewidth);\ 23 | } 24 | #define eps_font(fp,f,s) { \ 25 | fprintf(fp,"/FS %d def\n",s); \ 26 | fprintf(fp,"/FS4 FS 4 div def\n"); \ 27 | fprintf(fp,"/%s findfont FS scalefont setfont\n\n",f); \ 28 | } 29 | 30 | #define eps_bottom(fp) fprintf(fp,"stroke showpage\n") 31 | #define eps_color(fp,col) fprintf(fp,"stroke %d C\n",col) 32 | #define eps_gray(fp,gray) fprintf(fp, "%g setgray\n",(double)gray) 33 | #define eps_line(fp,x1,y1,x2,y2) fprintf(fp,"%g %g %g %g L\n",(double)(x1),(double)(y1),(double)(x2),(double)(y2)) 34 | #define eps_linex(fp,x1,x2,y) fprintf(fp,"%g %g %g LX\n",(double)(x1),(double)(x2),(double)(y)) 35 | #define eps_liney(fp,y1,y2,x) fprintf(fp,"%g %g %g LY\n",(double)(y1),(double)(y2),(double)(x)) 36 | #define eps_Lstr(fp,x,y,s) fprintf(fp,"%g %g (%s) LS\n",(double)(x),(double)(y),s) 37 | #define eps_Rstr(fp,x,y,s) fprintf(fp,"%g %g (%s) RS\n",(double)(x),(double)(y),s) 38 | #define eps_Lstr4(fp,x,y,s) fprintf(fp,"%g %g FS4 add (%s) LS\n",(double)(x),(double)(y),s) 39 | #define eps_Rstr4(fp,x,y,s) fprintf(fp,"%g %g FS4 add (%s) RS\n",(double)(x),(double)(y),s) 40 | #define eps_Lstr4s(fp,x,y,s) fprintf(fp,"%g %g FS4 sub (%s) LS\n",(double)(x),(double)(y),s) 41 | #define eps_Rstr4s(fp,x,y,s) fprintf(fp,"%g %g FS4 sub (%s) RS\n",(double)(x),(double)(y),s) 42 | #define eps_box(fp,x1,y1,x2,y2) fprintf(fp,"%g %g %g %g B\n",(double)(x1),(double)(y1),(double)(x2),(double)(y2)) 43 | #define eps_fill(fp) fprintf(fp,"fill\n") 44 | #define eps_stroke(fp) fprintf(fp,"stroke\n") 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /est_len.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "align.h" 3 | #include "tree.h" 4 | #include "utils.h" 5 | #include "cpp_utils.h" 6 | 7 | Tree 
*tr_est_len_nj_core(Tree *tree, Matrix *mat) 8 | { 9 | Tree *forest[1], *rst; 10 | forest[0] = tree; 11 | rst = tr_nj_rooted(mat, 1, forest, 1); 12 | return rst; 13 | } 14 | void tr_est_len(Tree *tree, Matrix *mat, const char *tag) 15 | { 16 | Tree **node, **node2, *tree2; 17 | char str[127]; 18 | int i, n; 19 | 20 | n = mat->count; 21 | if (tree->n_leaf != n) { 22 | fprintf(stderr, "[tr_est_w_by_nj] inconsistent between the tree and the matrix!\n"); 23 | return; 24 | } 25 | if (tree->n != 2 || tree->ftime != 2 * n - 2) { 26 | fprintf(stderr, "[tr_est_w_by_nj] the input tree is not a binary rooted tree!\n"); 27 | return; 28 | } 29 | node = tr_stack(tree, Tree*); 30 | node2 = tr_stack(tree, Tree*); 31 | tr_expand_node(tree, node); 32 | tree2 =tr_est_len_nj_core(tree, mat); 33 | cpp_set_leaf_order(tree, tree2, 0); 34 | tr_order_core(tree2); 35 | n = tr_expand_node(tree2, node2); 36 | for (i = 0; i < n - 1; ++i) { 37 | if (tag) { 38 | sprintf(str, "%f", node2[i]->d); 39 | cpp_insert_keyval(node[i], tag, str); 40 | } else node[i]->d = node2[i]->d; 41 | } 42 | tr_delete_tree(tree2); 43 | free(node); free(node2); 44 | } 45 | static int tr_estlen_usage() 46 | { 47 | fprintf(stderr, "Usage: treebest estlen \n"); 48 | return 1; 49 | } 50 | int tr_estlen_task(int argc, char *argv[]) 51 | { 52 | extern FILE *tr_get_fp(const char *fn); 53 | FILE *fp, *fpmat; 54 | Matrix *mat; 55 | Tree *tree; 56 | 57 | if (argc < 4) return tr_estlen_usage(); 58 | fp = tr_get_fp(argv[1]); 59 | fpmat = tr_get_fp(argv[2]); 60 | tree = tr_parse_first(fp); 61 | mat = tr_read_matrix(fpmat); 62 | tr_est_len(tree, mat, argv[3]); 63 | tr_tree_output(stdout, tree, OUTPUT_SDI | OUTPUT_ORI_NHX); 64 | tr_delete_matrix(mat); 65 | tr_delete_tree(tree); 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /examples/ex1.nucl.nhx: -------------------------------------------------------------------------------- 1 | (((05_YEAST:0.493471[&&NHX:S=YEAST], 2 | 
04_SCHPO:0.318204[&&NHX:S=SCHPO] 3 | ):0.157022[&&NHX:D=N:S=Ascomycota], 4 | 03_ARATH:0.308649[&&NHX:S=ARATH] 5 | ):0.04141[&&NHX:D=N:S=Eukaryota], 6 | ((((((((08_RAT:0.102249[&&NHX:S=RAT], 7 | (13_RAT:0.095242[&&NHX:S=RAT], 8 | 10_RAT:0.031982[&&NHX:S=RAT] 9 | ):0.018615[&&NHX:D=Y:S=RAT] 10 | ):0.014489[&&NHX:D=Y:S=RAT], 11 | 11_MOUSE:0.042825[&&NHX:S=MOUSE] 12 | ):0.037198[&&NHX:D=N:S=Murinae], 13 | 12_HUMAN:0.060992[&&NHX:E=$-PANTR:S=HUMAN] 14 | ):0.109605[&&NHX:D=N:E=$-CANFA:S=Eutheria], 15 | 09_CHICK:0.105274[&&NHX:S=CHICK] 16 | ):0.086729[&&NHX:D=N:E=$-XENTR:S=Amniota], 17 | 06_BRARE:0.356832[&&NHX:E=$-Tetraodontidae:S=BRARE] 18 | ):0.073642[&&NHX:D=N:S=Euteleostomi], 19 | (((18_HUMAN:0.077476[&&NHX:E=$-PANTR:S=HUMAN], 20 | 17_MOUSE:0.066898[&&NHX:E=$-RAT:S=MOUSE] 21 | ):0.142228[&&NHX:D=N:E=$-CANFA:S=Eutheria], 22 | 16_CHICK:0.196657[&&NHX:S=CHICK] 23 | ):0.083315[&&NHX:D=N:E=$-XENTR:S=Amniota], 24 | (15_FUGRU:0.125305[&&NHX:E=$-TETNG:S=FUGRU], 25 | 14_BRARE:0.192069[&&NHX:S=BRARE] 26 | ):0.133036[&&NHX:D=N:S=Clupeocephala] 27 | ):0.096936[&&NHX:D=N:S=Euteleostomi] 28 | ):0.06101[&&NHX:D=Y:E=$-CIOIN:S=Euteleostomi], 29 | 07_DROME:0.262868[&&NHX:E=$-ANOGA-APIME:S=DROME] 30 | ):0.1118[&&NHX:D=N:S=Bilateria], 31 | (02_CAEBR:0.168649[&&NHX:S=CAEBR], 32 | 01_CAEEL:0.166054[&&NHX:S=CAEEL] 33 | ):0.371596[&&NHX:D=N:S=Caenorhabditis:Loglk=-18289.490411] 34 | ):0.083573[&&NHX:D=N:S=Bilateria] 35 | )[&&NHX:D=N:S=Eukaryota]; 36 | -------------------------------------------------------------------------------- /examples/ex2.nucl.nhx: -------------------------------------------------------------------------------- 1 | (((((((((((CCND3_HUMAN:0.002674[&&NHX:S=HUMAN], 2 | CCND3_PANTR:0.001163[&&NHX:S=PANTR] 3 | ):0.004961[&&NHX:D=N:S=Homo/Pan/Gorilla:B=97], 4 | ENSMMUT00000006270_MACMU:0.006613[&&NHX:S=MACMU] 5 | ):0.025333[&&NHX:D=N:S=Catarrhini:B=97], 6 | (Q3MHH5_BOVIN:0.024478[&&NHX:S=BOVIN], 7 | XP_538918_CANFA:0.019324[&&NHX:S=CANFA] 8 | 
):0.012604[&&NHX:D=N:S=Eutheria:B=88] 9 | ):0.012144[&&NHX:D=N:S=Eutheria:B=87], 10 | Ccnd3_MOUSE:0.078153[&&NHX:E=$-RAT:S=MOUSE] 11 | ):0.052509[&&NHX:D=N:S=Eutheria:B=99], 12 | ENSMODT00000016305_MONDO:0.102238[&&NHX:S=MONDO] 13 | ):0.099837[&&NHX:D=N:S=Theria:B=100], 14 | NP_001008453_CHICK:0.124722[&&NHX:S=CHICK] 15 | ):0.141613[&&NHX:D=N:E=$-XENTR-Clupeocephala:S=Amniota:B=100], 16 | ((((((((CCND2_HUMAN:0.001604[&&NHX:S=HUMAN], 17 | CCND2_PANTR:0[&&NHX:S=PANTR] 18 | ):0.010091[&&NHX:D=N:S=Homo/Pan/Gorilla:B=100], 19 | ENSMMUT00000011951_MACMU:0.010451[&&NHX:S=MACMU] 20 | ):0.028986[&&NHX:D=N:S=Catarrhini:B=100], 21 | (Ccnd2_MOUSE:0.014226[&&NHX:S=MOUSE], 22 | CCND2_RAT:0.020782[&&NHX:S=RAT] 23 | ):0.066893[&&NHX:D=N:S=Murinae:B=100] 24 | ):0.013591[&&NHX:D=N:S=Eutheria:B=24], 25 | (ENSBTAT00000022145_BOVIN:0.063398[&&NHX:S=BOVIN], 26 | ENSCAFT00000024405_CANFA:0.037808[&&NHX:S=CANFA] 27 | ):0.017904[&&NHX:D=N:S=Eutheria:B=46] 28 | ):0.070069[&&NHX:D=N:S=Eutheria:B=21], 29 | ENSMODT00000023250_MONDO:0.102155[&&NHX:S=MONDO] 30 | ):0.044438[&&NHX:D=N:S=Theria:B=56], 31 | CCND2_CHICK:0.08376[&&NHX:S=CHICK] 32 | ):0.010229[&&NHX:D=N:S=Amniota:B=16], 33 | ENSXETT00000013131_XENTR:0.229614[&&NHX:S=XENTR] 34 | ):0.031997[&&NHX:D=N:S=Tetrapoda:B=48], 35 | (((ENSDART00000057490_BRARE:0[&&NHX:S=BRARE], 36 | ENSDART00000067396_BRARE:0[&&NHX:S=BRARE] 37 | ):0.194362[&&NHX:D=Y:S=BRARE:B=100], 38 | (GSTENT00021540001_TETNG:0.028056[&&NHX:S=TETNG], 39 | NEWSINFRUT00000130791_FUGRU:0.018001[&&NHX:S=FUGRU] 40 | ):0.162428[&&NHX:D=N:S=Tetraodontidae:B=100] 41 | ):0.069991[&&NHX:D=N:S=Clupeocephala:B=71], 42 | (GSTENT00016773001_TETNG:0.027037[&&NHX:S=TETNG], 43 | NEWSINFRUT00000164290_FUGRU:0.028575[&&NHX:S=FUGRU] 44 | ):0.111143[&&NHX:D=N:E=$-BRARE:S=Tetraodontidae:B=100] 45 | ):0.082239[&&NHX:D=Y:S=Clupeocephala:B=47] 46 | ):0.097944[&&NHX:D=N:S=Euteleostomi:B=75] 47 | ):0.031891[&&NHX:D=Y:S=Euteleostomi:B=89], 48 | ((((((((CCND1_HUMAN:0.008437[&&NHX:E=$-PANTR:S=HUMAN], 49 | 
Q58HF8_MACMU:0.00586[&&NHX:S=MACMU] 50 | ):0.016661[&&NHX:D=N:S=Catarrhini:B=100], 51 | (ENSBTAT00000023277_BOVIN:0.032907[&&NHX:S=BOVIN], 52 | Q64HP0_CANFA:0.042464[&&NHX:S=CANFA] 53 | ):0.018136[&&NHX:D=N:S=Eutheria:B=8] 54 | ):0.026836[&&NHX:D=N:S=Eutheria:B=4], 55 | ((Ccnd1_MOUSE:0[&&NHX:S=MOUSE], 56 | F2-Ccnd1_MOUSE:0[&&NHX:S=MOUSE] 57 | ):0.010821[&&NHX:D=Y:S=MOUSE:B=100], 58 | CCND1_RAT:0.021804[&&NHX:S=RAT] 59 | ):0.045942[&&NHX:D=N:S=Murinae:B=100] 60 | ):0.082331[&&NHX:D=N:E=$-MONDO:S=Eutheria:B=94], 61 | CCND1_CHICK:0.115605[&&NHX:S=CHICK] 62 | ):0.029463[&&NHX:D=N:S=Amniota:B=97], 63 | ENSXETT00000008274_XENTR:0.261833[&&NHX:S=XENTR] 64 | ):0.050035[&&NHX:D=N:S=Tetrapoda:B=60], 65 | (ccnd1_BRARE:0.155429[&&NHX:S=BRARE], 66 | (GSTENT00026524001_TETNG:0.040039[&&NHX:S=TETNG], 67 | NEWSINFRUT00000152106_FUGRU:0.195772[&&NHX:S=FUGRU] 68 | ):0.110333[&&NHX:D=N:S=Tetraodontidae:B=100] 69 | ):0.068045[&&NHX:D=N:S=Clupeocephala:B=99] 70 | ):0.057882[&&NHX:D=N:S=Euteleostomi:B=58], 71 | (GSTENT00005089001_TETNG:0.075138[&&NHX:S=TETNG], 72 | NEWSINFRUT00000150560_FUGRU:0.048107[&&NHX:S=FUGRU] 73 | ):0.136489[&&NHX:D=N:E=$-BRARE-Tetrapoda:S=Tetraodontidae:B=100] 74 | ):0.066105[&&NHX:D=Y:S=Euteleostomi:B=90], 75 | (ENSXETT00000000537_XENTR:0.278384[&&NHX:E=$-Amniota:S=XENTR], 76 | (ENSDART00000023368_BRARE:0.203942[&&NHX:S=BRARE], 77 | NEWSINFRUT00000162311_FUGRU:0.198271[&&NHX:E=$-TETNG:S=FUGRU] 78 | ):0.473741[&&NHX:D=N:S=Clupeocephala:B=100] 79 | ):0.063958[&&NHX:D=N:S=Euteleostomi:B=20] 80 | ):0.051347[&&NHX:D=Y:S=Euteleostomi:B=3] 81 | ):0.0718[&&NHX:D=Y:S=Euteleostomi:B=5], 82 | (zgc-103502_BRARE:0.307385[&&NHX:S=BRARE], 83 | (GSTENT00027337001_TETNG:0.105924[&&NHX:S=TETNG], 84 | NEWSINFRUT00000136857_FUGRU:0.103917[&&NHX:S=FUGRU] 85 | ):0.206047[&&NHX:D=N:S=Tetraodontidae:B=100] 86 | ):0.171392[&&NHX:D=N:E=$-Tetrapoda:S=Clupeocephala:B=100] 87 | ):0.07197[&&NHX:D=Y:S=Euteleostomi:B=2], 88 | ENSCINT00000027176_CIOIN:0.429677[&&NHX:S=CIOIN] 89 | 
):0.024715[&&NHX:D=N:S=Chordata:B=0], 90 | (((CycD-RA_DROME:0.454498[&&NHX:E=$-DROPS:S=DROME], 91 | Q5TN60_ANOGA:0.450188[&&NHX:S=ANOGA] 92 | ):0.039326[&&NHX:D=N:S=Diptera:B=12], 93 | ENSAPMT00000025755_APIME:0.280665[&&NHX:S=APIME] 94 | ):0.038738[&&NHX:D=N:S=Endopterygota:B=3], 95 | (CBG02768_CAEBR:0.211843[&&NHX:S=CAEBR], 96 | (cyd-1_CAEEL:0.185763[&&NHX:S=CAEEL], 97 | cr01.sctg2.wum.450.1_CAERE:0.129235[&&NHX:S=CAERE] 98 | ):0.072966[&&NHX:D=N:S=Caenorhabditis:B=98] 99 | ):0.682157[&&NHX:D=N:S=Caenorhabditis:B=98] 100 | ):0.020587[&&NHX:D=N:S=Bilateria:B=0] 101 | )[&&NHX:D=N:S=Bilateria:B=0]; 102 | -------------------------------------------------------------------------------- /flglobal.h: -------------------------------------------------------------------------------- 1 | #ifndef FLGLOBAL_H_ 2 | #define FLGLOBAL_H_ 3 | 4 | #define FN_N_SPACE 3 5 | #define FN_ARTEMIS 0 6 | #define FN_DIONYSIS 1 7 | #define FN_SELENA 2 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "tree.h" 18 | #include "phyml.h" 19 | #include "tree_plot.h" 20 | 21 | class FNglobal; 22 | 23 | class Workspace : public Fl_Widget 24 | { 25 | FNglobal *global; 26 | Tree *tree; 27 | PlotParam *pp; 28 | int is_sdi; 29 | void draw(); 30 | void draw_tree(); 31 | void init(); 32 | void destroy_tree(); 33 | void destroy(); 34 | public: 35 | Workspace(int x, int y, int width, int height); 36 | ~Workspace() { destroy(); } 37 | void set_global(FNglobal *g) { global = g; } 38 | void size(int ww, int hh); 39 | void toggle_spec() { pp->show_spec = !pp->show_spec; } 40 | void toggle_real() { pp->is_real = !pp->is_real; } 41 | void change_font_size(int s) { if ((pp->font_size += s) < 5) pp->font_size = 5; } 42 | void set_tree(FILE *fp); 43 | void set_tree(Tree *); 44 | void root_tree(); 45 | void order_tree(); 46 | int export_eps(const char *fn); 47 | int save_nhx(const char *fn); 48 | PlotParam *get_pp() { return pp; } 49 | Tree 
*get_tree() { return tree; } 50 | void redraw(); 51 | }; 52 | 53 | class FNbuild 54 | { 55 | Task *task; 56 | FNglobal *global; 57 | public: 58 | Fl_Input *i_alignment; 59 | Fl_Int_Input *i_bs, *i_cutoff; 60 | Fl_Choice *i_type, *i_dist_type, *i_format, *i_cons_tree; 61 | Fl_Progress *prog_bar; 62 | Fl_Check_Button *is_cons; 63 | 64 | FNbuild(FNglobal *g); 65 | ~FNbuild(); 66 | Tree *build(); 67 | }; 68 | 69 | class FNphyml 70 | { 71 | FNglobal *global; 72 | PhymlConfig *pc; 73 | public: 74 | Fl_Input *i_alignment; 75 | Fl_Int_Input *i_bs, *i_cutoff, *i_num_cat; 76 | Fl_Float_Input *i_tstv, *i_inv, *i_alpha; 77 | Fl_Choice *i_tree, *i_type, *i_model, *i_format; 78 | Fl_Progress *prog_bar; 79 | Fl_Check_Button *has_tree, *is_tstv, *is_alpha, *is_inv, *is_bs, *is_mask_lss; 80 | 81 | FNphyml(FNglobal *g); 82 | ~FNphyml(); 83 | Tree *do_phyml(); 84 | }; 85 | 86 | class FNmerge 87 | { 88 | FNglobal *global; 89 | public: 90 | Fl_Choice *i_ws1, *i_ws2; 91 | FNmerge(FNglobal *g) { i_ws1 = i_ws2 = 0; global = g; } 92 | ~FNmerge() { delete i_ws1; delete i_ws2; } 93 | Tree *merge(); 94 | }; 95 | 96 | class FNcompare 97 | { 98 | FNglobal *global; 99 | public: 100 | Fl_Choice *i_ws; 101 | FNcompare(FNglobal *g) { i_ws = 0; global = g; } 102 | ~FNcompare() { delete i_ws; } 103 | void compare(); 104 | }; 105 | 106 | class FNbacktrans 107 | { 108 | FNglobal *global; 109 | public: 110 | Fl_Input *i_aa_aln, *i_nt_seq, *i_nt_aln; 111 | FNbacktrans(FNglobal *g) { i_aa_aln = i_nt_seq = i_nt_aln = 0; global = g; } 112 | ~FNbacktrans() { delete i_aa_aln; delete i_nt_aln; delete i_nt_seq; } 113 | void backtrans(); 114 | }; 115 | 116 | class FNglobal 117 | { 118 | public: 119 | Fl_Double_Window *build_w, *main_w, *merge_w, *compare_w, *backtrans_w, *phyml_w; 120 | Fl_File_Chooser *fc_nhx, *fc_eps, *fc_save, *fc_aln, *fc_phyml_aln; 121 | Fl_File_Chooser *fc_aa_aln, *fc_nt_seq, *fc_nt_aln; 122 | Fl_Light_Button *b_spec, *b_real; 123 | Fl_Tabs *tabs; 124 | Fl_Widget *prev_tab; 125 | Workspace 
*workspace[FN_N_SPACE]; 126 | 127 | FNbuild *build; 128 | FNmerge *merge; 129 | FNcompare *compare; 130 | FNbacktrans *backtrans; 131 | FNphyml *phyml; 132 | Tree *spec_tree; 133 | 134 | Fl_Button *b_save, *b_comp, *b_expt, *b_order, *b_merge, *b_root; 135 | 136 | FNglobal(); 137 | ~FNglobal(); 138 | Workspace *active_ws(); 139 | int change_ws(int); 140 | }; 141 | 142 | extern Fl_Menu_Item menu_type[], menu_nt_type[], menu_aa_type[], menu_format[], menu_workspace[]; 143 | extern Fl_Menu_Item menu_phyml_type[], menu_phyml_nt_model[], menu_phyml_aa_model[]; 144 | 145 | void cb_open(Fl_Widget *w, void *global); 146 | void cb_save(Fl_Widget *w, void *global); 147 | void cb_expt(Fl_Widget *w, void *global); 148 | void cb_build(Fl_Widget *w, void *global); 149 | void cb_phyml(Fl_Widget *w, void *global); 150 | void cb_root(Fl_Widget *w, void *global); 151 | void cb_order(Fl_Widget *w, void *global); 152 | void cb_merge(Fl_Widget *w, void *global); 153 | void cb_compare(Fl_Widget *w, void *global); 154 | void cb_backtrans(Fl_Widget *w, void *global); 155 | 156 | void cb_compare_tree(Fl_Widget *w, void *global); 157 | void cb_close_compare(Fl_Widget *w, void *global); 158 | 159 | void cb_merge_tree(Fl_Widget *w, void *global); 160 | void cb_close_merge(Fl_Widget *w, void *global); 161 | 162 | void cb_close_backtrans(Fl_Widget *w, void *global); 163 | void cb_do_backtrans(Fl_Widget *w, void *global); 164 | void cb_browse_aa_aln(Fl_Widget *w, void *global); 165 | void cb_browse_nt_seq(Fl_Widget *w, void *global); 166 | void cb_browse_nt_aln(Fl_Widget *w, void *global); 167 | 168 | void cb_browse(Fl_Widget *w, void *global); 169 | void cb_type(Fl_Widget *w, void *global); 170 | void cb_build_tree(Fl_Widget *w, void *global); 171 | void cb_toggle_cons(Fl_Widget *w, void *global); 172 | void cb_close_build(Fl_Widget *w, void *global); 173 | 174 | void cb_phyml_browse(Fl_Widget *w, void *global); 175 | void cb_phyml_type(Fl_Widget *w, void *global); 176 | void 
cb_phyml_inv(Fl_Widget *w, void *global); 177 | void cb_phyml_alpha(Fl_Widget *w, void *global); 178 | void cb_phyml_tstv(Fl_Widget *w, void *global); 179 | void cb_phyml_has_tree(Fl_Widget *w, void *global); 180 | void cb_phyml_tree(Fl_Widget *w, void *global); 181 | void cb_phyml_n_cat(Fl_Widget *w, void *global); 182 | void cb_close_phyml(Fl_Widget *w, void *global); 183 | 184 | void cb_zoom_in_x(Fl_Widget *w, void *global); 185 | void cb_zoom_out_x(Fl_Widget *w, void *global); 186 | void cb_zoom_in_y(Fl_Widget *w, void *global); 187 | void cb_zoom_out_y(Fl_Widget *w, void *global); 188 | void cb_small_font(Fl_Widget *w, void *global); 189 | void cb_large_font(Fl_Widget *w, void *global); 190 | void cb_prev(Fl_Widget *w, void *global); 191 | void cb_next(Fl_Widget *w, void *global); 192 | 193 | void cb_real(Fl_Widget *w, void *global); 194 | void cb_spec(Fl_Widget *w, void *global); 195 | 196 | void cb_update_buttons(Fl_Widget *w, void *global); 197 | 198 | #endif 199 | -------------------------------------------------------------------------------- /flnjtree.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "flglobal.h" 3 | #include "flnjtree_ui.h" 4 | 5 | extern "C" { 6 | FILE *tr_get_fp(const char *fn); 7 | }; 8 | int main(int argc, char *argv[]) 9 | { 10 | extern int FL_NORMAL_SIZE; 11 | int i; 12 | 13 | FL_NORMAL_SIZE = 12; 14 | FNglobal *g = new FNglobal; 15 | g->main_w = make_main_window(g); 16 | g->build_w = make_build_window(g); 17 | g->phyml_w = make_phyml_window(g); 18 | g->merge_w = make_merge_window(g); 19 | g->compare_w = make_comp_window(g); 20 | g->backtrans_w = make_trans_window(g); 21 | for (i = 0; i < 3 && i+1 < argc; ++i) { 22 | FILE *fp = tr_get_fp(argv[i+1]); 23 | if (fp) { 24 | g->workspace[i]->set_tree(fp); 25 | if (fp != stdin) fclose(fp); 26 | } 27 | } 28 | g->main_w->show(); 29 | return Fl::run(); 30 | } 31 | 
-------------------------------------------------------------------------------- /lost.c: -------------------------------------------------------------------------------- 1 | /*** 2 | * Author: liheng 3 | * Created: 2005-01-22 4 | * Last MDF: 2005-01-27 5 | * 6 | * 2005-01-27 liheng 7 | * 8 | * * fix memory violation 9 | * 10 | * 2005-01-28 liheng 11 | * 12 | * * fix another memory violation 13 | */ 14 | #include 15 | #include "utils.h" 16 | #include "tree.h" 17 | 18 | #ifdef USE_GCC 19 | inline int lh3_tmp_add_lost(Tree *p, Tree *s) 20 | #else 21 | int lh3_tmp_add_lost(Tree *p, Tree *s) 22 | #endif 23 | { 24 | int tmp; 25 | while (s->n > 0 && s->bs < 2) { /* find the lowest lost genus */ 26 | int j; 27 | for (j = 0; j < s->n; ++j) 28 | if (s->node[j]->bs > 0) break; 29 | s = s->node[j]; 30 | } 31 | tmp = ++(SDIptr(p)->n_lost); 32 | SDIptr(p)->lost = (Tree**)realloc(SDIptr(p)->lost, sizeof(Tree*) * tmp); 33 | SDIptr(p)->lost[tmp-1] = s; 34 | return 1; 35 | } 36 | int tr_lost_infer(Tree *tree, const Tree *spec) 37 | { 38 | Tree **node, *t, *s, *p; 39 | int count, i, j, m; 40 | 41 | if (tree == 0 || spec == 0) return 0; 42 | count = 0; 43 | if (!tree->node_array) { 44 | node = tr_stack(tree, Tree*); 45 | m = tr_expand_node(tree, node); 46 | } else { 47 | node = tree->node_array; 48 | m = tree->n_node; 49 | } 50 | for (i = 0; i < m - 1; ++i) { 51 | p = node[i]; 52 | t = 0; /* just for a warning. 
*/ 53 | s = spec(p); 54 | if (s == 0) continue; 55 | if (s == spec(p->pre) && SDIptr(p->pre)->type == 'D') { 56 | for (j = 0; j < s->n; ++j) 57 | if (SDIptr(p->pre)->spec_node[j] != 0 && SDIptr(p)->spec_node[j] == 0 && s->node[j]->bs > 0) 58 | count += lh3_tmp_add_lost(p, s->node[j]); 59 | continue; 60 | } 61 | if (s != spec(p->pre)) { /* spec(p) is a descendant of spec(p->pre) */ 62 | for (j = 0; j < s->n; ++j) 63 | if (SDIptr(p)->spec_node[j] == 0 && s->node[j]->bs > 0) 64 | count += lh3_tmp_add_lost(p, s->node[j]); 65 | t = s; s = s->pre; 66 | while (s != spec(p->pre)) { 67 | for (j = 0; j < s->n; ++j) 68 | if (s->node[j] != t && s->node[j]->bs > 0) 69 | count += lh3_tmp_add_lost(p, s->node[j]); 70 | t = s; s = s->pre; 71 | } 72 | } 73 | if (SDIptr(p->pre)->type != 'D') continue; 74 | else { /* now, SDIptr(p->pre)->type == 'D' and s == spec(p->pre) */ 75 | for (j = 0; j < s->n; ++j) 76 | if (SDIptr(p->pre)->spec_node[j] == 1 && s->node[j] != t && s->node[j]->bs > 0) 77 | count += lh3_tmp_add_lost(p, s->node[j]); 78 | } 79 | } 80 | if (!tree->node_array) free(node); 81 | return count; 82 | } 83 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | /*** 2 | * Created: unknown 3 | * Author: liheng 4 | * Last MDF: 2005-01-13 5 | * 6 | * 7 | * 2005-01-13 liheng: 8 | * 9 | * * add merge component 10 | */ 11 | #include 12 | #include 13 | #ifndef _WIN32 14 | #include 15 | #endif 16 | #include 17 | #include "tree.h" 18 | #include "phyml.h" 19 | 20 | int tr_sdi_task(int argc, char *argv[]); 21 | int tr_build(int argc, char *argv[]); 22 | int tr_root_task(int argc, char *argv[]); 23 | int tr_reformat_task(int argc, char *argv[]); 24 | int tr_filter_task(int argc, char *argv[]); 25 | int tr_trans_task(int argc, char *argv[]); 26 | int tr_treedist_task(int argc, char *argv[]); 27 | int tr_leaf_task(int argc, char *argv[]); 28 | int tr_mfa2aln_task(int 
argc, char *argv[]); 29 | int tr_ortho_task(int argc, char *argv[]); 30 | int tr_distmat_task(int argc, char *argv[]); 31 | int tr_mmerge_task(int argc, char *argv[]); 32 | int pwalign_task(int argc, char *argv[]); 33 | int tr_subtree_task(int argc, char *argv[]); 34 | int tr_simulate_task(int argc, char *argv[]); 35 | int tr_sortleaf_task(int argc, char *argv[]); 36 | int tr_estlen_task(int argc, char *argv[]); 37 | int best_task(int argc, char *argv[]); 38 | int plot_eps_task(int argc, char *argv[]); 39 | int ma_backtrans_task(int argc, char *argv[]); 40 | int tr_trimpoor_task(int argc, char *argv[]); 41 | 42 | void usage() 43 | { 44 | fprintf(stderr, "\n"); 45 | fprintf(stderr, "Program: TreeBeST (gene Tree Building guided by Species Tree)\n"); 46 | fprintf(stderr, "Version: %s build %s\n", TR_VERSION, TR_BUILD); 47 | fprintf(stderr, "Contact: Heng Li \n\n"); 48 | fprintf(stderr, "Usage: treebest [options]\n\n"); 49 | fprintf(stderr, "Command: nj build neighbour-joining tree, SDI, rooting\n"); 50 | fprintf(stderr, " best build tree with the help of a species tree\n"); 51 | fprintf(stderr, " phyml build phyml tree\n"); 52 | fprintf(stderr, " sdi speciation vs. 
duplication inference\n"); 53 | fprintf(stderr, " spec print species tree\n"); 54 | fprintf(stderr, " format reformat a tree\n"); 55 | fprintf(stderr, " filter filter a multi-alignment\n"); 56 | fprintf(stderr, " trans translate coding nucleotide alignment\n"); 57 | fprintf(stderr, " backtrans translate aa alignment back to nt\n"); 58 | fprintf(stderr, " leaf get external nodes\n"); 59 | fprintf(stderr, " mfa2aln convert MFA to ALN format\n"); 60 | fprintf(stderr, " ortho ortholog/paralog inference\n"); 61 | fprintf(stderr, " distmat distance matrix\n"); 62 | fprintf(stderr, " treedist topological distance between two trees\n"); 63 | fprintf(stderr, " pwalign pairwise alignment\n"); 64 | fprintf(stderr, " mmerge merge a forest\n"); 65 | fprintf(stderr, " export export a tree to EPS format\n"); 66 | fprintf(stderr, " subtree extract the subtree\n"); 67 | fprintf(stderr, " simulate simulate a gene tree\n"); 68 | fprintf(stderr, " sortleaf sort leaf order\n"); 69 | fprintf(stderr, " estlen estimate branch length\n"); 70 | fprintf(stderr, " trimpoor trim out leaves that affect the quality of a tree\n"); 71 | fprintf(stderr, " root root a tree by minimizing height\n\n"); 72 | } 73 | int main(int argc, char *argv[]) 74 | { 75 | #ifdef _WIN32 76 | srand(time(0)); 77 | #else 78 | srand48(time(0)^((int)getpid())); 79 | #endif 80 | if (argc == 1) { 81 | usage(); 82 | return 1; 83 | } 84 | if (strcmp(argv[1], "nj") == 0) 85 | return tr_build(argc-1, argv+1); 86 | else if (strcmp(argv[1], "best") == 0) 87 | return best_task(argc-1, argv+1); 88 | else if (strcmp(argv[1], "phyml") == 0) 89 | return phyml_task(argc-1, argv+1); 90 | else if (strcmp(argv[1], "sdi") == 0) 91 | return tr_sdi_task(argc-1, argv+1); 92 | else if (strcmp(argv[1], "root") == 0) 93 | return tr_root_task(argc-1, argv+1); 94 | else if (strcmp(argv[1], "format") == 0) 95 | return tr_reformat_task(argc-1, argv+1); 96 | else if (strcmp(argv[1], "filter") == 0) 97 | return tr_filter_task(argc-1, argv+1); 98 | 
else if (strcmp(argv[1], "trans") == 0) 99 | return tr_trans_task(argc-1, argv+1); 100 | else if (strcmp(argv[1], "backtrans") == 0) 101 | return ma_backtrans_task(argc-1, argv+1); 102 | else if (strcmp(argv[1], "leaf") == 0) 103 | return tr_leaf_task(argc-1, argv+1); 104 | else if (strcmp(argv[1], "treedist") == 0) 105 | return tr_treedist_task(argc-1, argv+1); 106 | else if (strcmp(argv[1], "mfa2aln") == 0) 107 | return tr_mfa2aln_task(argc-1, argv+1); 108 | else if (strcmp(argv[1], "ortho") == 0) 109 | return tr_ortho_task(argc-1, argv+1); 110 | else if (strcmp(argv[1], "distmat") == 0) 111 | return tr_distmat_task(argc-1, argv+1); 112 | else if (strcmp(argv[1], "pwalign") == 0) 113 | return pwalign_task(argc-1, argv+1); 114 | else if (strcmp(argv[1], "mmerge") == 0) 115 | return tr_mmerge_task(argc-1, argv+1); 116 | else if (strcmp(argv[1], "export") == 0) 117 | return plot_eps_task(argc-1, argv+1); 118 | else if (strcmp(argv[1], "subtree") == 0) 119 | return tr_subtree_task(argc-1, argv+1); 120 | else if (strcmp(argv[1], "simulate") == 0) 121 | return tr_simulate_task(argc-1, argv+1); 122 | else if (strcmp(argv[1], "sortleaf") == 0) 123 | return tr_sortleaf_task(argc-1, argv+1); 124 | else if (strcmp(argv[1], "estlen") == 0) 125 | return tr_estlen_task(argc-1, argv+1); 126 | else if (strcmp(argv[1], "trimpoor") == 0) 127 | return tr_trimpoor_task(argc-1, argv+1); 128 | else if (strcmp(argv[1], "spec") == 0) { 129 | extern char *tr_species_tree_string; 130 | printf("%s\n", tr_species_tree_string); 131 | return 0; 132 | } else { 133 | fprintf(stderr, "[main] unrecognized command %s\n", argv[1]); 134 | return 1; 135 | } 136 | return 0; 137 | } 138 | -------------------------------------------------------------------------------- /make_ng86.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "make_ng86.h" 6 | 7 | #define NG86_TYPE "char" 8 | 9 | int main(int argc, char 
*argv[]) 10 | { 11 | int i, j, k, l; 12 | int codon_index; 13 | struct dsresult value, result[64][64]; 14 | FILE *fh; 15 | 16 | if (argc == 1) { 17 | fprintf(stderr, "Usage: %s \n", argv[0]); 18 | return 1; 19 | } 20 | for(j = 0; j < 4; j++) { 21 | for(k = 0; k < 4; k++) { 22 | for(l = 0; l < 4; l++) { 23 | codon_index = (j * 4 + k) * 4 + l; 24 | seq[codon_index][0] = j; 25 | seq[codon_index][1] = k; 26 | seq[codon_index][2] = l; 27 | } 28 | } 29 | } 30 | 31 | for(i = 0; i < 64; i++) { 32 | for(j = i; j < 64; j++) { 33 | value = calc_ds(3, i, j); 34 | result[i][j] = value; 35 | result[j][i] = value; 36 | } 37 | } 38 | 39 | fh = fopen(argv[1], "w"); 40 | 41 | fprintf(fh, "#ifndef NG86_DS_H_\n"); 42 | fprintf(fh, "#define NG86_DS_H_\n\n"); 43 | fprintf(fh, "/* This file is generated by make_ng86\n"); 44 | fprintf(fh, " *\n"); 45 | fprintf(fh, " * Author: Du Wenfeng \n"); 46 | fprintf(fh, " * modified by Li Heng \n"); 47 | fprintf(fh, " */\n\n"); 48 | 49 | fprintf(fh, "const double pot_smut[64] = { /* potential same mut */\n"); 50 | for(i = 0; i < 4; i++) { 51 | for(j = 0; j < 4; j++) { 52 | for(k = 0; k < 4; k++) { 53 | fprintf(fh, "%1.5f", result[0][(i * 4 + j) * 4 + k].s2); 54 | if (i != 3 || j != 3 || k != 3) fputc(',', fh); 55 | } 56 | } 57 | fprintf(fh, "\n"); 58 | } 59 | fprintf(fh,"};\n"); 60 | 61 | fprintf(fh, "const %s diff_smut[64][64] = { /* same diff */\n", NG86_TYPE); 62 | for(i = 0; i < 64; i++) { 63 | fprintf(fh, "{"); 64 | for(j = 0; j < 64; j++) { 65 | if(result[i][j].ns < 0) result[i][j].ns = -1; 66 | fprintf(fh, "%d", result[i][j].ns); 67 | if (j < 63) fputc(',', fh); 68 | } 69 | if (i < 63) fprintf(fh,"},\n"); 70 | else fprintf(fh,"}\n"); 71 | } 72 | fprintf(fh,"};\n"); 73 | 74 | fprintf(fh, "const %s diff_nmut[64][64] = { /* not same diff */\n", NG86_TYPE); 75 | for(i = 0; i < 64; i++) { 76 | fprintf(fh, "{"); 77 | for(j = 0; j < 64; j++) { 78 | if(result[i][j].nn < 0) result[i][j].nn = -1; 79 | fprintf(fh, "%d", result[i][j].nn); 80 | if (j < 
63) fputc(',', fh); 81 | } 82 | if (i < 63) fprintf(fh,"},\n"); 83 | else fprintf(fh,"}\n"); 84 | } 85 | fprintf(fh,"};\n"); 86 | 87 | fprintf(fh, "const %s diff_path[64][64] = { /* path */\n", NG86_TYPE); 88 | for(i = 0; i < 64; i++) { 89 | fprintf(fh, "{"); 90 | for(j = 0; j < 64; j++) { 91 | if(result[i][j].np < 0) result[i][j].np = -1; 92 | fprintf(fh, "%d", result[i][j].np); 93 | if (j < 63) fputc(',', fh); 94 | } 95 | if (i < 63) fprintf(fh,"},\n"); 96 | else fprintf(fh,"}\n"); 97 | } 98 | fprintf(fh,"};\n"); 99 | 100 | fprintf(fh, "#endif\n"); 101 | 102 | fclose(fh); 103 | return 0; 104 | } 105 | 106 | int substitution(int codon[], int result[][3][3]) 107 | { 108 | int j, sub_pos, sub_base, na; 109 | for(sub_pos = 0; sub_pos < 3; sub_pos++) { 110 | j = 0; 111 | for(sub_base = 0; sub_base < 4; sub_base++) { 112 | if(sub_base != codon[sub_pos]) { 113 | for(na = 0; na < 3; na++) { 114 | result[sub_pos][j][na] = codon[na]; 115 | } 116 | result[sub_pos][j][sub_pos] = sub_base; 117 | j++; 118 | } 119 | } 120 | } 121 | return 0; 122 | } 123 | 124 | struct dsresult calc_ds(int length, int seq_index_0, int seq_index_1) 125 | { 126 | int s, n, p, i; 127 | int codon[2][3]; 128 | 129 | double s_total_0 = 0, s_total_1 = 0; 130 | double sd_total, nd_total; 131 | 132 | struct dsresult result; 133 | 134 | sd_total = 0.0; 135 | nd_total = 0.0; 136 | 137 | i = 0; 138 | 139 | readcodon(codon[0], seq_index_0, i); 140 | readcodon(codon[1], seq_index_1, i); 141 | 142 | potential_mut(codon[0], &s_total_0); 143 | potential_mut(codon[1], &s_total_1); 144 | 145 | s = 0; n = 0; p = 0; 146 | mut(codon[0], codon[1], &s, 0, &n, 0, &p); 147 | 148 | result.nn = n; 149 | result.np = p; 150 | result.ns = s; 151 | result.s1 = s_total_0; 152 | result.s2 = s_total_1; 153 | return result; 154 | } 155 | 156 | int readcodon(int codon[], int seqindex, int pos) 157 | { 158 | int k, cvalue; 159 | for(k = 0; k < 3; k++) { 160 | codon[k] = seq[seqindex][pos + k]; 161 | if(codon[k] == 4) return 1; 162 | 
} 163 | cvalue = calc_codon(codon); 164 | return 0; 165 | } 166 | 167 | void potential_mut(int codon[], double *s_total) 168 | { 169 | int k, l, same, nonsense, substcodon[3][3][3]; 170 | 171 | substitution(codon, substcodon); 172 | for(k = 0; k < 3; k++) { 173 | same = 0; 174 | nonsense = 0; 175 | for(l = 0; l < 3; l++) { 176 | if(aa[calc_codon(substcodon[k][l])] == stopcodon) { 177 | nonsense++; 178 | } else { 179 | if(aa[calc_codon(substcodon[k][l])] == aa[calc_codon(codon)]) same++; 180 | } 181 | } 182 | *s_total += (double)same / (double)(3 - nonsense); 183 | } 184 | } 185 | 186 | int mut(int codon_1[], int codon_2[], int *s_all, int s_path, int *n_all, int n_path, int *p) 187 | { 188 | int temp[3], codon; 189 | int i, j; 190 | 191 | if(calc_codon(codon_1) == calc_codon(codon_2)) { 192 | *s_all += s_path; 193 | *n_all += n_path; 194 | (*p)++; 195 | return 0; 196 | } 197 | for(i = 0; i < 3;i++) { 198 | if(codon_1[i] != codon_2[i]) { 199 | for(j = 0; j < 3; j++) { 200 | temp[j] = codon_1[j]; 201 | } 202 | temp[i] = codon_2[i]; 203 | codon = calc_codon(temp); 204 | if(aa[codon] != stopcodon) { 205 | if(aa[calc_codon(codon_1)] == aa[codon]) { 206 | mut(temp, codon_2, s_all, s_path + 1, n_all, n_path, p); 207 | } else { 208 | mut(temp, codon_2, s_all, s_path, n_all, n_path + 1, p); 209 | } 210 | } 211 | } 212 | } 213 | return 0; 214 | } 215 | 216 | int calc_codon(int codon[]) 217 | { 218 | return ((codon[0] * 4 + codon[1]) * 4 + codon[2]); 219 | } 220 | -------------------------------------------------------------------------------- /make_ng86.h: -------------------------------------------------------------------------------- 1 | #ifndef MAKE_DS_H_ 2 | #define MAKE_DS_H_ 3 | 4 | int seq[64][3]; 5 | 6 | const int aa[65] = { 7 | 11,11, 2, 2, 1, 1,15,15, 16,16,16,16, 9,12, 9, 9, 8 | 6, 6, 3, 3, 7, 7, 7, 7, 0, 0, 0, 0, 19,19,19,19, 9 | 5, 5, 8, 8, 1, 1, 1, 1, 14,14,14,14, 10,10,10,10, 10 | 20,20,18,18, 20,17, 4, 4, 15,15,15,15, 10,10,13,13, 21, 11 | }; 12 | /* A, T, G, 
C */ 13 | /*const int aa[64] = { 14 | 8, 11, 8, 11, 7, 7, 10, 7, 14, 15, 14, 15, 16, 16, 16, 16, 15 | 20, 19, 20, 19, 9, 4, 9, 4, 20, 1, 18, 1, 15, 15, 15, 15, 16 | 3, 2, 3, 2, 17, 17, 17, 17, 5, 5, 5, 5, 0, 0, 0, 0, 17 | 13, 6, 13, 6, 9, 9, 9, 9, 14, 14, 14, 14, 12, 12, 12, 12 18 | };*/ 19 | const int stopcodon = 20; 20 | const int other[4][3] = {{1,2,3},{0,2,3},{0,1,3},{0,1,2}}; 21 | 22 | 23 | struct dsresult { 24 | int ns, nn, np; 25 | double s1, s2; 26 | }; 27 | 28 | int substitution(int *, int [][3][3]); 29 | struct dsresult calc_ds(int, int, int); 30 | int readcodon(int *, int, int); 31 | void potential_mut(int *, double *); 32 | int mut(int *, int *, int *, int, int *, int, int *); 33 | int calc_codon(int *); 34 | 35 | #endif /* #ifndef DS_H */ 36 | -------------------------------------------------------------------------------- /ng86_ds.c: -------------------------------------------------------------------------------- 1 | /*** 2 | * Author: Du Wenfeng and liheng 3 | * Created: unknown 4 | * Last MDF: 2005-01-22 5 | * 6 | * 2005-01-22 liheng 7 | * 8 | * * remove dm distance 9 | */ 10 | /* 11 | * written by Du Wenfeng , and modified by 12 | */ 13 | #include 14 | #include 15 | #include "align.h" 16 | #include "ng86_ds.h" 17 | #include "align_lib/table.h" 18 | 19 | static float *ds_smut = 0; 20 | static float *ds_nmut = 0; 21 | 22 | void ma_init_nucl_data() 23 | { 24 | int i, j; 25 | if (ds_smut && ds_nmut) return; /* have already been initialized */ 26 | ds_smut = (float*)malloc(sizeof(float) * 64 * 64); 27 | ds_nmut = (float*)malloc(sizeof(float) * 64 * 64); 28 | for (i = 0; i < 64; ++i) { 29 | for (j = 0; j < 64; ++j) { 30 | if (aln_trans_table_eu[i] < 20 && aln_trans_table_eu[j] < 20) { 31 | if (diff_path[i][j] == 0) 32 | fprintf(stderr, "WARNING: Unknown error occurs to d_n/d_s initialization!\n"); 33 | ds_smut[(i<<6)|j] = (float)diff_smut[i][j] / diff_path[i][j]; 34 | ds_nmut[(i<<6)|j] = (float)diff_nmut[i][j] / diff_path[i][j]; 35 | } else 
ds_smut[(i<<6)|j] = ds_nmut[(i<<6)|j] = 0; 36 | } 37 | } 38 | } 39 | void ma_free_nucl_data() 40 | { 41 | free(ds_smut); 42 | free(ds_nmut); 43 | ds_smut = ds_nmut = 0; 44 | } 45 | /* 46 | * calculate nucleotide distance 47 | * 48 | * When dist_type == DIST_DM and mat_dn != 0, both mat and mat_dn will be filled with 49 | * dS and dN, respectively. 50 | */ 51 | void ma_cal_nucl_aux(Matrix *mat, const MultiAlign *ma, int is_rand, int dist_type, Matrix *mat_dn) 52 | { 53 | int l, i, j, k; 54 | int *array; 55 | char *seqi, *seqj; 56 | float ds, max_ds; 57 | float dn, max_dn; 58 | int count; 59 | 60 | assert(mat); assert(ma); 61 | l = ma->len; 62 | max_ds = max_dn = -1.0; 63 | array = (int*)malloc(sizeof(int) * l); 64 | /* initialize mat and mat_dn */ 65 | for (i = 0; i < ma->n * ma->n; ++i) 66 | mat->dist[i] = -1.0; 67 | if (dist_type == DIST_DM && mat_dn) { 68 | for (i = 0; i < ma->n * ma->n; ++i) 69 | mat_dn->dist[i] = -1.0; 70 | } 71 | for (i = 0; i < l; ++i) 72 | #ifdef _WIN32 73 | if (is_rand) array[i] = (int)((double)rand() / RAND_MAX * l); 74 | #else 75 | if (is_rand) array[i] = (int)(drand48() * l); 76 | #endif 77 | else array[i] = i; 78 | 79 | for (i = 0; i < ma->n; ++i) { 80 | seqi = ma->seq[i]; 81 | for (j = 0; j < i; ++j) { 82 | int ci, cj; 83 | float poti, potj, s_diff, n_diff; 84 | seqj = ma->seq[j]; 85 | count = 0; 86 | poti = potj = s_diff = n_diff = 0.0; 87 | for (k = 0, count = 0; k < l; ++k) { /* core loop */ 88 | ci = seqi[array[k]]; cj = seqj[array[k]]; 89 | if (aln_trans_table_eu[ci] >= 20 || aln_trans_table_eu[cj] >= 20) continue; 90 | ++count; 91 | poti += pot_smut[ci]; 92 | potj += pot_smut[cj]; 93 | if (ci != cj) { 94 | s_diff += ds_smut[(ci<<6)|cj]; 95 | n_diff += ds_nmut[(ci<<6)|cj]; 96 | } 97 | } 98 | if (count > 0 && poti + potj > 0 && count*6 - poti - potj > 0) { 99 | float s_avg, n_avg; 100 | s_avg = (poti + potj) / 2.0; 101 | n_avg = count * 3.0 - s_avg; 102 | ds = s_diff / s_avg; 103 | dn = n_diff / n_avg; 104 | if (dist_type == 
DIST_DN) /* calculate dn */ 105 | mat->dist[i * ma->n + j] = mat->dist[j * ma->n + i] = dn; 106 | else if (dist_type == DIST_DS) 107 | mat->dist[i * ma->n + j] = mat->dist[j * ma->n + i] = ds; 108 | else if (dist_type == DIST_DM && mat_dn != 0) { 109 | mat->dist[i * ma->n + j] = mat->dist[j * ma->n + i] = ds; 110 | mat_dn->dist[i * ma->n + j] = mat_dn->dist[j * ma->n + i] = dn; 111 | } else mat->dist[i * ma->n + j] = mat->dist[j * ma->n + i] = 0.0; 112 | if (ds > max_ds) max_ds = ds; 113 | if (dn > max_dn) max_dn = dn; 114 | } /* otherwise, do nothing */ 115 | } 116 | } 117 | max_ds *= 2.0; max_dn *= 2.0; 118 | if (dist_type == DIST_DN) { /* calculate dn */ 119 | for (i = 0; i < ma->n * ma->n; ++i) /* set undefined distance as 2 * max_dn */ 120 | if (mat->dist[i] < 0.0) mat->dist[i] = max_dn; 121 | } else if (dist_type == DIST_DS) { 122 | for (i = 0; i < ma->n * ma->n; ++i) /* set undefined distance as 2 * max_ds */ 123 | if (mat->dist[i] < 0.0) mat->dist[i] = max_ds; 124 | } else if (dist_type == DIST_DM && mat_dn != 0) { 125 | for (i = 0; i < ma->n * ma->n; ++i) { /* set undefined distance */ 126 | if (mat->dist[i] < 0.0) mat->dist[i] = max_ds; 127 | if (mat_dn->dist[i] < 0.0) mat_dn->dist[i] = max_dn; 128 | } 129 | } 130 | for (i = 0; i < ma->n; ++i) /* set diag as 0 */ 131 | mat->dist[i * ma->n + i] = 0.0; 132 | if (dist_type == DIST_DM && mat_dn) { 133 | for (i = 0; i < ma->n; ++i) /* set diag as 0 */ 134 | mat_dn->dist[i * ma->n + i] = 0.0; 135 | } 136 | free(array); 137 | } 138 | void ma_cal_nucl(Matrix *mat, const MultiAlign *ma, int is_rand, int dist_type) 139 | { 140 | assert(mat); assert(ma); 141 | ma_cal_nucl_aux(mat, ma, is_rand, dist_type, 0); 142 | } 143 | -------------------------------------------------------------------------------- /nhx_output.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "align.h" 4 | #include "common/hash_char.h" 5 | #include "utils.h" 6 | #include 
"tree.h" 7 | 8 | static unsigned out_flag = 0; 9 | static hash_set_char *set; 10 | 11 | void tr_tree_output(FILE *fp, const Tree *root, unsigned flag) 12 | { 13 | assert(fp); 14 | char *str = tr_tree_string(root, flag); 15 | fprintf(fp, "%s", str); 16 | free(str); 17 | } 18 | static int string_estimate_string_len(const Tree *tree, int out_flag) 19 | { 20 | Tree **node, *p; 21 | int i, j, n, len = 0; 22 | SDIinfo *s; 23 | hash_map_char *hash; 24 | hash_map_char::iterator iter; 25 | 26 | node = tr_stack(tree, Tree*); 27 | n = tr_expand_node(tree, node); 28 | for (i = 0; i < n; ++i) { 29 | p = node[i]; 30 | if (p->name) len += strlen(p->name); // name 31 | if (p->n) len += 6 + 2 + p->n + (p->n-1); // "[&&NHX]" + "()" + "\n" + "," 32 | if (p->d > 1e-30) len += 1 + 17; // ":"dist 33 | if ((out_flag & OUTPUT_SDI) && SDIptr(p)) { 34 | s = SDIptr(p); 35 | len += 28; // ":D=Y:SIS=100:DCS=0.1234:DD=Y" 36 | if (spec(p) && spec(p)->name) { 37 | len += 3 + strlen(spec(p)->name); // ":S="spec 38 | if (s->n_lost) { 39 | len += 4; // ":E=$" 40 | for (j = 0; j < s->n_lost; ++j) 41 | if (s->lost[j]->name) 42 | len += 1 + strlen(s->lost[j]->name); // "-"spec 43 | } 44 | } 45 | if (p->tree_index) { 46 | len += 6; // ":T=100" 47 | } 48 | 49 | } 50 | len += 6 + 7; // ":Com=N" + ":B=100" 51 | hash = (hash_map_char *)p->nhx; 52 | if (hash) { 53 | for (iter = hash->begin(); iter < hash->end(); ++iter) 54 | if (isfilled(iter) && !set->find(iter->key)) 55 | len += 2 + strlen(iter->key) + strlen(iter->val); // ":"key"="val 56 | } 57 | } 58 | len += 2; // ";\n" 59 | free(node); 60 | return len; 61 | } 62 | inline int string_nhx_node(char *str, const Tree *t) 63 | { 64 | int i; 65 | char *p; 66 | SDIinfo *s; 67 | hash_map_char *hash; 68 | hash_map_char::iterator iter; 69 | 70 | if (!((out_flag & OUTPUT_SDI) && SDIptr(t)) && t->nhx == 0 && t->bs < 0) 71 | return 0; 72 | p = str; 73 | p += sprintf(p, "[&&NHX"); 74 | if ((out_flag & OUTPUT_SDI) && SDIptr(t)) { 75 | int c; 76 | s = SDIptr(t); 77 | 
c = s->type; 78 | if (c != 'N') { /* duplication flag is defined */ 79 | p += sprintf(p, ":D=%c", (c == 'D')? 'Y' : 'N'); /* whether duplication? */ 80 | if (c == 'D') { 81 | p += sprintf(p, ":SIS=%d:DCS=%.4f", int(100.0*s->sis+0.5), s->sis); 82 | if (!s->is_confirmed_dup) p += sprintf(p, ":DD=Y"); /* Dubious Duplication */ 83 | } 84 | } 85 | if (s->n_lost) { 86 | p += sprintf(p, ":E=$"); 87 | for (i = 0; i < s->n_lost; ++i) 88 | if (s->lost[i]->name) 89 | p += sprintf(p, "-%s", s->lost[i]->name); 90 | } 91 | if (spec(t) && spec(t)->name && (s->type != 'N' || t->n == 0)) /* species name */ 92 | p += sprintf(p, ":S=%s", spec(t)->name); 93 | } 94 | if ((out_flag & OUTPUT_TREE_INDEX) && (t->tree_index > 0)) 95 | p += sprintf(p, ":T=%d", t->tree_index); 96 | if (t->bs >= 0) p += sprintf(p, ":B=%d", t->bs); /* bootstrap value */ 97 | if ((out_flag & OUTPUT_COMPARE) && t->is_cons == 0) /* identical clade */ 98 | p += sprintf(p, ":Com=N"); 99 | /* print miscellaneous information */ 100 | hash = (hash_map_char *)t->nhx; 101 | if (hash) { 102 | for (iter = hash->begin(); iter < hash->end(); ++iter) 103 | if (isfilled(iter) && !set->find(iter->key)) 104 | p += sprintf(p, ":%s=%s", iter->key, iter->val); 105 | } 106 | p += sprintf(p, "]"); 107 | if (p - 7 == str) { /* strip [&&NHX] */ 108 | p = str; *str = '\0'; 109 | } 110 | return p - str; 111 | } 112 | static int tr_tree_string_aux(char *str, const Tree *t) 113 | { 114 | int i; 115 | char *p = str; 116 | 117 | /* leaf node */ 118 | if (t->n == 0) { 119 | p += sprintf(p, "%s", t->name); 120 | if (t->d > -1e30) 121 | p += sprintf(p, ":%g", t->d); 122 | if (out_flag & OUTPUT_SDI) 123 | p += string_nhx_node(p, t); 124 | return p - str; 125 | } 126 | p += sprintf(p, "("); 127 | p += tr_tree_string_aux(p, t->node[0]); 128 | for (i = 1; i < t->n; ++i) { /* output children recursively */ 129 | p += sprintf(p, ",\n"); 130 | p += tr_tree_string_aux(p, t->node[i]); 131 | } 132 | p += sprintf(p, "\n)"); 133 | if (t->name) p += 
sprintf(p, "%s", t->name); 134 | if (t->d > -1e30) /* if reasonable length, output branch length */ 135 | p += sprintf(p, ":%g", t->d); 136 | if (out_flag & OUTPUT_SDI) 137 | p += string_nhx_node(p, t); 138 | return p - str; 139 | } 140 | char *tr_tree_string(const Tree *root, unsigned flag) 141 | { 142 | char *str, *p; 143 | int len; 144 | if (root == 0) return cpystr("_null_;\n"); // output a null tree 145 | out_flag = flag; 146 | set = new hash_set_char; 147 | set->insert("B"); 148 | if (!(flag & OUTPUT_ORI_NHX)) { 149 | if (flag & OUTPUT_SDI) { 150 | set->insert("E"); set->insert("S"); set->insert("D"); 151 | } 152 | if (flag & OUTPUT_COMPARE) set->insert("Com"); 153 | if (flag & OUTPUT_TREE_INDEX) set->insert("T"); 154 | } 155 | if (flag & OUTPUT_TREE_INDEX) set->insert("T"); 156 | len = string_estimate_string_len(root, flag); 157 | str = (char*)malloc(sizeof(char) * len); 158 | p = str; 159 | p += tr_tree_string_aux(p, root); 160 | p += sprintf(p, ";\n"); 161 | delete set; 162 | return str; 163 | } 164 | void tr_attach_geneid(Tree *tree, MultiAlign *ma) 165 | { 166 | hash_map_char *hash, *nhx; 167 | char *s, *t; 168 | Tree **node, *p; 169 | int i, n; 170 | 171 | hash = new hash_map_char; 172 | for (i = 0; i < ma->n; ++i) { 173 | if (ma->name[i] && ma->gene && ma->gene[i]) { 174 | s = (strstr(ma->gene[i], "GENEID="))? 
ma->gene[i] + 7 : ma->gene[i]; 175 | hash->insert(ma->name[i], s); // no memory allocation here 176 | } 177 | } 178 | node = tr_stack(tree, Tree*); 179 | n = tr_expand_leaf(tree, node); 180 | for (i = 0; i < n; ++i) { 181 | p = node[i]; 182 | if (hash->find(p->name, &s)) { 183 | if (!p->nhx) { 184 | nhx = new hash_map_char; 185 | p->nhx = nhx; 186 | } else nhx = (hash_map_char*)p->nhx; 187 | if (nhx->find("G", &t)) free(t); 188 | nhx->insert("G", cpystr(s)); // memory allocation 189 | } 190 | } 191 | free(node); 192 | delete hash; 193 | } 194 | void tr_attach_seed_tag(Tree *tree, int n, char **list) 195 | { 196 | hash_map_char *nhx; 197 | hash_set_char *hash; 198 | Tree **node, *p; 199 | int m, i; 200 | char *t; 201 | 202 | if (n) assert(list); 203 | hash = new hash_set_char; 204 | for (i = 0; i < n; ++i) if (list[i]) hash->insert(list[i]); 205 | node = tr_stack(tree, Tree*); 206 | m = tr_expand_leaf(tree, node); 207 | for (i = 0; i < m; ++i) { 208 | p = node[i]; 209 | if (hash->find(p->name)) { 210 | if (!p->nhx) { 211 | nhx = new hash_map_char; 212 | p->nhx = nhx; 213 | } else nhx = (hash_map_char*)p->nhx; 214 | if (nhx->find("Sd", &t)) free(t); 215 | nhx->insert("Sd", cpystr("Y")); // memory allocation 216 | } 217 | } 218 | free(node); 219 | delete hash; 220 | } 221 | -------------------------------------------------------------------------------- /nj.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "tree.h" 5 | #include "cpp_utils.h" 6 | #include "utils.h" 7 | 8 | /** \file nj.c 9 | * \brief Utilities for the calculation of bootstrap values. 10 | * 11 | * This file also contains utilities for calculating bootstrap values. The use of 12 | * hash idea accelerate the calculation by far. Constrained NJ has been moved to 13 | * file nj2.c. 
14 | */ 15 | 16 | /** \fn void tr_pre_bootstrap(Tree *tree, int is_node) 17 | * \brief Fill Tree::ptr array and prepare for bootstrapping comparison. 18 | * \param tree Binary tree to be processed. 19 | * \param is_node Mode. 0 for the conventional branch mode and 1 for node mode. 20 | * 21 | * This function will fill tree::ptr as a integer array, storing the leaves 22 | * a node covers. It will also calculate a magic number for each array to 23 | * accelerate the array comparison in tr_update_bootstrap(). The time 24 | * complexity is O(N^2). 25 | * 26 | * Note that this function, not like tr_compare_core() or tr_mmerge(), will 27 | * not backup Tree::ptr. Any information ptr points to will be lost. 28 | */ 29 | void tr_pre_bootstrap(Tree *tree, int is_node) 30 | { 31 | Tree **node; 32 | int m, i, j, l, n; 33 | int magic, c; 34 | int magica[3], ca[3], key0; 35 | int *r, *q; 36 | Tree *p; 37 | 38 | if (tree == 0) return; 39 | n = tree->n_leaf; 40 | node = (Tree**)malloc(sizeof(Tree*) * n); 41 | for (i = 0, key0 = 0; i < n; ++i) key0 ^= i; 42 | m = tr_expand_internal_node(tree, node); 43 | 44 | /* O(N^2) */ 45 | for (i = 0; i < m; ++i) { 46 | p = node[i]; 47 | p->bs = 0; /* counter */ 48 | q = (int*)malloc(sizeof(int) * (n + 1)); 49 | p->ptr = q; 50 | for (j = 0; j < n; ++j) q[j] = 0; 51 | for (l = 0; l < 2; ++l) { /* only the first two nodes are handled */ 52 | magic = c = 0; 53 | if (p->node[l]->n == 0) { 54 | magic ^= p->node[l]->id; 55 | ++c; 56 | q[p->node[l]->id] = l + 1; /* i.e. 1 for l=0 and 2 for l=1 */ 57 | } else { 58 | r = (int*)p->node[l]->ptr; 59 | for (j = 0; j < n; ++j) { 60 | if (r[j]) { 61 | magic ^= j; ++c; 62 | q[j] = l + 1; 63 | } 64 | } 65 | } 66 | magica[l] = magic; 67 | ca[l] = c; 68 | } 69 | magica[2] = key0 ^ magica[0] ^ magica[1]; 70 | ca[2] = n - ca[0] - ca[1]; 71 | if (is_node) { /* node mode. 
each branch connected with this node is correct */ 72 | for (l = 0, magic = 0; l < 3; ++l) 73 | if (magica[l]*n+ca[l] > magic*n+c) { magic = magica[l]; c = ca[l]; } 74 | } else { /* branch mode. the edge (n, n->parent) is correct */ 75 | magic = magica[0] ^ magica[1]; c = ca[0] + ca[1]; 76 | if (magica[2]*n+ca[2] > magic*n+c) { magic = magica[2]; c = ca[2]; }; 77 | } 78 | q[n] = magic * n + c; 79 | } 80 | free(node); 81 | } 82 | /** \fn void tr_update_bootstrap(Tree *tree, const Tree *samp, int type) 83 | * \brief Compare two trees and update Tree::bs. 84 | * \param tree Tree to be updated. 85 | * \param samp Resampled tree. 86 | * \param is_node Mode. 0 for the conventional branch mode and 1 for node mode. 87 | * 88 | * This function compares a tree to a bootstrapped tree samp. If a node 89 | * is supported by bootstraped tree, Tree::bs will be increased by one. 90 | * The worst-case time complexity is O(N^3), but with the help of hash 91 | * technology, the real complexity approaches O(N^2). If we use more 92 | * hash, the time might be reduced to O(N). Nonetheless, I think the 93 | * speed here is enough. 94 | * 95 | * Branch mode realizes the conventional method to calculate bootstrap 96 | * values. Such values are actually calculated for a branch, instead of 97 | * a node. Node mode is initiated by TreeBeST. It evaluates the 98 | * accuracy of the tri-furcation at a node. So node mode is only applied 99 | * for a binary tree, but it is more rigorous. 
100 | */
void tr_update_bootstrap(Tree *tree, const Tree *samp, int is_node)
{
	Tree **tree_nodes, **samp_nodes;
	int n_tree_nodes, n_samp_nodes, n_leaf;
	int ti, si, k;

	if (tree == 0 || samp == 0) return;
	if (tree->n_leaf != samp->n_leaf) {
		fprintf(stderr, "[tr_update_bootstrap] not seem to be a resampled tree\n");
		return;
	}
	n_leaf = tree->n_leaf;
	tree_nodes = (Tree**)malloc(sizeof(Tree*) * n_leaf);
	samp_nodes = (Tree**)malloc(sizeof(Tree*) * n_leaf);

	n_tree_nodes = tr_expand_internal_node(tree, tree_nodes);
	n_samp_nodes = tr_expand_internal_node(samp, samp_nodes);

	for (ti = 0; ti < n_tree_nodes; ++ti) {
		Tree *cur = tree_nodes[ti];
		int *mine = (int*)cur->ptr;   /* leaf-membership array; slot n_leaf is the hash key */
		int key = mine[n_leaf];

		for (si = 0; si < n_samp_nodes; ++si) {
			int *theirs = (int*)samp_nodes[si]->ptr;
			int matched = 0;

			/* cheap rejection: different hash keys cannot describe the same split */
			if (theirs[n_leaf] != key) continue;

			if (!is_node) { /* branch mode */
				/* count the leaves on which both arrays agree about membership */
				int agree = 0;
				for (k = 0; k < n_leaf; ++k)
					if ((mine[k] != 0) == (theirs[k] != 0)) ++agree;
				/* identical, or exactly complementary, bipartitions */
				matched = (agree == 0 || agree == n_leaf);
			} else { /* node mode */
				/* map each label of this node (0, 1, 2) onto a label of the sample node */
				int map[3];
				map[0] = map[1] = map[2] = -1;
				for (k = 0; k < n_leaf; ++k) {
					if (map[mine[k]] < 0) map[mine[k]] = theirs[k];
					else if (map[mine[k]] != theirs[k]) break;
				}
				/* consistent over all leaves and the three targets sum to 0+1+2 */
				matched = (k == n_leaf && map[0] + map[1] + map[2] == 0 + 1 + 2);
			}
			if (matched) {
				++(cur->bs);
				theirs[n_leaf] = 0; /* clear the sample node's key once it has been counted */
				break;
			}
		}
	}
	if (!is_node) tree->bs = -1; /* no bootstrap for root node in branch mode */
	free(tree_nodes);
	free(samp_nodes);
}
151 | -------------------------------------------------------------------------------- /order.c: -------------------------------------------------------------------------------- 1 | /*! \file order.c 2 | * \brief Order the external nodes of a tree. 
3 | */ 4 | #include 5 | #include 6 | #include "tree.h" 7 | #include "utils.h" 8 | 9 | #define ORDER_MASK 0x03ff 10 | #define ORDER_SHIFT 10 11 | #define ORDER_TIMES 256.0 12 | 13 | typedef struct 14 | { 15 | void *ptr; 16 | unsigned key; 17 | int cnt; 18 | Tree *left; /** point to the left-most child */ 19 | } OrderInfo; 20 | 21 | #define Oinfo(p) ((OrderInfo*)(p)->ptr) 22 | 23 | #define ALGO_QSORT 24 | #define ALGO_TYPE TreePtr 25 | #define ALGO_CMP(a,b) ((Oinfo(a)->keykey) || (Oinfo(a)->key==Oinfo(b)->key && strcmp(Oinfo(a)->left->name,Oinfo(b)->left->name)<0)) 26 | #include "algo.h" 27 | 28 | static void dump_ptr(Tree *tree) 29 | { 30 | int n, i; 31 | Tree **node, *p; 32 | OrderInfo *q; 33 | 34 | if (tree == 0) return; 35 | node = tr_stack(tree, Tree*); 36 | n = tr_expand_node(tree, node); 37 | for (i = 0; i < n; ++i) { 38 | q = (OrderInfo*)malloc(sizeof(OrderInfo)); 39 | p = node[i]; 40 | q->left = (p->n == 0)? p : 0; 41 | q->key = (p->n == 0)? ((p->flag << ORDER_SHIFT) | p->flag) : 0; 42 | q->cnt = (p->n == 0)? (p->flag < tree->n_leaf? 1 : 0) : 0; 43 | q->ptr = p->ptr; 44 | p->ptr = q; 45 | } 46 | free(node); 47 | } 48 | static void restore_ptr(Tree *tree) 49 | { 50 | int n, i; 51 | Tree **node, *p; 52 | OrderInfo *q; 53 | 54 | if (tree == 0) return; 55 | node = tr_stack(tree, Tree*); 56 | n = tr_expand_node(tree, node); 57 | for (i = 0; i < n; ++i) { 58 | p = node[i]; 59 | q = Oinfo(p); 60 | p->ptr = q->ptr; 61 | free(q); 62 | } 63 | free(node); 64 | } 65 | void tr_set_spec_leaf_order(Tree *tree) 66 | { 67 | int i, n; 68 | Tree **node, *p; 69 | 70 | if (tree == 0) return; 71 | node = tr_stack(tree, Tree*); 72 | n = tr_expand_leaf(tree, node); 73 | for (i = 0; i < n; ++i) { 74 | p = node[i]; 75 | if (p->ptr && spec(p)) p->flag = spec(p)->id; 76 | else p->flag = n; 77 | } 78 | free(node); 79 | } 80 | /** \fn void tr_order_core(Tree *tree) 81 | * \brief Re-order the leaves of a tree. 82 | * \param tree A tree with leaves numbered. 
83 | * 84 | * This function will try to rearrange the leaves in an order that 85 | * approaches increasing order. The algorithm here is very simple and intuitive. 86 | * I do not know what on earth is optimized here, but it works intuitively 87 | * well. Usually, you need to call cpp_set_leaf_order() first to initialize 88 | * Tree::flag. 89 | */ 90 | void tr_order_core(Tree *tree) 91 | { 92 | Tree **node, *p, *q; 93 | int i, j, n, y; 94 | double x; 95 | 96 | if (tree == 0) return; 97 | node = tr_stack(tree, Tree*); 98 | dump_ptr(tree); 99 | n = tr_expand_internal_node(tree, node); 100 | for (i = 0; i < n; ++i) { 101 | p = node[i]; 102 | if (p->n == 2) { 103 | if (!ALGO_CMP(p->node[0], p->node[1])) { /** swap! */ 104 | q = p->node[0]; p->node[0] = p->node[1]; p->node[1] = q; 105 | } 106 | } else algo_qsort(p->node, p->n); 107 | for (x = 0.0, j = 0, y = 0; j < p->n; ++j) { 108 | y += Oinfo(p->node[j])->cnt; 109 | x += (Oinfo(p->node[j])->key >> ORDER_SHIFT) / ORDER_TIMES * Oinfo(p->node[j])->cnt; 110 | } 111 | Oinfo(p)->cnt = y; 112 | if (y) Oinfo(p)->key = ((int)(x * ORDER_TIMES / y + 0.5) << ORDER_SHIFT) | (Oinfo(p->node[0])->key & ORDER_MASK); 113 | else Oinfo(p)->key = 0xfffffffful << ORDER_SHIFT | (Oinfo(p->node[0])->key & ORDER_MASK); 114 | Oinfo(p)->left = Oinfo(p->node[0])->left; 115 | } 116 | restore_ptr(tree); 117 | tr_tree_init(tree); 118 | free(node); 119 | } 120 | -------------------------------------------------------------------------------- /ortho.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tree.h" 4 | #include "utils.h" 5 | 6 | Ortholog *tr_alloc_ortho(int n) 7 | { 8 | int i; 9 | Ortholog *ortho; 10 | ortho = (Ortholog*)malloc(sizeof(Ortholog)); 11 | ortho->n = n; 12 | ortho->rel = (short**)malloc(sizeof(short*) * n); 13 | ortho->taxon = (TreePtr**)malloc(sizeof(TreePtr*) * n); 14 | ortho->is_pseudo = (char**)malloc(sizeof(char*) * n); 15 | for (i = 0; i < ortho->n; ++i) 
{ 16 | ortho->rel[i] = (short*)calloc(i, sizeof(short)); 17 | ortho->taxon[i] = (TreePtr*)calloc(i, sizeof(TreePtr)); 18 | ortho->is_pseudo[i] = (char*)calloc(i, sizeof(TreePtr)); 19 | } 20 | return ortho; 21 | } 22 | void tr_delete_ortho(Ortholog *ortho) 23 | { 24 | int i; 25 | for (i = 0; i < ortho->n; ++i) { 26 | free(ortho->rel[i]); 27 | free(ortho->taxon[i]); 28 | free(ortho->is_pseudo[i]); 29 | } 30 | free(ortho->rel); 31 | free(ortho->taxon); 32 | free(ortho->is_pseudo); 33 | free(ortho); 34 | } 35 | void tr_flag_confirmed(Tree *tree, const Tree *spec) 36 | { 37 | int i, j, k, m, n_leaf; 38 | Tree **node; 39 | int **tmp_array; 40 | 41 | if (tree == 0 || spec == 0) return; 42 | m = (tree->ftime > spec->ftime)? tree->ftime : spec->ftime; 43 | node = (Tree**)malloc(sizeof(Tree*) * (m + 1)); 44 | /* for species tree */ 45 | m = tr_expand_leaf(spec, node); 46 | for (i = 0; i < m; ++i) node[i]->flag = i; 47 | /* for gene tree */ 48 | n_leaf = spec->n_leaf; 49 | tmp_array = tr_stack(tree, int*); 50 | m = tr_expand_node(tree, node); 51 | for (i = 0; i < m; ++i) 52 | tmp_array[i] = (int*)calloc(n_leaf + 1, sizeof(int)); 53 | for (i = 0; i < m; ++i) { 54 | Tree *p = node[i]; 55 | int *q = tmp_array[i]; 56 | assert(i == p->ftime); 57 | if (!SDIptr(p) || !spec(p)) continue; 58 | if (p->n == 0) { /* external node */ 59 | q[spec(p)->flag] = 1; 60 | } else { 61 | int c1, c2, *r; 62 | SDIptr(p)->is_confirmed_dup = 0; 63 | for (j = 0; j < p->n; ++j) { 64 | r = tmp_array[p->node[j]->ftime]; 65 | for (k = 0; k < n_leaf; ++k) { 66 | if (r[k] && q[k]) SDIptr(p)->is_confirmed_dup = 1; 67 | else if (r[k]) q[k] = 1; 68 | } 69 | } 70 | /* only consider binary trees */ 71 | q = tmp_array[p->node[0]->ftime]; 72 | r = tmp_array[p->node[1]->ftime]; 73 | c1 = c2 = 0; 74 | for (k = 0; k < n_leaf; ++k) { 75 | if (q[k] && r[k]) ++c1; 76 | if (q[k] || r[k]) ++c2; 77 | } 78 | SDIptr(p)->sis = ((float) c1) / c2; 79 | } 80 | } 81 | for (i = 0; i < m; ++i) 82 | free(tmp_array[i]); 83 | 
free(tmp_array); 84 | free(node); 85 | } 86 | void tr_ortho_aux(Tree *tree, int is_pseudo) 87 | { 88 | int i, m; 89 | Tree **node; 90 | SDIinfo *s; 91 | 92 | if (tree == 0) return; 93 | node = tr_stack(tree, Tree*); 94 | m = tr_expand_internal_node(tree, node); 95 | for (i = 0; i < m; ++i) { 96 | s = SDIptr(node[i]); 97 | node[i]->flag = (s->type == 'S')? 1 : 0; 98 | if (is_pseudo && s->type == 'D' && !s->is_confirmed_dup) 99 | node[i]->flag = 2; 100 | } 101 | free(node); 102 | } 103 | Ortholog *tr_ortho(Tree *tree, const Tree *spec, int is_pseudo) 104 | { 105 | int i, j; 106 | int n_tree_leaf; 107 | Tree **tree_leaf; 108 | Tree *p, *q; 109 | Ortholog *ortho; 110 | 111 | if (tree == 0 || spec == 0) return 0; 112 | 113 | n_tree_leaf = tree->n_leaf; 114 | ortho = tr_alloc_ortho(n_tree_leaf); 115 | tree_leaf = (Tree**)calloc(n_tree_leaf, sizeof(Tree*)); 116 | tr_expand_leaf_by_id(tree, tree_leaf); 117 | tr_ortho_aux(tree, is_pseudo); /* set Tree::flag */ 118 | 119 | /* foreach leaf */ 120 | for (i = 0; i < n_tree_leaf; ++i) { 121 | /* leaf taxon needs to be in the species tree */ 122 | if (spec(tree_leaf[i]) == 0) continue; 123 | /* foreach other leaf */ 124 | for (j = 0; j < i; ++j) { /* here is O(N^2) */ 125 | if (spec(tree_leaf[j]) == 0) continue; 126 | p = tree_leaf[i]; 127 | q = tree_leaf[j]; 128 | while (p->ftime != q->ftime) { /* find last common ancestor */ 129 | if (p->ftime < q->ftime) p = p->pre; 130 | else if (p->ftime > q->ftime) q = q->pre; 131 | } 132 | /* now, p == q is the LCA */ 133 | if (p->flag || spec(tree_leaf[i]) == spec(tree_leaf[j])) { /* ortholog OR within-species paralog */ 134 | ortho->rel[i][j] = 1; 135 | ortho->taxon[i][j] = spec(p); 136 | ortho->is_pseudo[i][j] = (char)p->flag; 137 | } 138 | } 139 | } 140 | free(tree_leaf); 141 | return ortho; 142 | } 143 | void tr_comp_ortho(Ortholog *ori, const Ortholog *bs) 144 | { 145 | int i, j; 146 | short *p, *q; 147 | for (i = 0; i < ori->n; ++i) { 148 | p = ori->rel[i]; 149 | q = bs->rel[i]; 150 
| for (j = 0; j < i; ++j) 151 | if (p[j] && q[j]) ++p[j]; 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /output.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "tree.h" 5 | #include "utils.h" 6 | #include "align.h" 7 | #include "align_lib/table.h" 8 | 9 | void tr_stat_output(FILE *fp, const Tree *root) 10 | { 11 | Tree **node, *p; 12 | SDIinfo *q; 13 | int m, i, j; 14 | 15 | if (root == 0) return; 16 | node = tr_stack(root, Tree*); 17 | m = tr_expand_internal_node(root, node); 18 | for (i = 0; i < m; ++i) { 19 | p = node[i]; 20 | q = SDIptr(p); 21 | if (q->type != 'N') { 22 | fprintf(fp, "%c\t%d\t%s", q->type, p->bs, spec(p)->name); 23 | if (q->type == 'D') { 24 | j = (spec(p) == spec(p->node[0]))? 1 : 0; 25 | if (spec(p->node[j]) && spec(p->node[j])->name) 26 | fprintf(fp, "\t%s", spec(p->node[j])->name); 27 | } 28 | fprintf(fp, "\t$"); 29 | for (j = 0; j < q->n_lost; ++j) 30 | fprintf(fp, "-%s", q->lost[j]->name); 31 | fputc('\n', fp); 32 | fflush(fp); 33 | } 34 | } 35 | free(node); 36 | } 37 | /* 38 | * Output ortholog information 39 | */ 40 | void tr_ortho_output(FILE *fp, const Ortholog *ortho, const Tree *tree) 41 | { 42 | int i, j; 43 | short *p; 44 | Tree **leaf; 45 | TreePtr *q; 46 | char *r; 47 | 48 | if (ortho == 0 || tree == 0) return; 49 | leaf = (Tree**)malloc(sizeof(Tree*) * ortho->n); 50 | for (i = 0; i < ortho->n; ++i) 51 | leaf[i] = 0; 52 | tr_expand_leaf_by_id(tree, leaf); 53 | for (i = 0; i < ortho->n; ++i) { 54 | p = ortho->rel[i]; 55 | q = ortho->taxon[i]; 56 | r = ortho->is_pseudo[i]; 57 | if (leaf[i] == 0) continue; 58 | for (j = 0; j < i; ++j) 59 | if (p[j] > 0 && leaf[j] != 0) { 60 | fprintf(fp, "%s\t%s\t%s\t%d\t%d\n", leaf[i]->name, leaf[j]->name, q[j]->name, (int)(p[j]-1), (int)(r[j])); 61 | /* fprintf(fp, "%s\t%s\t%s\t%d\n", leaf[j]->name, leaf[i]->name, q[j]->name, (int)(p[j]-1)); */ 62 | 
fflush(fp); 63 | } 64 | } 65 | free(leaf); 66 | } 67 | void tr_align_output(FILE *fp, const MultiAlign *ma) 68 | { 69 | int i, j, k; 70 | char *seq; 71 | 72 | assert(ma); 73 | for (i = 0; i < ma->n; ++i) { 74 | fprintf(fp, ">%s", ma->name[i]); 75 | if (ma->comment && ma->comment[i]) fprintf(fp, " %s", ma->comment[i]); 76 | fputc('\n', fp); 77 | fflush(fp); 78 | seq = ma->seq[i]; 79 | if (!ma->is_nucl) { 80 | for (j = 0; j < ma->len; j += 60) { 81 | for (k = j; k < ma->len && k < j + 60; ++k) 82 | fputc(aln_aa_rev_table[(int)seq[k]], fp); 83 | fputc('\n', fp); 84 | fflush(fp); 85 | } 86 | } else { 87 | for (j = 0; j < ma->len; j += 60) { 88 | for (k = j; k < ma->len && k < j + 60; ++k) 89 | fputc(aln_nt4_rev_table[(int)seq[k]], fp); 90 | fputc('\n', fp); 91 | fflush(fp); 92 | } 93 | } 94 | } 95 | } 96 | void tr_align_aln_output(FILE *fp, const MultiAlign *ma) 97 | { 98 | unsigned int max_name_len = 0; 99 | char format[20]; 100 | int i, j, k, len; 101 | char *seq; 102 | AlignQual *aq; 103 | 104 | assert(ma); 105 | fprintf(fp, "CLUSTAL W (1.83) multiple sequence alignment\n\n"); 106 | aq = ma_cal_qual(ma); 107 | for (i = 0; i < ma->n; ++i) 108 | if (max_name_len < strlen(ma->name[i])) max_name_len = strlen(ma->name[i]); 109 | len = 80 - 1 - max_name_len; 110 | sprintf(format, "%%-%ds", max_name_len + 1); 111 | for (j = 0; j < ma->len; j += len) { 112 | fputc('\n', fp); 113 | fflush(fp); 114 | for (i = 0; i < ma->n; ++i) { 115 | seq = ma->seq[i]; 116 | fflush(fp); 117 | fprintf(fp, format, ma->name[i]); 118 | if (!ma->is_nucl) { 119 | for (k = j; k < ma->len && k < j + len; ++k) 120 | fputc(aln_aa_rev_table[(int)seq[k]], fp); 121 | } else { 122 | for (k = j; k < ma->len && k < j + len; ++k) 123 | fputc(aln_nt4_rev_table[(int)seq[k]], fp); 124 | } 125 | fputc('\n', fp); 126 | fflush(fp); 127 | } 128 | if (!ma->is_nucl) { 129 | fprintf(fp, format, " "); 130 | for (k = j; k < ma->len && k < j + len; ++k) 131 | if (aq->pos_qual[k] == 100) fputc('*', fp); 132 | else 
					fprintf(fp, "%1d", aq->pos_qual[k]/10);
		}
		fputc('\n', fp);
		fflush(fp);
	}
	ma_free_AlignQual(aq);
}
/*
 * Print a square distance matrix: the first line holds the number of
 * entries, then one row per entry with the name left-justified in 30
 * columns followed by `count' distances printed with "%9.5f".
 * mat->dist is read as a row-major count x count array.
 */
void tr_matrix_output(FILE *fp, const Matrix *mat)
{
	int i, j, n;
	double *p;

	assert(fp); assert(mat);
	n = mat->count;
	fprintf(fp, " %d\n", n);
	for (i = 0; i < n; ++i) {
		p = mat->dist + i * n; /* start of row i */
		fprintf(fp, "%-30s ", mat->name[i]);
		for (j = 0; j < n; ++j)
			fprintf(fp, " %9.5f", p[j]);
		fputc('\n', fp);
	}
}
/*
 * Write the results held in `task'.
 *
 * Non-verbose mode prints a single tree: the cut subtree when both
 * task->cut and task->subtree are set, otherwise the full tree if present.
 * Verbose mode prints "@begin <section>" / "@end <section>" delimited
 * sections: full_tree, alignment (if any) and, when a species tree is
 * present, full_ortholog and full_stat; then the same cut_* sections for
 * the cut subtree.
 */
void tr_task_output(FILE *fp, const Task *task)
{
	assert(fp); assert(task);
	if (!task->is_verbose) {
		if (task->cut && task->subtree)
			tr_tree_output(fp, task->subtree, task->out_flag);
		else if (task->tree)
			tr_tree_output(fp, task->tree, task->out_flag);
		return;
	}
	if (task->tree) {
		fprintf(fp, "@begin full_tree\n");
		tr_tree_output(fp, task->tree, task->out_flag);
		fprintf(fp, "@end full_tree\n\n");
		if (task->aln) {
			fprintf(fp, "@begin alignment\n");
			tr_align_output(fp, task->aln);
			fprintf(fp, "@end alignment\n\n");
		}
		if (task->spec_tree) {
			fprintf(fp, "@begin full_ortholog\n");
			tr_ortho_output(fp, task->ortho, task->tree);
			fprintf(fp, "@end full_ortholog\n\n");
			fprintf(fp, "@begin full_stat\n");
			tr_stat_output(fp, task->tree);
			fprintf(fp, "@end full_stat\n\n");
		}
	}
	if (task->cut && task->subtree) {
		Tree *p;
		p = task->subtree;
		/* report from the parent when the parent is an 'S' node */
		if (p->pre && SDIptr(p->pre)->type == 'S') p = p->pre;
		fprintf(fp, "@begin cut_tree\n");
		tr_tree_output(fp, p, task->out_flag);
		fprintf(fp, "@end cut_tree\n\n");
		if (task->spec_tree) {
			fprintf(fp, "@begin cut_ortholog\n");
			tr_ortho_output(fp, task->ortho, p);
			fprintf(fp, "@end cut_ortholog\n\n");
			fprintf(fp, "@begin cut_stat\n");
			tr_stat_output(fp, p);
			fprintf(fp, "@end 
cut_stat\n\n"); 197 | } 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /parser.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include 4 | #include "y.tab.h" 5 | 6 | #define MAX_TOKEN 4096 7 | 8 | double lastfnum; 9 | char lastid[MAX_TOKEN]; 10 | int lineno = 0; 11 | int read_bootstrap = 0; 12 | 13 | static YY_BUFFER_STATE buffer = 0; 14 | 15 | int yylex(); 16 | void alloc_str_buffer(const char *str); 17 | void alloc_file_buffer(FILE *fp); 18 | void free_buffer(); 19 | 20 | int yywrap(void) { return 1; } 21 | %} 22 | 23 | letter [a-zA-Z] 24 | num [0-9] 25 | sign ("+"|"-") 26 | iden [^ \t\r\n:,;&()\[\]=]+ 27 | dist ":"{blank}*{sign}?(([0-9]+?"."[0-9]+([Ee]{sign}?[0-9]+)?)|([0-9]+[Ee]{sign}?[0-9]+)|[0-9]+) 28 | blank " "|"\t"|"\n"|"\r" 29 | nhx &&NHX 30 | comment \[(({blank}*)|({blank}*[^&\[\]][^\[\]]*))\] 31 | 32 | %% 33 | 34 | {dist} { lastfnum = atof(yytext + 1); return TR_DIST; } 35 | {iden} { 36 | if (strlen(yytext) >= MAX_TOKEN) yytext[MAX_TOKEN-1] = '\0'; 37 | strcpy(lastid, yytext); return TR_ID; 38 | } 39 | {nhx} { return TR_NHX; } 40 | {comment} {} 41 | {blank} { if (*yytext == '\n') ++lineno; } 42 | . 
{ return *yytext; } 43 | 44 | %% 45 | 46 | void alloc_str_buffer(const char *str) 47 | { 48 | buffer = yy_scan_string(str); 49 | } 50 | void alloc_file_buffer(FILE *fp) 51 | { 52 | buffer = yy_create_buffer(fp, YY_BUF_SIZE); 53 | yy_switch_to_buffer(buffer); 54 | } 55 | void free_buffer() 56 | { 57 | yy_delete_buffer(buffer); 58 | buffer = 0; 59 | } 60 | -------------------------------------------------------------------------------- /parser.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #define YYPARSER 3 | #define YYSTYPE Tree* 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "common/hash_char.h" 10 | #include "tree.h" 11 | #include "utils.h" 12 | 13 | extern char lastid[]; 14 | extern double lastfnum; 15 | extern int lineno; 16 | extern int read_bootstrap; 17 | 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | int yylex(); 22 | void yyerror(const char *); 23 | char *str_cpy(); 24 | int yyparse(); 25 | void alloc_str_buffer(const char *str); 26 | void alloc_file_buffer(FILE *fp); 27 | void free_buffer(); 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | 32 | static Tree *root; 33 | static hash_map_char *hash; 34 | static char *curr_key; 35 | 36 | Tree *tr_new_node() 37 | { 38 | Tree *t; 39 | t = (Tree*)malloc(sizeof(Tree)); 40 | t->ptr = 0; 41 | t->nhx = 0; 42 | t->n = 0; 43 | t->node = 0; 44 | t->bs = -1; 45 | t->d = -2e30; 46 | t->name = 0; 47 | t->id = -1; 48 | t->pre = 0; 49 | t->ftime = 0; 50 | t->n_leaf = 0; 51 | t->is_cons = 0; 52 | t->flag = 0; 53 | t->tree_index = 0; 54 | t->n_node = 0; 55 | t->node_array = 0; 56 | return t; 57 | } 58 | %} 59 | 60 | %token TR_ID TR_DIST TR_NHX 61 | 62 | %% 63 | 64 | tree : nhx_cell ';' 65 | { root = $1; return 0; } 66 | | 67 | { return 1; } 68 | ; 69 | 70 | nh_cell : node 71 | { $$ = $1; } 72 | | node TR_DIST 73 | { $$ = $1; $$->d = (double)lastfnum; } 74 | ; 75 | 76 | nhx_cell : nh_cell nhx 77 | { 78 | $$ = $1; 79 | if (hash && hash->size() > 0) 
{ 80 | char *s; 81 | $$->nhx = (void*)hash; 82 | if (hash->find("B", &s)) 83 | $$->bs = atoi(s); 84 | if (hash->find("T", &s)) 85 | $$->tree_index = atoi(s); 86 | } 87 | hash = 0; 88 | } 89 | | nh_cell 90 | { $$ = $1; } 91 | ; 92 | 93 | nhx : '[' nhx_start key_val_list ']' 94 | | '[' nhx_start ']' 95 | { delete hash; hash = 0; } 96 | ; 97 | 98 | nhx_start : TR_NHX 99 | { hash = new hash_map_char; } 100 | ; 101 | 102 | key_val_list: key_val_list key_val_pair 103 | | key_val_pair 104 | ; 105 | 106 | key_val_pair: ':' key '=' TR_ID 107 | { if (hash) hash->insert(curr_key, str_cpy()); free(curr_key); } 108 | ; 109 | 110 | key : TR_ID 111 | { curr_key = str_cpy(); } 112 | ; 113 | 114 | node : TR_ID 115 | { $$ = tr_new_node(); $$->name = str_cpy(); } 116 | | inner 117 | { $$ = $1; } 118 | ; 119 | 120 | list : list ',' nhx_cell 121 | { 122 | $1->node = (Tree**)realloc($1->node, sizeof(Tree*) * ($1->n + 1)); 123 | $1->node[$1->n] = $3; 124 | ++($1->n); 125 | $$ = $1; 126 | $3->pre = $$; 127 | } 128 | | nhx_cell 129 | { 130 | $$ = tr_new_node(); 131 | $$->n = 1; 132 | $$->node = (Tree**)malloc(sizeof(Tree*)); 133 | $$->node[0] = $1; 134 | $1->pre = $$; 135 | } 136 | ; 137 | 138 | inner : '(' list ')' 139 | { $$ = $2; } 140 | | '(' list ')' TR_ID 141 | { $$ = $2; $$->name = str_cpy(); } 142 | ; 143 | 144 | %% 145 | 146 | char *str_cpy() 147 | { 148 | char *s = (char*)malloc(sizeof(char) * (strlen(lastid) + 1)); 149 | return strcpy(s, lastid); 150 | } 151 | void yyerror(const char *err) 152 | { 153 | fprintf(stderr, "ERROR: %s in line %d, near (%s,%g)\n", err, lineno, lastid, (double)lastfnum); 154 | } 155 | void tr_delete_tree(Tree *root) 156 | { 157 | int i, m, is_free; 158 | Tree **node; 159 | if (root == 0) return; 160 | if (!root->node_array) { 161 | node = tr_stack(root, Tree*); 162 | m = tr_expand_node(root, node); 163 | is_free = 1; 164 | } else { 165 | node = root->node_array; 166 | m = root->n_node; 167 | is_free = 0; 168 | } 169 | for (i = 0; i < m; ++i) { 170 | 
Tree *t = node[i]; // the following is exactly the same as tr_delete_node() 171 | if (t->n > 0) free(t->node); 172 | if (t->name) free(t->name); 173 | if (t->ptr) free(t->ptr); 174 | if (t->nhx) { 175 | hash_map_char *p = (hash_map_char *)t->nhx; 176 | hash_map_char::iterator iter; 177 | for (iter = p->begin(); iter < p->end(); ++iter) 178 | if (isfilled(iter)) free(iter->val); 179 | delete p; 180 | } 181 | if (t->node_array) free(t->node_array); 182 | free(t); 183 | } 184 | if (is_free) free(node); 185 | } 186 | Tree **tr_parse(FILE *fp, int *n) 187 | { 188 | Tree **forest; 189 | alloc_file_buffer(fp); 190 | lineno = 0; 191 | forest = 0; 192 | *n = 0; 193 | while (yyparse() == 0) { 194 | tr_tree_init(root); /* calculate finish time */ 195 | tr_attach_id(root); /* add default ID */ 196 | forest = (Tree**)realloc(forest, sizeof(Tree*) * (*n + 1)); 197 | forest[(*n)++] = root; 198 | } 199 | free_buffer(); 200 | return forest; 201 | } 202 | Tree *tr_parse_first(FILE *fp) 203 | { 204 | Tree *tree; 205 | alloc_file_buffer(fp); 206 | lineno = 0; 207 | tree = 0; 208 | if (yyparse() == 0) { 209 | tr_tree_init(root); /* calculate finish time */ 210 | tr_attach_id(root); /* add default ID */ 211 | tree = root; 212 | } 213 | free_buffer(); 214 | return tree; 215 | } 216 | Tree **tr_parse_str(const char *str, int *n) 217 | { 218 | Tree **forest; 219 | 220 | alloc_str_buffer(str); 221 | lineno = 0; 222 | forest = 0; 223 | *n = 0; 224 | while (yyparse() == 0) { 225 | tr_tree_init(root); /* calculate finish time */ 226 | tr_attach_id(root); /* add default ID */ 227 | forest = (Tree**)realloc(forest, sizeof(Tree*) * (*n + 1)); 228 | forest[(*n)++] = root; 229 | } 230 | free_buffer(); 231 | return forest; 232 | } 233 | -------------------------------------------------------------------------------- /phyml.h: -------------------------------------------------------------------------------- 1 | #ifdef HAVE_PHYML 2 | #ifndef lh3_PHYLOTREE_PHYML_H_ 3 | #define lh3_PHYLOTREE_PHYML_H_ 4 | 
5 | #include "tree.h" 6 | #include "align.h" 7 | 8 | struct __Arbre; 9 | 10 | typedef struct __PhymlConfig 11 | { 12 | int is_nucl; 13 | int is_stat; 14 | int bs; 15 | char *model; 16 | double kappa; 17 | double alpha; 18 | double invar; 19 | int n_cat; 20 | 21 | /* for phyml addon */ 22 | double prob_not_exist; 23 | double prob_dup; 24 | double prob_loss_dup; 25 | double prob_loss_spec; 26 | double lk_scale; 27 | Tree *ctree, *spec_tree; 28 | } PhymlConfig; 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif 33 | 34 | void phyml_init(); 35 | Tree *phyml_core(MultiAlign *ma, PhymlConfig *pc, Tree *inp, int is_build, int is_opt); 36 | Tree *phyml_arbre2tree(struct __Arbre *tree); 37 | Matrix *phyml_matrix(MultiAlign *ma, PhymlConfig *pc); 38 | PhymlConfig *phyml_init_config(); 39 | void phyml_free_config(PhymlConfig *pc); 40 | int phyml_task(int argc, char *argv[]); 41 | 42 | #ifdef __cplusplus 43 | } 44 | #endif 45 | 46 | #endif 47 | #endif /* HAVE_PHYML */ 48 | -------------------------------------------------------------------------------- /phyml/Makefile: -------------------------------------------------------------------------------- 1 | CC= gcc 2 | CXX= g++ 3 | CFLAGS= -W -Wall -O2 -fomit-frame-pointer 4 | CXXFLAGS= $(CFLAGS) 5 | DFLAGS= -DPHYML 6 | DFLAGS_SPEC=-DUNIX -DLH3_REVISION -DLH3_ADDON 7 | PROG= phyml 8 | LIBOBJS= bionj.o eigen.o free.o ml.o models.o optimiz.o options.o simu.o \ 9 | utilities.o lh3_addon.o lh3_spec.o 10 | INCLUDES= -I. 11 | LIBS= -L. -lphyml -lm 12 | SUBDIRS= . 
13 | 14 | .SUFFIXES:.c .cc .o 15 | 16 | .c.o: 17 | $(CC) -c $(DFLAGS) $(DFLAGS_SPEC) $(CFLAGS) $(INCLUDES) $< -o $@ 18 | 19 | .cc.o: 20 | $(CXX) -c $(DFLAGS) $(DFLAGS_SPEC) $(CXXFLAGS) $(INCLUDES) $< -o $@ 21 | 22 | all:$(PROG) 23 | 24 | lib-recur all-recur clean-recur install-recur: 25 | @target=`echo $@ | sed s/-recur//`; \ 26 | wdir=`pwd`; \ 27 | list='$(SUBDIRS)'; for subdir in $$list; do \ 28 | cd $$subdir; \ 29 | $(MAKE) CC="$(CC)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \ 30 | INCLUDES="$(INCLUDES)" $$target || exit 1; \ 31 | cd $$wdir; \ 32 | done; 33 | 34 | lib:libphyml.a 35 | 36 | libphyml.a:$(LIBOBJS) 37 | $(AR) -cru $@ $(LIBOBJS) 38 | 39 | phyml:lib-recur main.o 40 | $(CC) $(CFLAGS) $(DFLAGS) main.o -o $@ $(LIBS) 41 | 42 | clean: 43 | rm -f gmon.out *.o a.out phyml lib*.a 44 | -------------------------------------------------------------------------------- /phyml/Makefile.mac: -------------------------------------------------------------------------------- 1 | CC= gcc 2 | CXX= g++ 3 | CFLAGS= -Wall -O4 -fomit-frame-pointer 4 | CXXFLAGS= $(CFLAGS) 5 | DFLAGS= -DPHYML 6 | DFLAGS_SPEC=-DUNIX -DLH3_REVISION -DLH3_ADDON 7 | PROG= phyml 8 | LIBOBJS= bionj.o eigen.o free.o ml.o models.o optimiz.o options.o simu.o \ 9 | utilities.o lh3_addon.o lh3_spec.o 10 | INCLUDES= -I. 11 | LIBS= -L. -lphyml -lm 12 | SUBDIRS= . 
13 | 14 | .SUFFIXES:.c .cc .o 15 | 16 | .c.o: 17 | $(CC) -c $(DFLAGS) $(DFLAGS_SPEC) $(CFLAGS) $(INCLUDES) $< -o $@ 18 | 19 | .cc.o: 20 | $(CXX) -c $(DFLAGS) $(DFLAGS_SPEC) $(CXXFLAGS) $(INCLUDES) $< -o $@ 21 | 22 | all:$(PROG) 23 | 24 | lib-recur all-recur clean-recur install-recur: 25 | @target=`echo $@ | sed s/-recur//`; \ 26 | wdir=`pwd`; \ 27 | list='$(SUBDIRS)'; for subdir in $$list; do \ 28 | cd $$subdir; \ 29 | $(MAKE) CC="$(CC)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \ 30 | INCLUDES="$(INCLUDES)" $$target -f Makefile.mac || exit 1; \ 31 | cd $$wdir; \ 32 | done; 33 | 34 | lib:libphyml.a 35 | 36 | libphyml.a:$(LIBOBJS) 37 | libtool -static -o $@ $(LIBOBJS) 38 | 39 | phyml:lib-recur main.o 40 | $(CC) $(CFLAGS) $(DFLAGS) main.o -o $@ $(LIBS) 41 | 42 | clean: 43 | rm -f gmon.out *.o a.out phyml lib*.a 44 | -------------------------------------------------------------------------------- /phyml/bionj.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | PHYML : a program that computes maximum likelihood phylogenies from 4 | DNA or AA homologous sequences 5 | 6 | Copyright (C) Stephane Guindon. Oct 2003 onward 7 | 8 | All parts of the source except where indicated are distributed under 9 | the GNU public licence. See http://www.opensource.org for details. 
10 | 11 | */ 12 | 13 | #ifndef NJ_H 14 | #define NJ_H 15 | 16 | #include "utilities.h" 17 | #include "optimiz.h" 18 | /*#include "tools.h"*/ 19 | 20 | void Bionj(matrix *mat); 21 | void Finish(matrix *mat); 22 | void Bionj_Scores(matrix *mat); 23 | void Compute_Sx(matrix *mat); 24 | double Sum_S(matrix *mat, int i); 25 | double Dist(matrix *mat, int x, int y); 26 | double Q_Agglo(matrix *mat, int x, int y); 27 | double Variance(matrix *mat, int x, int y); 28 | double Br_Length(matrix *mat, int x, int y); 29 | void Update_Dist(matrix *mat, int x, int y); 30 | double Lamda(matrix *mat, int x, int y, double vxy); 31 | void Best_Pair(matrix *mat, int *x, int *y, double *score); 32 | double Var_Red(matrix *mat, int x, int y, int i, double lamda, double vxy); 33 | void Update_Tree(matrix *mat, int x, int y, double lx, double ly, double score); 34 | void Update_Mat(matrix *mat, int x, int y, 35 | double lx, double ly, double vxy, double lamda); 36 | double Dist_Red(matrix *mat, int x, double lx, int y, 37 | double ly, int i, double lamda); 38 | int Bionj_Br_Length_Post(node *a, node *d, matrix *mat); 39 | void Bionj_Br_Length(matrix *mat); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /phyml/eigen.h: -------------------------------------------------------------------------------- 1 | #ifndef EIGEN_H 2 | #define EIGEN_H 3 | 4 | int eigen(int job, double *A, int n, double *rr, double *ri, 5 | double *vr, double *vi, double *w); 6 | void balance(double *mat, int n, int *low, int *hi, double *scale); 7 | void unbalance(int n, double *vr, double *vi, int low, int hi, 8 | double *scale); 9 | int realeig(int job, double *mat, int n,int low, int hi, double *valr, 10 | double *vali, double *vr, double *vi); 11 | void elemhess(int job, double *mat, int n, int low, int hi, 12 | double *vr, double *vi, int *work); 13 | 14 | 15 | /* complex functions */ 16 | 17 | typedef struct { double re, im; } complex; 18 | #define csize(a) 
(fabs(a.re)+fabs(a.im)) 19 | 20 | complex compl (double re,double im); 21 | complex _conj (complex a); 22 | complex cplus (complex a, complex b); 23 | complex cminus (complex a, complex b); 24 | complex cby (complex a, complex b); 25 | complex cdiv (complex a,complex b); 26 | /* complex local_cexp (complex a); */ 27 | complex cfactor (complex x, double a); 28 | int cxtoy (complex *x, complex *y, int n); 29 | int cmatby (complex *a, complex *b, complex *c, int n,int m,int k); 30 | int cmatout (FILE * fout, complex *x, int n, int m); 31 | int cmatinv( complex *x, int n, int m, double *space); 32 | 33 | 34 | #endif 35 | 36 | -------------------------------------------------------------------------------- /phyml/free.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | PHYML : a program that computes maximum likelihood phylogenies from 4 | DNA or AA homologous sequences 5 | 6 | Copyright (C) Stephane Guindon. Oct 2003 onward 7 | 8 | All parts of the source except where indicated are distributed under 9 | the GNU public licence. See http://www.opensource.org for details. 
10 | 11 | */ 12 | 13 | #ifndef FREE_H 14 | #define FREE_H 15 | 16 | void Free_All_Nodes_Light(arbre *tree); 17 | void Free_All_Edges_Light(arbre *tree); 18 | void Free_Mat(matrix *mat); 19 | void Free_Partial_Lk(double ****p_lk, int len, int n_catg); 20 | void Free_Tree(arbre *tree); 21 | void Free_Edge(edge *b); 22 | void Free_Node(node *n); 23 | void Free_Cseq(allseq *alldata); 24 | void Free_Seq(seq **d, int n_otu); 25 | void Free_All(seq **d, allseq *alldata, arbre *tree); 26 | void Free_SubTree(edge *b_fcus, node *a, node *d, arbre *tree); 27 | void Free_Tree_Ins_Tar(arbre *tree); 28 | void Free_Tree_Lk(arbre *tree); 29 | void Free_dPij(arbre *tree); 30 | void Free_NNI(arbre *tree); 31 | void Free_Edge_P_Lk_Struct(edge *b, arbre *tree); 32 | void Free_Node_Lk(node *n); 33 | void Free_Edge_Lk(arbre *tree, edge *b); 34 | void Free_Model(model *mod); 35 | void Free(void *p); 36 | void Free_Input(option *input); 37 | void Free_Code(code *c_code); 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /phyml/lh3_addon.h: -------------------------------------------------------------------------------- 1 | #ifdef LH3_ADDON 2 | #ifndef LH3_ADDON_H_ 3 | #define LH3_ADDON_H_ 4 | 5 | #include "../tree.h" 6 | 7 | struct __Arbre; 8 | struct __Edge; 9 | struct __PhymlConfig; 10 | 11 | typedef struct 12 | { 13 | int is_dup, tot_dup; 14 | int n_loss, tot_dup_loss, tot_spec_loss; 15 | int is_cal; 16 | TreePtr spec; 17 | int *passed; 18 | } lh3Spec; 19 | 20 | typedef struct 21 | { 22 | double ratio_not_exist; 23 | double log_dup, log_spec; 24 | double log_loss_spec, log_present_spec; 25 | double log_loss_dup, log_present_dup; 26 | double ratio_cons, loglk_spec; 27 | double lk_scale; 28 | Tree *ctree, *spec_tree; 29 | int is_NNI, min_branch; 30 | void *hs; /* HashedSets* */ 31 | } lh3Tree; 32 | 33 | typedef struct 34 | { 35 | Tree *cleaf, *spec; 36 | } lh3Node; 37 | 38 | typedef struct 39 | { 40 | int do_exist; 41 | int is_cal[2]; 
/* whether the left part (0) and the right part (1) have been calculated. */ 42 | char *array[2]; /* leaf array for the left part (0) and the right part (1). */ 43 | lh3Spec spec[2]; 44 | lh3Spec cur_spec; 45 | } lh3Edge; 46 | 47 | #ifdef __cplusplus 48 | extern "C" { 49 | #endif 50 | void lh3_addon_init(struct __Arbre *tree, struct __PhymlConfig *pc); 51 | void lh3_addon_free(struct __Arbre *tree); 52 | 53 | void lh3_cons_clear_cal_flag(struct __Arbre *tree); 54 | void lh3_cons_cal_one_branch(struct __Arbre *tree, struct __Edge *start, int direction); 55 | void lh3_cons_cal_all_branch(struct __Arbre *tree); 56 | double lh3_cons_update_one_ratio(struct __Arbre *tree, struct __Edge *b_fcus); 57 | double lh3_cons_cal_all_ratio(struct __Arbre *tree); 58 | 59 | void lh3_spec_clear_cal_flag(struct __Arbre *tree); 60 | void lh3_spec_cal_one_branch(struct __Arbre *tree, struct __Edge *start, int direction); 61 | void lh3_spec_cal_all_branch(struct __Arbre *tree); 62 | double lh3_spec_update_one_ratio(struct __Arbre *tree, struct __Edge *b_fcus); 63 | double lh3_spec_cal_all_ratio(struct __Arbre *tree); 64 | #ifdef __cplusplus 65 | } 66 | #endif 67 | 68 | #endif 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /phyml/ml.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | PHYML : a program that computes maximum likelihood phylogenies from 4 | DNA or AA homologous sequences 5 | 6 | Copyright (C) Stephane Guindon. Oct 2003 onward 7 | 8 | All parts of the source except where indicated are distributed under 9 | the GNU public licence. See http://www.opensource.org for details. 
10 | 11 | */ 12 | 13 | #ifndef ML_H 14 | #define ML_H 15 | 16 | 17 | void Init_Tips_At_One_Site_Nucleotides(char state,double **p_lk); 18 | void Init_Tips_At_One_Site_AA(char aa,double **p_lk); 19 | void Get_All_Partial_Lk(arbre *tree,edge *b_fcus,node *a,node *d); 20 | void Get_All_Partial_Lk_Scale(arbre *tree,edge *b_fcus,node *a,node *d); 21 | void Pre_Order_Lk(node *pere,node *fils,arbre *tree); 22 | void Post_Order_Lk(node *pere,node *fils,arbre *tree); 23 | void Lk(arbre *tree,allseq *alldata); 24 | void Site_Lk(arbre *tree,allseq *alldata); 25 | double Lk_At_Given_Edge(arbre *tree,edge *b_fcus); 26 | void Update_P(arbre *tree,int t_edge_num); 27 | double Return_Lk(arbre *tree); 28 | double Return_Abs_Lk(arbre *tree); 29 | matrix *ML_Dist_fixed(allseq *data, model *mod, int n_cat); 30 | matrix *ML_Dist(allseq *data,model *mod); 31 | double Lk_Given_Two_Seq(allseq *data,int numseq1,int numseq2,double dist,model *mod,double *loglk,double *dloglk,double *d2loglk); 32 | double ***Get_Partial_Lk_Struct(arbre *tree,int len,int n_catg); 33 | void Unconstraint_Lk(arbre *tree); 34 | void Update_P_Lk(arbre *tree,edge *b_fcus,node *n); 35 | void Make_Tree_4_Lk(arbre *tree,allseq *alldata,int n_site); 36 | void Init_P_Lk_Tips(arbre *tree); 37 | 38 | 39 | 40 | #endif 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /phyml/models.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | PHYML : a program that computes maximum likelihood phylogenies from 4 | DNA or AA homologous sequences 5 | 6 | Copyright (C) Stephane Guindon. Oct 2003 onward 7 | 8 | All parts of the source except where indicated are distributed under 9 | the GNU public licence. See http://www.opensource.org for details. 
10 | 11 | */ 12 | 13 | #ifndef MODELS_H 14 | #define MODELS_H 15 | 16 | void PMat(double l, model *mod, double ***Pij); 17 | void PMat_K80(double l,double kappa, double ***Pij); 18 | void PMat_TN93(double l, model *mod, double ***Pij); 19 | void PMat_Empirical(double l, model *mod, double ***Pij); 20 | void dPMat(double l, double rr, model *mod, double ***dPij); 21 | void d2PMat(double l, double rr, model *mod, double ***d2Pij); 22 | void dPMat_K80(double l, double ***dPij, double rr, double k); 23 | void d2PMat_K80(double l, double ***d2Pij, double rr, double k); 24 | void dPMat_TN93(double l, double ***dPij, model *mod, double rr); 25 | void d2PMat_TN93(double l, double ***dPij, model *mod, double rr); 26 | int GetDaa (double *daa, double *pi, char *file_name); 27 | int Matinv (double *x, int n, int m, double *space); 28 | void Init_Model(allseq *data, model *mod); 29 | int Init_Qmat_Dayhoff(double *daa, double *pi); 30 | int Init_Qmat_JTT(double *daa, double *pi); 31 | void Update_Qmat_GTR(model *mod); 32 | void Translate_Custom_Mod_String(model *mod); 33 | int Init_Qmat_WAG(double *daa, double *pi); 34 | void Set_Model_Parameters(arbre *tree); 35 | int Init_Qmat_RtREV(double *daa, double *pi); 36 | int Init_Qmat_CpREV(double *daa, double *pi); 37 | int Init_Qmat_VT(double *daa, double *pi); 38 | int Init_Qmat_Blosum62(double *daa, double *pi); 39 | int Init_Qmat_MtMam(double *daa, double *pi); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /phyml/optimiz.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | PHYML : a program that computes maximum likelihood phylogenies from 4 | DNA or AA homologous sequences 5 | 6 | Copyright (C) Stephane Guindon. Oct 2003 onward 7 | 8 | All parts of the source except where indicated are distributed under 9 | the GNU public licence. See http://www.opensource.org for details. 
10 | 11 | */ 12 | 13 | #ifndef OPTIMIZ_H 14 | #define OPTIMIZ_H 15 | 16 | void Optimiz_Ext_Br(arbre *tree); 17 | void Optimize_Alpha(arbre *tree); 18 | void Optimize_Kappa(arbre *tree); 19 | void Optimize_Lambda(arbre *tree); 20 | void Optimize_Param_Parall(arbre *tree); 21 | double Br_Len_NR(arbre *tree,allseq *alldata,edge *b_fcus, int n_passes); 22 | double Optimize_Branch_Quad(arbre *tree, allseq *alldata, edge *b_fcus); 23 | void Optimize_After_Hide(arbre *tree, allseq *alldata, node *h); 24 | void Round_Optimize(arbre *tree, allseq *data); 25 | void Print_Lk_Progress(arbre *tree, double lk_new, double lk_old, int n_iter); 26 | int Dist_Seq_Brak(double *ax, double *bx, double *cx, 27 | double *fa, double *fb, double *fc, 28 | allseq *data, int num1, int num2, model *mod); 29 | double Dist_Seq_Brent(double ax, double bx, double cx, double tol, 30 | double *xmin, allseq *data, 31 | int num1, int num2, model *mod); 32 | double Optimize_One_Dist(allseq *data, int numseq1, int numseq2, 33 | double init_dist, model *mod); 34 | double Kappa_Golden(double ax, double bx, double cx, double tol, 35 | double *xmin, arbre *tree, allseq *alldata); 36 | double Lambda_Golden(double ax, double bx, double cx, double tol, 37 | double *xmin, arbre *tree, allseq *alldata); 38 | double Alpha_Golden_Br_Opt(double ax, double bx, double cx, double tol, 39 | double *xmin, arbre *tree, allseq *alldata, 40 | int n_opt, double *init_l); 41 | double Alpha_Golden(double ax, double bx, double cx, double tol,double *xmin, 42 | arbre *tree, allseq *alldata); 43 | double Br_Len_Golden(double ax, double bx, double cx, double tol, 44 | double *xmin, edge *b_fcus, arbre *tree); 45 | double Br_Len_Brent(double ax, double bx, double cx, double tol, 46 | double *xmin, edge *b_fcus, arbre *tree, int n_iter_max); 47 | int Br_Len_Brak(double *ax, double *bx, double *cx, 48 | double *fa, double *fb, double *fc, 49 | edge *b_fcus, arbre *tree); 50 | double Optimize_Path_Length(model *mod, allseq *alldata, 
edge *a, 51 | int lra, edge *b, int lrb, double i_len); 52 | void Optimize_Param_Serie(node *a, node *d, edge *b_fcus, arbre *tree, 53 | allseq *alldata, int n_passes); 54 | double Optimize_Dist(model *mod, double init, allseq *twoseqs); 55 | double Pinvar_Golden(double ax, double bx, double cx, double tol, 56 | double *xmin, arbre *tree, allseq *alldata, int n_iter_max); 57 | void Optimize_Pinvar(arbre *tree); 58 | int Lambda_Brak(double *ax, double *bx, double *cx, 59 | double *fa, double *fb, double *fc, 60 | arbre *tree); 61 | int Kappa_Brak(double *ax, double *bx, double *cx, 62 | double *fa, double *fb, double *fc, 63 | arbre *tree); 64 | int Alpha_Brak(double *ax, double *bx, double *cx, 65 | double *fa, double *fb, double *fc, 66 | arbre *tree); 67 | int Pinvar_Brak(double *ax, double *bx, double *cx, 68 | double *fa, double *fb, double *fc, 69 | arbre *tree); 70 | void Optimiz_All_Free_Param(arbre *tree, int verbose); 71 | void Optimiz_RRparam_GTR(arbre *tree, int num_param); 72 | double RRparam_GTR_Golden(double ax, double bx, double cx, double tol, 73 | double *xmin, arbre *tree, allseq *alldata, double *param, int n_iter_max); 74 | 75 | int Powell_GTR_Param(arbre *tree, double *p, int n, double ftol); 76 | double Linmin_GTR_Param(arbre *tree,double *p, double *xi, int n); 77 | double F1dim(arbre *tree, double x, double *p, double *xi, double n); 78 | int Mnbrak_1dim(double *ax, double *bx, double *cx, 79 | double *fa, double *fb, double *fc, 80 | arbre *tree, 81 | double *p, double *xi, double n); 82 | double Brent_1dim(double ax, double bx, double cx, 83 | double tol, double *xmin, 84 | arbre *tree, 85 | double *p, double *xi, double n); 86 | 87 | int Min_With_Derivatives(arbre *tree, double *p, int n, double ftol, double step_size, 88 | double (*func) (), void (*dfunc)(), double (*linmin)()); 89 | void BFGS(arbre *tree, double *p, int n, double gtol, double step_size, 90 | double(*func)(), void (*dfunc)(), void (*lnsrch)(),int *failed); 91 | void 
Lnsrch_RR_Param(arbre *tree, int n, double *xold, double fold, double *g, double *p, double *x, 92 | double *f, double stpmax, int *check); 93 | void Optimize_Single_Param_Generic(arbre *tree, double *param, 94 | double start, 95 | double lim_inf, double lim_sup, 96 | int n_max_iter); 97 | int Generic_Brak(double *param, 98 | double *ax, double *bx, double *cx, 99 | double *fa, double *fb, double *fc, 100 | double lim_inf, double lim_sup, 101 | arbre *tree); 102 | double Generic_Brent(double *param, 103 | double ax, double bx, double cx, double tol, 104 | double *xmin, arbre *tree, int n_iter_max); 105 | void Optimize_Br_Len_Serie(node *a, node *d, edge *b_fcus, 106 | arbre *tree,allseq *alldata, int n_passes); 107 | void Lnsrch_Nucleotide_Frequencies(arbre *tree, int n, double *xold, 108 | double fold, double *g, double *p, double *x, 109 | double *f, double stpmax, int *check); 110 | 111 | void Optimize_Global_Rate(arbre *tree); 112 | 113 | 114 | 115 | #endif 116 | 117 | -------------------------------------------------------------------------------- /phyml/options.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ensembl/treebest/347fa82a0ce1c169849053fdc9ff7d19d221f290/phyml/options.c -------------------------------------------------------------------------------- /phyml/options.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | PHYML : a program that computes maximum likelihood phylogenies from 4 | DNA or AA homologous sequences 5 | 6 | Copyright (C) Stephane Guindon. Oct 2003 onward 7 | 8 | All parts of the source except where indicated are distributed under 9 | the GNU public licence. See http://www.opensource.org for details. 
10 | 11 | */ 12 | 13 | #ifndef OPTIONS_H 14 | #define OPTIONS_H 15 | 16 | void Usage(); 17 | option *Get_Input(int argc, char **argv); 18 | void Init_Optimiz(optimiz *s_opt); 19 | void Get_Input_Interactive(option *input); 20 | void Get_Input_CommandLine_DNA(option *input, int argc, char **argv); 21 | void Get_Input_CommandLine_AA(option *input, int argc, char **argv); 22 | void Set_Defaults(option* input); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /phyml/simu.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | PHYML : a program that computes maximum likelihood phylogenies from 4 | DNA or AA homologous sequences 5 | 6 | Copyright (C) Stephane Guindon. Oct 2003 onward 7 | 8 | All parts of the source except where indicated are distributed under 9 | the GNU public licence. See http://www.opensource.org for details. 10 | 11 | */ 12 | 13 | #ifndef CURR_H 14 | #define CURR_H 15 | 16 | void Simu(arbre *tree,int n_step_max); 17 | void Select_Edges_To_Swap(arbre *tree,edge **sorted_b,int *n_neg); 18 | void Fix_All(arbre *tree); 19 | void Update_Bl(arbre *tree,double fact); 20 | void Make_N_Swap(arbre *tree,edge **b,int beg,int end); 21 | int Make_Best_Swap(arbre *tree); 22 | int Mov_Backward_Topo_Bl(arbre *tree,double lk_old,edge **tested_b,int n_tested); 23 | void Unswap_N_Branch(arbre *tree,edge **b,int beg,int end); 24 | void Swap_N_Branch(arbre *tree,edge **b,int beg,int end); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /pre_cons.c: -------------------------------------------------------------------------------- 1 | /** \file pre_cons.c 2 | * \brief Pre-process the constrained trees. 
3 | */ 4 | #include 5 | #include 6 | #include "tree.h" 7 | #include "utils.h" 8 | 9 | const char *tr_cons_flag = "{C}"; 10 | const char *tr_free_flag = "{P}"; 11 | 12 | static void label_cons(Tree *cons, int root_flag) 13 | { 14 | int i, m; 15 | Tree **node, *p; 16 | 17 | if (cons == 0 || cons->n_leaf == 1) return; 18 | node = tr_stack(cons, Tree*); 19 | m = tr_expand_internal_node(cons, node); 20 | node[m - 1]->is_cons = root_flag; 21 | /* is_cons == 1: inheritable constraint 22 | * is_cons == 2: temporary constraint 23 | * is_cons == 0: no constraint 24 | */ 25 | for (i = m - 2; i >= 0; --i) { 26 | p = node[i]; 27 | if (p->name) { 28 | if (strstr(p->name, tr_cons_flag)) p->is_cons = 1; 29 | else if (strstr(p->name, tr_free_flag)) p->is_cons = 0; 30 | else if (p->name[0]) p->is_cons = (p->pre->is_cons != 1)? 2 : 1; 31 | else p->is_cons = (p->pre->is_cons >= 2)? 0 : p->pre->is_cons; 32 | } else p->is_cons = (p->pre->is_cons >= 2)? 0 : p->pre->is_cons; 33 | } 34 | free(node); 35 | } 36 | /** \fn Tree *tr_pre_cons(Tree *cons, int root_flag) 37 | * \brief Pre-process the constrained trees. 38 | * \param cons A constrained tree. 39 | * \param root_flag Whether the root is labeled as {C}. 40 | * 41 | * This function makes multifurcation at nodes labeled 42 | * as {P} and returns the resultant tree. It first labels the root according to 43 | * root_flag, and then scans and labels, from the root, the nodes without {P} or {C} 44 | * tags. Finally, adjacent {P} nodes are merged to a multifurcated one. 
45 | */ 46 | Tree *tr_pre_cons(Tree *cons, int root_flag) 47 | { 48 | Tree **node, **subnode, *p, *q; 49 | int m, i, j, k; 50 | 51 | if (cons == 0) return 0; 52 | label_cons(cons, root_flag); 53 | node = tr_stack(cons, Tree*); 54 | subnode = tr_stack(cons, Tree*); 55 | m = tr_expand_node(cons, node); 56 | for (i = 0; i < m; ++i) { 57 | p = node[i]; 58 | subnode[i] = q = tr_new_node(); 59 | if (p->n == 0) { 60 | q->name = (char*)malloc(sizeof(char) * (strlen(p->name) + 1)); 61 | strcpy(q->name, p->name); 62 | q->id = p->id; 63 | q->d = p->d; 64 | q->ptr = p->ptr; 65 | q->is_cons = 1; 66 | } else { 67 | int count = 0, count2 = 0; 68 | if (!p->is_cons) { 69 | for (j = 0; j < p->n; ++j) 70 | if (subnode[p->node[j]->ftime]->is_cons == 0) { 71 | count += subnode[p->node[j]->ftime]->n; 72 | ++count2; 73 | } 74 | } 75 | if (count == 0) { 76 | q->n = p->n; 77 | q->bs = p->bs; 78 | q->ptr = p->ptr; 79 | q->d = p->d; 80 | q->is_cons = p->is_cons; 81 | q->node = (Tree**)malloc(sizeof(Tree*) * q->n); 82 | if (p->name) { 83 | q->name = (char*)malloc(sizeof(char) * (strlen(p->name) + 1)); 84 | strcpy(q->name, p->name); 85 | } 86 | for (j = 0; j < q->n; ++j) { 87 | subnode[p->node[j]->ftime]->pre = q; 88 | q->node[j] = subnode[p->node[j]->ftime]; 89 | } 90 | } else { 91 | int l; 92 | Tree *r; 93 | q->n = p->n - count2 + count; 94 | q->node = (Tree**)malloc(sizeof(Tree*) * q->n); 95 | q->is_cons = 0; 96 | for (k = j = 0; j < p->n; ++j) { 97 | r = subnode[p->node[j]->ftime]; 98 | if (r->is_cons) 99 | q->node[k++] = r; 100 | else { 101 | for (l = 0; l < r->n; ++l) { 102 | q->node[k++] = r->node[l]; 103 | r->node[l]->pre = q; 104 | } 105 | tr_delete_node(r); 106 | subnode[p->node[j]->ftime] = 0; 107 | } 108 | } 109 | } 110 | } 111 | } 112 | p = subnode[m-1]; 113 | free(node); 114 | free(subnode); 115 | tr_tree_init(p); 116 | return p; 117 | } 118 | -------------------------------------------------------------------------------- /read.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "tree.h" 5 | 6 | Matrix *tr_read_matrix(FILE *fp) 7 | { 8 | int i, j, n; 9 | char name[256]; 10 | Matrix *mat; 11 | double x; 12 | 13 | assert(fp); 14 | fscanf(fp, "%d", &n); 15 | mat = (Matrix*)malloc(sizeof(Matrix)); 16 | mat->count = n; 17 | mat->dist = (double*)malloc(sizeof(double) * mat->count * mat->count); 18 | mat->name = (char**)malloc(sizeof(char*) * mat->count); 19 | 20 | for (i = 0; i < mat->count; ++i) { 21 | if (fscanf(fp, "%s", name) == 0) { 22 | fprintf(stderr, "[tr_read_matrix] fail to read distance matrix\n"); 23 | tr_delete_matrix(mat); 24 | return 0; 25 | } 26 | mat->name[i] = (char*)malloc(sizeof(char) * (strlen(name) + 1)); 27 | strcpy(mat->name[i], name); 28 | for (j = 0; j < mat->count; ++j) { 29 | fscanf(fp, "%lf", &x); 30 | mat->dist[j * mat->count + i] = x; 31 | } 32 | mat->dist[i * mat->count + i] = 0.0; 33 | } 34 | return mat; 35 | } 36 | void tr_delete_matrix(Matrix *mat) 37 | { 38 | int i; 39 | if (mat == 0) return; 40 | free(mat->dist); 41 | for (i = 0; i < mat->count; ++i) 42 | free(mat->name[i]); 43 | free(mat->name); 44 | free(mat); 45 | } 46 | -------------------------------------------------------------------------------- /read_aln.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "align.h" 6 | #include "common/hash_char.h" 7 | #include "align_lib/table.h" 8 | 9 | MultiAlign *ma_read(FILE *fp, int is_nucl, int is_aln) 10 | { 11 | assert(fp); 12 | int c = fgetc(fp); 13 | ungetc(c, fp); 14 | if (is_aln < 0) is_aln = (c == 'C')? 
1 : 0; 15 | if (is_aln) return ma_read_aln_alignment(fp, is_nucl); 16 | else return ma_read_alignment(fp, is_nucl); 17 | } 18 | MultiAlign *ma_read_aln_alignment(FILE *fp, int is_nucl) 19 | { 20 | char name[256]; 21 | hash_map_char *hash; 22 | int n, k, l, i, b_max, *cur_len, c; 23 | char *buffer; 24 | MultiAlign *ma; 25 | 26 | assert(fp); 27 | ma = ma_alloc(); 28 | buffer = 0; 29 | b_max = 0; 30 | cur_len = 0; 31 | hash = new hash_map_char; 32 | /* skip first line */ 33 | while ((c = fgetc(fp)) != '\n' && c != EOF); 34 | 35 | n = 0; cur_len = 0; 36 | while (!feof(fp)) { 37 | if (c == EOF) break; 38 | c = fgetc(fp); 39 | if (c == EOF) break; 40 | ungetc(c, fp); 41 | if (isalpha(c) || isdigit(c)) { 42 | fscanf(fp, "%s", name); 43 | l = 0; 44 | while ((c = fgetc(fp)) != '\n' && c != EOF) { /* read alignment */ 45 | if (c == '.' || c == '~' || c == '-' || isalpha(c)) { 46 | if (c == '.' || c == '~') c = (is_nucl)? ALN_NT4_GAP : ALN_AA_GAP; 47 | else c = (is_nucl)? aln_nt4_table[(unsigned char)c] : aln_aa_table[(unsigned char)c]; 48 | if (l == b_max) { 49 | b_max += 1024; 50 | buffer = (char*)realloc(buffer, sizeof(char) * b_max); 51 | } 52 | buffer[l++] = c; 53 | } 54 | } 55 | buffer[l] = '\0'; 56 | if (!hash->find(name, &k)) { 57 | hash->insert(name, n); 58 | if (n == ma->max) { 59 | ma->max += 16; 60 | cur_len = (int*)realloc(cur_len, sizeof(int) * ma->max); 61 | ma->name = (char**)realloc(ma->name, sizeof(char*) * ma->max); 62 | ma->seq = (char**)realloc(ma->seq, sizeof(char*) * ma->max); 63 | for (i = ma->max - 16; i < ma->max; ++i) { 64 | cur_len[i] = 0; 65 | ma->name[i] = 0; 66 | ma->seq[i] = 0; 67 | } 68 | } 69 | k = ma->n = n; 70 | ++n; 71 | } 72 | ma->name[k] = (char*)malloc(sizeof(char) * (strlen(name) + 1)); 73 | strcpy(ma->name[k], name); 74 | cur_len[k] += l; 75 | ma->seq[k] = (char*)realloc(ma->seq[k], sizeof(char) * (cur_len[k] + 1)); 76 | memcpy(ma->seq[k] + cur_len[k] - l, buffer, l); 77 | } else while ((c = fgetc(fp)) != '\n' && c != EOF); 78 | } 79 
| if (n == 0) { ma_free(ma); return 0; } 80 | l = cur_len[0]; 81 | for (k = 1; k < n; ++k) { 82 | if (cur_len[k] != l) { 83 | fprintf(stderr, "[ma_read_aln_alignment] fail to parse ALN format.\n"); 84 | ma_free(ma); return 0; 85 | } 86 | } 87 | ma->max = ma->n = n; 88 | ma->len = cur_len[0]; 89 | ma->is_nucl = is_nucl; 90 | free(buffer); 91 | free(cur_len); 92 | delete hash; 93 | return ma; 94 | } 95 | -------------------------------------------------------------------------------- /reroot.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "tree.h" 3 | #include "utils.h" 4 | 5 | static double tr_root_max = 0.0; /* used for static Tree *tr_locate_root() */ 6 | static Tree *tr_root_max_ptr = 0; 7 | 8 | /* 9 | * reroot a tree 10 | */ 11 | Tree *tr_reroot(Tree *root, Tree *node, double dist) 12 | { 13 | int i; 14 | double d, tmp; 15 | Tree *p, *q, *r, *s, *new_root; 16 | if (node == root) return root; 17 | if (dist < 0.0 || dist > node->d) dist = node->d / 2.0; 18 | tmp = node->d; 19 | 20 | /* p: the central multi-parent node 21 | * q: the new parent, previous a child of p 22 | * r: old parent 23 | * i: previous position of q in p 24 | * d: previous distance p->d 25 | */ 26 | q = new_root = tr_new_node(); 27 | q->n = 2; 28 | q->node = (Tree**)malloc(sizeof(Tree*) * 2); 29 | q->node[0] = node; 30 | q->node[0]->d = dist; 31 | p = node->pre; 32 | q->node[0]->pre = q; 33 | for (i = 0; i < p->n; ++i) 34 | if (p->node[i] == node) break; 35 | q->node[1] = p; 36 | d = p->d; 37 | p->d = tmp - dist; 38 | r = p->pre; 39 | p->pre = q; 40 | while (r) { 41 | s = r->pre; /* store r's parent */ 42 | p->node[i] = r; /* change r to p's child */ 43 | for (i = 0; i < r->n; ++i) /* update i */ 44 | if (r->node[i] == p) break; 45 | r->pre = p; /* update r's parent */ 46 | tmp = r->d; r->d = d; d = tmp; /* swap r->d and d, i.e. 
update r->d */ 47 | q = p; p = r; r = s; /* update p, q and r */ 48 | } 49 | /* now p is the root node */ 50 | if (p->n == 2) { /* remove p and link the other child of p to q */ 51 | r = p->node[1 - i]; /* get the other child */ 52 | for (i = 0; i < q->n; ++i) /* the position of p in q */ 53 | if (q->node[i] == p) break; 54 | r->d += p->d; 55 | r->pre = q; 56 | q->node[i] = r; /* link r to q */ 57 | tr_delete_node(p); 58 | } else { /* remove one child in p */ 59 | int j, k; 60 | for (j = k = 0; j < p->n; ++j) { 61 | p->node[k] = p->node[j]; 62 | if (j != i) ++k; 63 | } 64 | --(p->n); 65 | } 66 | return new_root; 67 | } 68 | /* 69 | * find the root minimize the tree height 70 | * I can choose not to use recursive, but it will become more complex. 71 | * Note that two global static variables are used here. 72 | */ 73 | static Tree *tr_locate_root(Tree *tree, double *dist) 74 | { 75 | double max1, max2, d; 76 | int i; 77 | Tree *p, *mp; 78 | 79 | if (tree->n == 0) { 80 | *dist = tree->d; 81 | return tree; 82 | } 83 | for (i = 0, max1 = max2 = -1e30, mp = 0; i < tree->n; ++i) { 84 | p = tr_locate_root(tree->node[i], &d); /* p is always the leaf node */ 85 | if (d > max1) { 86 | max2 = max1; max1 = d; mp = p; 87 | } else if (d > max2) max2 = d; 88 | } 89 | if (max1 + max2 > tr_root_max) { 90 | tr_root_max = max1 + max2; 91 | tr_root_max_ptr = mp; 92 | } 93 | *dist = max1 + tree->d; 94 | return mp; 95 | } 96 | /* 97 | * root a tree by minimizing the height of the tree 98 | */ 99 | Tree *tr_root_by_min_height(Tree *root) 100 | { 101 | double dist, d; 102 | Tree *p; 103 | 104 | if (root == 0) return 0; 105 | if (root->n_leaf < 3) return root; /* already rooted */ 106 | tr_root_max = -1e30; 107 | tr_root_max_ptr = 0; 108 | tr_locate_root(root, &d); 109 | dist = tr_root_max / 2.0; 110 | p = tr_root_max_ptr; d = 0.0; 111 | for (;;) { 112 | if (p->d + d >= dist || p->pre == 0) break; 113 | d += p->d; 114 | p = p->pre; 115 | } 116 | p = tr_reroot(root, p, dist - d); 117 | 
tr_tree_init(p); 118 | return p; 119 | } 120 | Tree *tr_remove_root(Tree *root) 121 | { 122 | Tree *p; 123 | double d; 124 | 125 | if (root == 0) return 0; 126 | d = -2e30; 127 | if (root->n > 2 || root->n == 0) return root; 128 | if (root->node[0]->d > -1e30 && root->node[1]->d > -1e30) 129 | d = root->node[0]->d + root->node[1]->d; 130 | if (root->node[0]->n == 0) { /* node[0] is a leaf node, merge to node[1] */ 131 | if (root->node[1]->n == 0) return root; 132 | p = root->node[1]; 133 | p->node = (Tree**)realloc(p->node, sizeof(Tree*) * (p->n + 1)); 134 | p->node[p->n++] = root->node[0]; 135 | root->node[0]->pre = p; 136 | root->node[0]->d = d; 137 | } else { 138 | /* no matter whether root->node[1] is a leaf node or not, the procedure is the same. */ 139 | p = root->node[0]; 140 | p->node = (Tree**)realloc(p->node, sizeof(Tree*) * (p->n + 1)); 141 | p->node[p->n++] = root->node[1]; 142 | root->node[1]->pre = p; 143 | root->node[1]->d = d; 144 | } 145 | p->pre = 0; 146 | p->d = 0.0; 147 | tr_delete_node(root); 148 | tr_tree_init(p); 149 | return p; 150 | } 151 | -------------------------------------------------------------------------------- /scripts/benchmark.pl: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/perl -w 2 | 3 | use strict; 4 | use Getopt::Long; 5 | use File::Copy qw(cp); 6 | 7 | my %opt = (t=>100, d=>-1.0, l=>-1.0, p=>-1.0, a=>-1.0, m=>'WAG', b=>'jtt', n=>'njtree', s=>'seq-gen'); 8 | GetOptions(\%opt, "t=i", "d=f", "l=f", "p=f", "a=f", "i=f", "m=s", "b=s", "h!", "g=i", "n=s", "s=s"); 9 | if ($opt{h}) { 10 | &usage(\%opt); 11 | exit 1; 12 | } 13 | 14 | my $p = "tmp"; 15 | 16 | for (my $i = 0; $i < $opt{t}; ++$i) { 17 | my $height = 0.25 * rand() + 0.25; 18 | my $len = int(850 * rand() + 150); 19 | my $d = ($opt{d} < 0.0)? 0.20 * rand() : $opt{d}; 20 | my $l = ($opt{l} < 0.0)? 0.02 * rand() : $opt{l}; 21 | my $P = ($opt{p} < 0.0)? 0.30 * rand() : $opt{p}; 22 | my $a = ($opt{a} < 0.0)? 
exp(2.0 * rand() - 1.0) : $opt{a}; 23 | system("$opt{n} simulate -p $P -d $d -l $l -nm $height > simu-$p.nh"); 24 | my ($fh, $fh_out); 25 | open($fh_out, ">simu-$p.mfa"); 26 | open($fh, "$opt{s} -or -m$opt{m} -a $a simu-$p.nh 2>/dev/null |"); 27 | <$fh>; 28 | while (<$fh>) { 29 | if (/^(\S+)\s+(\S+)$/) { 30 | print $fh_out ">$1\n$2\n"; 31 | } 32 | } 33 | close($fh); 34 | close($fh_out); 35 | system("$opt{n} nj -t $opt{b} -b0 simu-$p.mfa > simu-$p.nhx"); 36 | system("cat simu-$p.nh simu-$p.nhx | $opt{n} merge - > /dev/null 2>simu-$p.count"); 37 | open($fh, "simu-$p.count"); 38 | if (<$fh> =~ /^(\d+)\s(\d+)\s(\d+)\s(\d+)$/) { 39 | printf "%d\t%.3f\t%.3f\t%.3f\t%.3f\t", $i, $d, $l, $P, $a; 40 | print "$1\t$2\t$3\t$4\n"; 41 | $| = 1; 42 | if ($4 > 0) { 43 | cp("simu-$p.nhx", "simu.$i.nhx"); 44 | cp("simu-$p.nh", "simu.$i.nh"); 45 | } 46 | } 47 | close($fh); 48 | } 49 | 50 | sub usage 51 | { 52 | my $opt = shift; 53 | print <{t}] 58 | -d FLOAT duplication probability [$opt->{d}] 59 | -l FLOAT loss probability [$opt->{l}] 60 | -p FLOAT loss probability directly after duplication [$opt->{p}] 61 | -a FLOAT shape parameter (alpha) for gamma distribution [$opt->{a}] 62 | -m STR model of generator [$opt->{m}] 63 | -b STR model of tree builder [$opt->{b}] 64 | -n STR path of njtree [$opt->{n}] 65 | -s STR path of seq-gen [$opt->{s}] 66 | -h help 67 | 68 | EOF 69 | } 70 | -------------------------------------------------------------------------------- /scripts/get_part.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | die("Usage: get_part.pl []\n") if (@ARGV == 0); 4 | $name = shift(@ARGV); 5 | 6 | $flag = 0; 7 | while (<>) { 8 | if (/^\@begin (\S+)/) { 9 | $flag = ($1 eq $name)? 
1 : 0; 10 | } elsif (/^\@end/) { 11 | $flag = 0; 12 | } elsif ($flag) { 13 | print; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /simulate.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "tree.h" 7 | #include "utils.h" 8 | #include "cpp_utils.h" 9 | 10 | #define BLOCK_SIZE 16 11 | 12 | typedef struct 13 | { 14 | Tree *p, *pre; 15 | } STACK; 16 | 17 | void tr_default_length(Tree *tree, double height, int is_sum_length) 18 | { 19 | Tree **node, *p; 20 | int i, n; 21 | double scale = tree->d = 1.0; 22 | node = tr_stack(tree, Tree*); 23 | n = tr_expand_node(tree, node); 24 | for (i = n - 2; i >= 0; --i) { 25 | p = node[i]; 26 | p->d = p->pre->d + 1.0; 27 | if (p->d > scale) scale = p->d; 28 | } 29 | for (i = 0; i < n; ++i) { 30 | if (node[i]->n == 0) node[i]->d = scale; 31 | node[i]->d *= height / scale; 32 | } 33 | if (!is_sum_length) 34 | for (i = 0; i < n - 1; ++i) 35 | node[i]->d -= node[i]->pre->d; 36 | free(node); 37 | } 38 | int tr_simulate_aux(Tree **result) 39 | { 40 | if (result[0]->n == 0) { 41 | return 1; 42 | } else { 43 | int j, l; 44 | l = 1; 45 | for (j = 0; j < result[0]->n; ++j) { 46 | result[0]->node[j] = result[l]; 47 | result[l]->pre = result[0]; 48 | l += tr_simulate_aux(result + l); 49 | } 50 | return l; 51 | } 52 | } 53 | Tree *tr_simulate(Tree *spec, double pd, double pl, double pdl, double height, int is_name) 54 | { 55 | Tree *p, *q, **result; 56 | int k, max_k, l, max_l, count; 57 | STACK *stack, *top; 58 | double tmp; 59 | 60 | max_k = BLOCK_SIZE; 61 | k = l = max_l = count = 0; 62 | stack = (STACK*)malloc(sizeof(STACK) * BLOCK_SIZE); 63 | stack[k].p = spec; stack[k++].pre = 0; 64 | result = 0; 65 | tr_default_length(spec, height, 1); 66 | 67 | while (k) { 68 | top = stack + (--k); 69 | p = top->p; 70 | q = tr_new_node(); 71 | q->pre = top->pre; 72 | q->d = p->d; 73 | 
#ifndef _WIN32 74 | if (p != spec && drand48() < pd) { /* not the root */ 75 | #else 76 | if (p != spec && (double)rand() / RAND_MAX < pd) { /* not the root */ 77 | #endif 78 | q->n = 2; 79 | q->flag = 1; /* duplication */ 80 | q->node = (Tree**)malloc(sizeof(Tree*) * q->n); 81 | if (k+2 >= max_k) { 82 | max_k+= BLOCK_SIZE; 83 | stack = (STACK*)realloc(stack, sizeof(STACK) * max_k); 84 | } 85 | if (is_name) { 86 | q->name = (char*)malloc(sizeof(char) * (strlen(p->name)+1)); 87 | strcpy(q->name, p->name); 88 | } 89 | stack[k].p = p; stack[k++].pre = q; 90 | stack[k].p = p; stack[k++].pre = q; 91 | } else { 92 | q->flag = 0; /* not duplication */ 93 | if (p->n > 0) { /* internal node */ 94 | int i, j; 95 | double prob = (top->pre && top->pre->flag)? pdl : pl; 96 | for (i = j = 0; i < p->n; ++i) { 97 | #ifndef _WIN32 98 | if (drand48() >= prob) { /* not lost */ 99 | #else 100 | if ((double)rand() / RAND_MAX >= prob) { 101 | #endif 102 | ++j; 103 | if (k == max_k) { 104 | max_k += BLOCK_SIZE; 105 | stack = (STACK*)realloc(stack, sizeof(STACK) * max_k); 106 | } 107 | stack[k].p = p->node[i]; stack[k++].pre = q; 108 | } 109 | } 110 | if (is_name) { 111 | q->name = (char*)malloc(sizeof(char) * (strlen(p->name)+1)); 112 | strcpy(q->name, p->name); 113 | } 114 | if (j > 0) { 115 | q->n = j; 116 | q->node = (Tree**)malloc(sizeof(Tree*) * q->n); 117 | q->id = 1; 118 | } else q->id = -1; /* will be deleted */ 119 | } else { 120 | q->name = (char*)malloc(sizeof(char) * (strlen(p->name)+6)); 121 | q->id = 0; 122 | sprintf(q->name, "%.4d_%s", count++, p->name); 123 | } 124 | } 125 | if (l == max_l) { 126 | max_l += BLOCK_SIZE; 127 | result = (Tree**)realloc(result, sizeof(Tree*) * max_l); 128 | } 129 | result[l++] = q; 130 | } 131 | 132 | free(stack); 133 | /* adjust branch length */ 134 | for (k = 0; k < l; ++k) { 135 | p = result[k]; 136 | tmp = (k == 0)? 
0 : p->pre->d; 137 | if (p->flag) /* duplication */ 138 | p->d = (p->d - tmp) / 2 + tmp; 139 | } 140 | for (k = l - 1; k > 0; --k) 141 | result[k]->d -= result[k]->pre->d; 142 | /* write tree */ 143 | k = tr_simulate_aux(result); 144 | if (l != k) fprintf(stderr, "[tr_simulate] inconsistencies occurs (%d,%d)\n", l, k); 145 | p = result[0]; 146 | tr_tree_init(p); 147 | k = tr_expand_node(p, result); 148 | /* write flag, prepare for cpp_subtree_aux() */ 149 | k = tr_expand_leaf(p, result); 150 | for (l = 0; l < k; ++l) 151 | result[l]->flag = result[l]->id; 152 | free(result); 153 | q = cpp_subtree_aux(p); 154 | tr_delete_tree(p); 155 | tr_tree_init(q); 156 | tr_attach_id(q); 157 | return q; 158 | } 159 | Tree *tr_shrink_spec(Tree *spec) 160 | { 161 | Tree **node, *p; 162 | int i, m; 163 | 164 | node = tr_stack(spec, Tree*); 165 | m = tr_expand_leaf(spec, node); 166 | for (i = 0; i < m; ++i) { 167 | p = node[i]; 168 | p->flag = (p->bs > 0)? 1 : -1; 169 | } 170 | free(node); 171 | p = cpp_subtree_aux(spec); 172 | tr_tree_init(p); 173 | return p; 174 | } 175 | static int tr_simulate_usage() 176 | { 177 | fprintf(stderr, "\n"); 178 | fprintf(stderr, "Usage: treebest simulate [options]\n\n"); 179 | fprintf(stderr, "Options: -d FNUM duplication probability [0.05]\n"); 180 | fprintf(stderr, " -l FNUM loss probability [0.01]\n"); 181 | fprintf(stderr, " -p FNUM loss probability after duplication [0.25]\n"); 182 | fprintf(stderr, " -m FNUM max height [0.25]\n"); 183 | fprintf(stderr, " -n not show internal name\n"); 184 | fprintf(stderr, " -h help\n\n"); 185 | return 1; 186 | } 187 | int tr_simulate_task(int argc, char *argv[]) 188 | { 189 | Tree *spec, *simu, *shrink; 190 | int c, is_name; 191 | double height, pd, pl, pdl; 192 | 193 | pd = 0.05; 194 | pl = 0.01; 195 | pdl = 0.25; 196 | height = 0.25; 197 | is_name = 1; 198 | #ifndef _WIN32 199 | srand48(time(0)^((int)getpid())); 200 | #else 201 | srand(time(0)); 202 | #endif 203 | 204 | while ((c = getopt(argc, argv, 
"d:l:p:m:hn")) >= 0) { 205 | switch (c) { 206 | case 'd': pd = atof(optarg); break; 207 | case 'l': pl = atof(optarg); break; 208 | case 'p': pdl = atof(optarg); break; 209 | case 'm': height = atof(optarg); break; 210 | case 'n': is_name = 0; break; 211 | case 'h': return tr_simulate_usage(); 212 | } 213 | } 214 | spec = tr_default_spec_tree(); 215 | shrink = tr_shrink_spec(spec); 216 | simu = tr_simulate(shrink, pd, pl, pdl, height, is_name); 217 | tr_tree_output(stdout, simu, 0); 218 | tr_delete_tree(spec); 219 | tr_delete_tree(shrink); 220 | tr_delete_tree(simu); 221 | return 0; 222 | } 223 | -------------------------------------------------------------------------------- /spec-ens.nh: -------------------------------------------------------------------------------- 1 | ((ORYSA-*4530.rice,ARATH-*3702)Magnoliophyta-3398,(SCHPO-*4896.S_pombe,YEAST*-4932)Ascomycota-4890, 2 | 3 | ((((((((((((HUMAN*-9606,PANTR*-9598.chimpanzee)Homo/Pan/Gorilla-207598, 4 | MACMU*-9544.monkey)Catarrhini-9526, 5 | OTOGA-*30611.galago)Primates-9443, 6 | 7 | ((((MOUSE*-10090,RAT*-10116)Murinae-39107),CAVPO*-10141.guineapig)Rodentia-9989,RABIT*-9986.rabbit)Glires-314147, 8 | TUPBG*-37347.treeshrew)Euarchontoglires-314146, 9 | (HORSE-*9796.horse, 10 | (BOVIN*-9913.cow,PIG-*9823)Cetartiodactyla-91561, 11 | (CANFA*-9615.dog,FELCA*-9685.cat)Carnivora-33554, 12 | (SORAR-*42254.shrew,ERIEU*-9365.hedgehog)Insectivora-9362, 13 | MYOLU-*59463.bat)Laurasiatheria-314145, 14 | (ECHTE*-9371.tenrec,LOXAF*-9785.elephant)Afrotheria-311790, 15 | 16 | DASNO*-9361.armadillo)Eutheria-9347,MONDO*-13616.opossum)Theria-32525, 17 | ORNAN*-9258.platypus)Mammalia-40674, 18 | CHICK*-9031)Amniota-32524, 19 | XENTR*-8364.frog)Tetrapoda-32523, 20 | (BRARE*-7955.zebrafish, 21 | 22 | ((TETNG*-99883.pufferfish,FUGRU*-31033.pufferfish)Tetraodontidae-31031, 23 | 24 | (GASAC*-69293.stickleback,ORYLA*-8090.ricefish)Smegmamorpha-129949)Percomorpha-32485)Clupeocephala-186625)Euteleostomi-117571, 25 | 
(CIOIN*-7719,CIOSA*-51511)Ciona-7718)Chordata-7711, 26 | (((DROME*-7227.fly,DROPS-*7237.fly)Sophophora-32341, 27 | 28 | (AEDAE*-7159.mosquito,ANOGA*-7165.mosquito)Culicidae-7157)Diptera-7147, 29 | APIME-*7460.honeybee)Endopterygota-33392, 30 | SCHMA-*6183.fluke, 31 | 32 | (CAEEL*-6239.worm,CAEBR-*6238.worm,CAERE-*31234.worm)Caenorhabditis-6237)Bilateria-33213)Eukaryota-2759; 33 | -------------------------------------------------------------------------------- /spec-enstax.nh: -------------------------------------------------------------------------------- 1 | ((4530,3702)3398,(4896,4932*)4890,((((((((((((9606*,9598*)207598,9544*)9526,30611)9443,(((10090*,10116*)39107,10141*)9989,9986*)314147,37347*)314146,(9796,(9913*,9823)91561,(9615*,9685*)33554,(42254,9365*)9362,59463)314145,(9371*,9785*)311790,9361*)9347,13616*)32525,9258*)40674,9031*)32524,8364*)32523,(7955*,((99883*,31033*)31031,(69293*,8090*)129949)32485)186625)117571,(7719*,51511*)7718)7711,(((7227*,7237)32341,(7159*,7165*)7157)7147,7460)33392,6183,(6239*,6238,31234)6237)33213)2759; 2 | -------------------------------------------------------------------------------- /spec-taxon_id.nh: -------------------------------------------------------------------------------- 1 | ((4530*-ORYSA.rice,3702*-ARATH)3398-Magnoliophyta,(4896*-SCHPO.S_pombe,4932*-YEAST)4890-Ascomycota, 2 | ((((((((((((9606*-HUMAN,9598*-PANTR.chimpanzee)207598-Homo/Pan/Gorilla, 3 | 9544*-MACMU.monkey)9526-Catarrhini, 4 | 30611*-OTOGA.galago)9443-Primates, 5 | ((10090*-MOUSE,10116*-RAT)39107-Murinae,9986-RABIT)314147-Glires)314146-Euarchontoglires, 6 | ((9913*-BOVIN.cow,9823*-PIG)91561-Cetartiodactyla, 7 | (9615*-CANFA.dog,9685*-FELCA.cat)33554-Carnivora, 8 | 42254*-SORAR.shrew, 9 | 59463*-MYOLU.bat)314145-Laurasiatheria, 10 | (9371-ECHTE.tenrec,9785-LOXAF.elephant)311790-Afrotheria, 11 | 9361-DASNO.armadillo)9347-Eutheria,13616*-MONDO.opossum)32525-Theria, 12 | 9258*-ORNAN.platypus)40674-Mammalia, 13 | 9031*-CHICK)32524-Amniota, 14 | 
8364*-XENTR.frog)32523-Tetrapoda, 15 | (7955*-BRARE.zebrafish, 16 | ((99883*-TETNG.pufferfish,31033*-FUGRU.pufferfish)31031-Tetraodontidae, 17 | (69293*-GASAC.stickleback,8090*-ORYLA.ricefish)129949-Smegmamorpha)32485-Percomorpha)186625-Clupeocephala)117571-Euteleostomi, 18 | (7719*-CIOIN,51511*-CIOSA)7718-Ciona)7711-Chordata, 19 | (((7227*-DROME.fly,7237*-DROPS.fly)32341-Sophophora, 20 | (7159*-AEDAE.mosquito,7165*-ANOGA.mosquito)7157-Culicidae)7147-Diptera, 21 | 7460*-APIME.honeybee)33392-Endopterygota, 22 | 6183*-SCHMA.fluke, 23 | (6239*-CAEEL.worm,6238*-CAEBR.worm,31234*-CAERE.worm)6237-Caenorhabditis)33213-Bilateria)2759-Eukaryota; 24 | -------------------------------------------------------------------------------- /spec.c: -------------------------------------------------------------------------------- 1 | /*** 2 | * Author: liheng 3 | * Created: unknown 4 | * Last MDF: 2005-01-22 5 | * 6 | * 2005-01-22 liheng 7 | * 8 | * * preprocess the species tree 9 | * * add fully-sequenced tag 10 | */ 11 | #include 12 | #include "cpp_utils.h" 13 | #include "tree.h" 14 | 15 | /* AEDAE */ 16 | 17 | char *tr_species_tree_string="\ 18 | ((ORYSA*-4530.rice,ARATH*-3702)Magnoliophyta-3398,(SCHPO*-4896.S_pombe,YEAST*-4932)Ascomycota-4890,\n\ 19 | ((((((((((((HUMAN*-9606,PANTR*-9598.chimpanzee)Homo/Pan/Gorilla-207598, \n\ 20 | MACMU*-9544.monkey)Catarrhini-9526, \n\ 21 | OTOGA-*30611.galago)Primates-9443, \n\ 22 | ((MOUSE*-10090,RAT*-10116)Murinae-39107,RABIT-9986)Glires-314147)Euarchontoglires-314146, \n\ 23 | ((BOVIN*-9913.cow,PIG-*9823)Cetartiodactyla-91561, \n\ 24 | (CANFA*-9615.dog,FELCA-*9685.cat)Carnivora-33554, \n\ 25 | SORAR-*42254.shrew, \n\ 26 | MYOLU-*59463.bat)Laurasiatheria-314145, \n\ 27 | (ECHTE-9371.tenrec,LOXAF-9785.elephant)Afrotheria-311790, \n\ 28 | DASNO-9361.armadillo)Eutheria-9347,MONDO*-13616.opossum)Theria-32525,\n\ 29 | ORNAN-*9258.platypus)Mammalia-40674,\n\ 30 | CHICK*-9031)Amniota-32524,\n\ 31 | XENTR*-8364.frog)Tetrapoda-32523,\n\ 32 | 
(BRARE*-7955.zebrafish, \n\ 33 | ((TETNG*-99883.pufferfish,FUGRU*-31033.pufferfish)Tetraodontidae-31031,\n\ 34 | (GASAC*-69293.stickleback,ORYLA*-8090.ricefish)Smegmamorpha-129949)Percomorpha-32485)Clupeocephala-186625)Euteleostomi-117571,\n\ 35 | (CIOIN*-7719,CIOSA*-51511)Ciona-7718)Chordata-7711,\n\ 36 | (((DROME*-7227.fly,DROPS*-7237.fly)Sophophora-32341,\n\ 37 | (AEDAE*-7159.mosquito,ANOGA*-7165.mosquito)Culicidae-7157)Diptera-7147, \n\ 38 | APIME-*7460.honeybee)Endopterygota-33392,\n\ 39 | SCHMA*-6183.fluke,\n\ 40 | (CAEEL*-6239.worm,CAEBR*-6238.worm,CAERE*-31234.worm)Caenorhabditis-6237)Bilateria-33213)Eukaryota-2759;"; 41 | 42 | char *tr_core_species_list[] = { 43 | "HUMAN", "MOUSE", "MONDO", "CHICK", "XENTR", "BRARE", "FUGRU", "CIOIN", "DROME", 44 | "CAEEL", "SCHMA", "YEAST", "SCHPO", "ORYSA", "ARATH", 0}; 45 | 46 | Tree *tr_default_spec_tree() 47 | { 48 | Tree **forest, *tree; 49 | int n; 50 | 51 | forest = tr_parse_str(tr_species_tree_string, &n); 52 | tree = forest[0]; 53 | free(forest); 54 | cpp_post_spec_tree(tree, 0); 55 | return tree; 56 | } 57 | Tree *tr_core_spec_tree() 58 | { 59 | Tree **forest, *tree; 60 | int n; 61 | 62 | forest = tr_parse_str(tr_species_tree_string, &n); 63 | tree = forest[0]; 64 | free(forest); 65 | cpp_post_spec_tree(tree, 1); 66 | return tree; 67 | } 68 | -------------------------------------------------------------------------------- /spec.nh: -------------------------------------------------------------------------------- 1 | ((ORYSA*-4530.rice,ARATH*-3702)Magnoliophyta-3398,(SCHPO*-4896.S_pombe,YEAST*-4932)Ascomycota-4890, 2 | ((((((((((((HUMAN*-9606,PANTR*-9598.chimpanzee)Homo/Pan/Gorilla-207598, 3 | MACMU*-9544.monkey)Catarrhini-9526, 4 | OTOGA-*30611.galago)Primates-9443, 5 | ((MOUSE*-10090,RAT*-10116)Murinae-39107,RABIT-9986)Glires-314147)Euarchontoglires-314146, 6 | ((BOVIN*-9913.cow,PIG-*9823)Cetartiodactyla-91561, 7 | (CANFA*-9615.dog,FELCA-*9685.cat)Carnivora-33554, 8 | SORAR-*42254.shrew, 9 | 
MYOLU-*59463.bat)Laurasiatheria-314145, 10 | (ECHTE-9371.tenrec,LOXAF-9785.elephant)Afrotheria-311790, 11 | DASNO-9361.armadillo)Eutheria-9347,MONDO*-13616.opossum)Theria-32525, 12 | ORNAN-*9258.platypus)Mammalia-40674, 13 | CHICK*-9031)Amniota-32524, 14 | XENTR*-8364.frog)Tetrapoda-32523, 15 | (BRARE*-7955.zebrafish, 16 | ((TETNG*-99883.pufferfish,FUGRU*-31033.pufferfish)Tetraodontidae-31031, 17 | (GASAC*-69293.stickleback,ORYLA*-8090.ricefish)Smegmamorpha-129949)Percomorpha-32485)Clupeocephala-186625)Euteleostomi-117571, 18 | (CIOIN*-7719,CIOSA*-51511)Ciona-7718)Chordata-7711, 19 | (((DROME*-7227.fly,DROPS*-7237.fly)Sophophora-32341, 20 | (AEDAE*-7159.mosquito,ANOGA*-7165.mosquito)Culicidae-7157)Diptera-7147, 21 | APIME-*7460.honeybee)Endopterygota-33392, 22 | SCHMA*-6183.fluke, 23 | (CAEEL*-6239.worm,CAEBR*-6238.worm,CAERE*-31234.worm)Caenorhabditis-6237)Bilateria-33213)Eukaryota-2759; 24 | -------------------------------------------------------------------------------- /subtree.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "tree.h" 5 | #include "cpp_utils.h" 6 | 7 | /* 8 | * find the last common ancester of the specified nodes (id >= 0). O(N) 9 | * It is in fact a simplized version of tr_subtree_aux(). 10 | */ 11 | Tree *tr_comm_ancester_aux(Tree *t) 12 | { 13 | Tree *p, *q; 14 | int i, k; 15 | 16 | q = 0; 17 | if (t->n == 0) return (t->flag < 0)? 0 : t; 18 | for (i = 0, k = 0; i < t->n; ++i) { 19 | p = tr_comm_ancester_aux(t->node[i]); 20 | if (p) { 21 | ++k; 22 | q = p; 23 | } 24 | } 25 | if (k == 0) p = 0; 26 | else if (k == 1) p = q; 27 | else p = t; 28 | return p; 29 | } 30 | /* 31 | * find the last common ancester. 
O(N) 32 | */ 33 | Tree *tr_comm_ancester(Tree *root, int m, char **name) 34 | { 35 | cpp_attach_flag_by_name(root, m, name); 36 | return tr_comm_ancester_aux(root); 37 | } 38 | -------------------------------------------------------------------------------- /timeout.c: -------------------------------------------------------------------------------- 1 | /*++ 2 | * NAME 3 | * timeout 1 4 | * SUMMARY 5 | * run command with bounded time 6 | * SYNOPSIS 7 | * \fBtimeout\fR [-\fIsignal\fR] \fItime\fR \fIcommand\fR ... 8 | * DESCRIPTION 9 | * \fBtimeout\fR executes a command and imposes an elapsed time limit. 10 | * The command is run in a separate POSIX process group so that the 11 | * right thing happens with commands that spawn child processes. 12 | * 13 | * Arguments: 14 | * .IP \fI-signal\fR 15 | * Specify an optional signal to send to the controlled process. 16 | * By default, \fBtimeout\fR sends SIGKILL, which cannot be caught 17 | * or ignored. 18 | * .IP \fItime\fR 19 | * The elapsed time limit after which the command is terminated. 20 | * .IP \fIcommand\fR 21 | * The command to be executed. 22 | * DIAGNOSTICS 23 | * The command exit status is the exit status of the command 24 | * (status 1 in case of a usage error). 25 | * AUTHOR(S) 26 | * Wietse Venema 27 | * This program is part of SATAN. 28 | *--*/ 29 | 30 | /* System libraries. */ 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | extern int optind; 40 | 41 | /* Application-specific. 
*/ 42 | 43 | #define perrorexit(s) { perror(s); exit(1); } 44 | 45 | static int kill_signal = SIGKILL; 46 | static char *progname; 47 | static char *commandname; 48 | 49 | static void usage() 50 | { 51 | fprintf(stderr, "usage: %s [-signal] time command...\n", progname); 52 | exit(1); 53 | } 54 | 55 | static void terminate(int sig) 56 | { 57 | signal(kill_signal, SIG_DFL); 58 | fprintf(stderr, "Timeout: aborting command ``%s'' with signal %d\n", commandname, kill_signal); 59 | kill(0, kill_signal); 60 | } 61 | 62 | int main(int argc, char *argv[]) 63 | { 64 | int time_to_run = 0; 65 | pid_t pid; 66 | pid_t child_pid; 67 | int status; 68 | 69 | progname = argv[0]; 70 | 71 | /* 72 | * Parse JCL. 73 | */ 74 | while (--argc && *++argv && **argv == '-') 75 | if ((kill_signal = atoi(*argv + 1)) <= 0) 76 | usage(); 77 | 78 | if (argc < 2 || (time_to_run = atoi(argv[0])) <= 0) 79 | usage(); 80 | 81 | commandname = argv[1]; 82 | 83 | /* 84 | * Run the command and its watchdog in a separate process group so that 85 | * both can be killed off with one signal. 86 | */ 87 | setsid(); 88 | switch (child_pid = fork()) { 89 | case -1: /* error */ 90 | perrorexit("timeout: fork"); 91 | case 00: /* run controlled command */ 92 | execvp(argv[1], argv + 1); 93 | perrorexit(argv[1]); 94 | default: /* become watchdog */ 95 | (void) signal(SIGHUP, terminate); 96 | (void) signal(SIGINT, terminate); 97 | (void) signal(SIGQUIT, terminate); 98 | (void) signal(SIGTERM, terminate); 99 | (void) signal(SIGALRM, terminate); 100 | alarm(time_to_run); 101 | while ((pid = wait(&status)) != -1 && pid != child_pid) 102 | /* void */ ; 103 | return (pid == child_pid ? 
status : -1); 104 | } 105 | return 0; 106 | } 107 | -------------------------------------------------------------------------------- /tree_plot.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_PLOT_H_ 2 | #define TREE_PLOT_H_ 3 | 4 | #include "tree.h" 5 | 6 | #define plot_ptr(p) ((PlotPtr*)(p)->ptr) 7 | 8 | typedef struct 9 | { 10 | double x, y; 11 | void *ptr; 12 | } PlotPtr; 13 | 14 | typedef struct 15 | { 16 | int width, height; 17 | unsigned c_line, c_ext, c_int, c_bs, c_dup; 18 | int is_real; 19 | int x_margin, y_margin; 20 | int font_size; 21 | int is_SDI; 22 | double font_width; 23 | double line_width; 24 | double box_width; 25 | double x_skip; 26 | char *font; 27 | int is_color; 28 | int is_bs; 29 | 30 | int show_spec; 31 | } PlotParam; 32 | 33 | #ifdef __cplusplus 34 | extern "C" { 35 | #endif 36 | 37 | void plot_modify_PlotPtr(Tree *tree, int is_init); 38 | int plot_cal_x_y(Tree *tree, int is_real); 39 | void plot_eps(FILE *eps, Tree *tree, PlotParam *pp); 40 | PlotParam *plot_default_param(); 41 | 42 | #ifdef __cplusplus 43 | } 44 | #endif 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /trimpoor.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tree.h" 4 | #include "utils.h" 5 | #include "cpp_utils.h" 6 | #include "common/hash_char.h" 7 | 8 | /* tr_SDI and tr_lost_infer must be applied */ 9 | int tr_qual(const Tree *tree) 10 | { 11 | int i, n, score; 12 | int ndd_loss, ncd_loss, ns_loss, ncd, ns, ndd; 13 | Tree **node, *p; 14 | if (tree == 0) return 0; 15 | node = tr_stack(tree, Tree*); 16 | n = tr_expand_node(tree, node); 17 | ndd_loss = ncd_loss = ns_loss = ncd = ns = ndd = 0; 18 | for (i = 0; i < n; ++i) { 19 | p = node[i]; 20 | if (p->n) { // internal node 21 | if (SDIptr(p)->type == 'S') ++ns; 22 | else { 23 | if (SDIptr(p)->is_confirmed_dup) ++ncd; 24 | else ++ndd; 25 
| } 26 | } 27 | if (!p->pre) continue; 28 | if (SDIptr(p->pre)->type == 'S') ns_loss += SDIptr(p)->n_lost; 29 | else { 30 | if (SDIptr(p->pre)->is_confirmed_dup) ncd_loss += SDIptr(p)->n_lost; 31 | else ndd_loss += SDIptr(p)->n_lost; 32 | } 33 | } 34 | free(node); 35 | /* HERE IS THE FORMULAR */ 36 | /**/ score = ns_loss * 5 + ncd_loss * 3 + ndd_loss * 1 - ns * 4 - ndd * 2; /**/ 37 | return score; 38 | } 39 | 40 | /* tr_SDI and tr_lost_infer must be applied */ 41 | static Tree *tr_trim_few_leaf(Tree *tree, Tree *spec, int thres) 42 | { 43 | int i, j, n; 44 | char *notrim; 45 | int score, min; 46 | Tree **node, *sub, *ret, *p; 47 | hash_set_char *notrim_set; 48 | 49 | if (tree == 0) return 0; 50 | min = tr_qual(tree) - thres; 51 | notrim_set = new hash_set_char; 52 | ret = 0; 53 | node = tr_stack(tree, Tree*); 54 | n = tr_expand_leaf(tree, node); 55 | for (i = 0; i < n; ++i) { 56 | p = node[i]; 57 | if (p->n == 0) { 58 | cpp_get_keyval(p, "NoTrim", ¬rim); 59 | if (notrim && *notrim == 'Y') { /* do not trim */ 60 | if (p->name) notrim_set->insert(p->name); 61 | free(notrim); continue; 62 | } 63 | } 64 | for (j = 0; j < n; ++j) node[j]->flag = 1; 65 | if (p->n == 0) p->flag = -1; 66 | else if (p->n == 2 && p->node[0]->n == 0 && p->node[1]->n == 0) { // allow to remove two leaves 67 | if (!notrim_set->find(p->node[0]->name) && !notrim_set->find(p->node[1]->name)) 68 | p->node[0]->flag = p->node[1]->flag = -1; 69 | } else continue; // do nothing 70 | sub = cpp_subtree_aux(tree); 71 | tr_tree_init(sub); tr_attach_id(sub); tr_clear_ptr(sub); 72 | tr_SDI(sub, spec, 0); tr_lost_infer(sub, spec); 73 | score = tr_qual(sub); 74 | if (score < min) { 75 | min = score; 76 | if (ret) { 77 | tr_delete_tree_SDIptr(ret); 78 | tr_delete_tree(ret); 79 | } 80 | ret = sub; 81 | } else { 82 | tr_delete_tree_SDIptr(sub); 83 | tr_delete_tree(sub); 84 | } 85 | } 86 | if (ret) { 87 | for (i = 0; i < n; ++i) 88 | if (node[i]->n == 0 && notrim_set->find(node[i]->name)) 89 | 
cpp_insert_keyval(node[i], "NoTrim", "Y"); 90 | } 91 | delete notrim_set; 92 | free(node); 93 | return ret; 94 | } 95 | Tree *tr_trim_poor_leaves(const Tree *tree, Tree *spec, int thres) 96 | { 97 | char *trim; 98 | Tree **node, *sub, *tmp; 99 | int i; 100 | node = tr_stack(tree, Tree*); 101 | tr_expand_leaf(tree, node); 102 | for (i = 0; i < tree->n_leaf; ++i) { 103 | cpp_get_keyval(node[i], "DoTrim", &trim); 104 | node[i]->flag = (trim && *trim == 'Y')? -1 : 1; 105 | free(trim); 106 | } 107 | sub = cpp_subtree_aux(tree); 108 | tr_tree_init(sub); tr_attach_id(sub); tr_clear_ptr(sub); 109 | tr_SDI(sub, spec, 0); tr_lost_infer(sub, spec); 110 | while ((tmp = tr_trim_few_leaf(sub, spec, thres))) { 111 | tr_delete_tree_SDIptr(sub); 112 | tr_delete_tree(sub); 113 | sub = tmp; 114 | } 115 | free(node); 116 | return sub; 117 | } 118 | extern "C" 119 | { 120 | FILE *tr_get_fp(const char *); 121 | int tr_trimpoor_task(int argc, char *argv[]); 122 | } 123 | int tr_trimpoor_usage() 124 | { 125 | fprintf(stderr, "treebest trimpoor [=0]\n"); 126 | return 1; 127 | } 128 | int tr_trimpoor_task(int argc, char *argv[]) 129 | { 130 | Tree *tree, *spec_ori, *rst, *spec; 131 | int thres = 0; 132 | FILE *fp; 133 | if (argc > 2) thres = atoi(argv[2]); 134 | if (argc == 1) return tr_trimpoor_usage(); 135 | fp = tr_get_fp(argv[1]); 136 | tree = tr_parse_first(fp); 137 | fclose(fp); 138 | spec_ori = tr_default_spec_tree(); 139 | spec = cpp_shrink_spec_by_tree(spec_ori, tree); 140 | tr_delete_tree(spec_ori); 141 | tr_SDI(tree, spec, 0); tr_lost_infer(tree, spec); 142 | rst = tr_trim_poor_leaves(tree, spec, thres); 143 | tr_tree_output(stdout, rst, OUTPUT_SDI); 144 | tr_delete_tree(spec); 145 | tr_delete_tree_SDIptr(tree); 146 | tr_delete_tree(tree); 147 | tr_delete_tree_SDIptr(rst); 148 | tr_delete_tree(rst); 149 | return 0; 150 | } 151 | -------------------------------------------------------------------------------- /utils.h: 
-------------------------------------------------------------------------------- 1 | /*** 2 | * Created: unknown 3 | * Author: liheng 4 | * Last MDF: 2005-01-22 5 | * 6 | * 7 | * 2005-01-14 liheng: 8 | * 9 | * * declaration of tr_attach_name_by_id() and tr_expand_leaf_by_id() 10 | * 11 | * 2005-01-22 liheng 12 | * 13 | * * several declarations 14 | */ 15 | #ifndef UTILES_H_ 16 | #define UTILES_H_ 17 | 18 | #include "tree.h" 19 | 20 | #define tr_stack(tree,type) ((type*)malloc(sizeof(type) * ((tree)->ftime+1))) 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | /* count the number of leaves and correct Tree::pre pointer if wrong */ 27 | int tr_get_leaf_num(TreePtr root); 28 | /* set Tree::finish_time and Tree::n_leaf */ 29 | int tr_tree_init(Tree *root); 30 | /* set Tree::id */ 31 | int tr_attach_id(Tree *root); 32 | /* get all the nodes of `root', according to finish time */ 33 | int tr_expand_node(const Tree *root, Tree **node); 34 | /* get internal nodes, finish time */ 35 | int tr_expand_internal_node(const Tree *root, Tree **node); 36 | /* get leaf nodes */ 37 | int tr_expand_leaf(const Tree *root, Tree **node); 38 | /* get leaf node according to Tree::id */ 39 | int tr_expand_leaf_by_id(const Tree *root, Tree **leaf); 40 | /* search a node by name */ 41 | Tree *tr_search_by_name(const Tree *root, const char *name); 42 | /* search a node by node */ 43 | Tree *tr_search(const Tree *root, const Tree *ptr); 44 | 45 | /* copy a node, except for ->ptr point */ 46 | Tree *tr_copy_node(const Tree *node); 47 | /* copy a tree, call tr_copy_node() */ 48 | Tree *tr_copy_tree(const Tree *root); 49 | 50 | void tr_clear_ptr(Tree *tree); 51 | 52 | char *cpystr(const char *s); 53 | 54 | #ifdef __cplusplus 55 | } 56 | #endif 57 | 58 | #endif 59 | --------------------------------------------------------------------------------