├── HomBlocks.pl
├── PartitionFinderV1.1.1
│   ├── .DS_Store
│   ├── PartitionFinder.py
│   ├── PartitionFinderProtein.py
│   ├── README.md
│   ├── RELEASE-VERSION
│   ├── ROGP_v2.0-backup.pl
│   ├── circoletto.pl
│   ├── docs
│   │   ├── .DS_Store
│   │   └── Manual_v1.1.1.pdf
│   ├── examples
│   │   ├── .DS_Store
│   │   ├── README.txt
│   │   ├── aminoacid
│   │   │   ├── Als_etal_2004.phy
│   │   │   └── partition_finder.cfg
│   │   └── nucleotide
│   │       ├── .DS_Store
│   │       ├── partition_finder.cfg
│   │       └── test.phy
│   ├── partfinder
│   │   ├── .DS_Store
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── algorithm.py
│   │   ├── algorithm.pyc
│   │   ├── alignment.py
│   │   ├── alignment.pyc
│   │   ├── analysis.py
│   │   ├── analysis.pyc
│   │   ├── analysis_method.py
│   │   ├── analysis_method.pyc
│   │   ├── config.py
│   │   ├── config.pyc
│   │   ├── main.py
│   │   ├── main.pyc
│   │   ├── neighbour.py
│   │   ├── neighbour.pyc
│   │   ├── parser.py
│   │   ├── parser.pyc
│   │   ├── partition.py
│   │   ├── partition.pyc
│   │   ├── phyml.py
│   │   ├── phyml.pyc
│   │   ├── phyml_models.py
│   │   ├── phyml_models.pyc
│   │   ├── progress.py
│   │   ├── progress.pyc
│   │   ├── pyparsing.py
│   │   ├── pyparsing.pyc
│   │   ├── raxml.py
│   │   ├── raxml.pyc
│   │   ├── raxml_models.py
│   │   ├── raxml_models.pyc
│   │   ├── reporter.py
│   │   ├── reporter.pyc
│   │   ├── results.py
│   │   ├── results.pyc
│   │   ├── scheme.py
│   │   ├── scheme.pyc
│   │   ├── submodels.py
│   │   ├── submodels.pyc
│   │   ├── subset.py
│   │   ├── subset.pyc
│   │   ├── threadpool.py
│   │   ├── threadpool.pyc
│   │   ├── util.py
│   │   ├── util.pyc
│   │   ├── version.py
│   │   └── version.pyc
│   └── programs
│       ├── .DS_Store
│       ├── phyml
│       └── raxml
├── README.md
├── Xenarthrans
│   └── fasta
│       ├── Bradypus_pygmaeus.fasta
│       ├── Bradypus_torquatus.fasta
│       ├── Bradypus_tridactylus.fasta
│       ├── Bradypus_variegatus.fasta
│       ├── Bradypus_variegatus_old.fasta
│       ├── Cabassous_centralis.fasta
│       ├── Cabassous_chacoensis.fasta
│       ├── Cabassous_tatouay.fasta
│       ├── Cabassous_unicinctus_ISEM_T-2291.fasta
│       ├── Cabassous_unicinctus_MNHN_1999-1068.fasta
│       ├── Calyptophractus_retusus.fasta
│       ├── Chaetophractus_vellerosus.fasta
│       ├── Chaetophractus_villosus.fasta
│       ├── Chlamyphorus_truncatus.fasta
│       ├── Choloepus_didactylus.fasta
│       ├── Choloepus_didactylus_old.fasta
│       ├── Choloepus_hoffmanni.fasta
│       ├── Cyclopes_didactylus.fasta
│       ├── Dasypus_hybridus.fasta
│       ├── Dasypus_kappleri.fasta
│       ├── Dasypus_novemcinctus.fasta
│       ├── Dasypus_novemcinctus_old.fasta
│       ├── Dasypus_pilosus_LSUMZ_21888.fasta
│       ├── Dasypus_pilosus_MSB_49990.fasta
│       ├── Dasypus_sabanicola.fasta
│       ├── Dasypus_septemcinctus.fasta
│       ├── Dasypus_yepesi.fasta
│       ├── Euphractus_sexcinctus.fasta
│       ├── Myrmecophaga_tridactyla.fasta
│       ├── Priodontes_maximus.fasta
│       ├── Tamandua_mexicana.fasta
│       ├── Tamandua_tetradactyla.fasta
│       ├── Tamandua_tetradactyla_old.fasta
│       ├── Tolypeutes_matacus.fasta
│       ├── Tolypeutes_tricinctus.fasta
│       ├── Zaedyus_pichiy.fasta
│       └── readme
├── bin
│   ├── BMGE.jar
│   ├── Gblocks
│   ├── blastall
│   ├── formatdb
│   ├── noisy
│   ├── progressiveMauve
│   ├── readal
│   └── trimal
└── plant
    └── fasta
        ├── Acidosasa_purpurea.fasta
        ├── Aegilops_cylindrica.fasta
        ├── Aegilops_geniculata.fasta
        ├── Aegilops_speltoides_SPE0661.fasta
        ├── Aegilops_tauschii.fasta
        ├── Agrostis_stolonifera.fasta
        ├── Anomochloa_marantoidea.fasta
        ├── Arundinaria_appalachiana.fasta
        ├── Arundinaria_gigantea.fasta
        ├── Arundinaria_tecta.fasta
        ├── Bambusa_emeiensis.fasta
        ├── Bambusa_multiplex.fasta
        ├── Bambusa_oldhamii.fasta
        ├── Brachypodium_distachyon.fasta
        ├── Coix_lacryma-jobi.fasta
        ├── Dendrocalamus_latiflorus.fasta
        ├── Deschampsia_antarctica.fasta
        ├── Ferrocalamus_rimosivaginus.fasta
        ├── Festuca_altissima.fasta
        ├── Festuca_arundinacea.fasta
        ├── Festuca_ovina.fasta
        ├── Festuca_pratensis.fasta
        ├── Hordeum_vulgare_sub_vulgare.fasta
        ├── Indocalamus_longiauritus.fasta
        ├── Leersia_tisserantii.fasta
        ├── Lolium_multiflorum.fasta
        ├── Lolium_perenne.fasta
        ├── Oryza_meridionalis.fasta
        ├── Oryza_nivara.fasta
        ├── Oryza_rufipogon.fasta
        ├── Oryza_sativa_93-11.fasta
        ├── Oryza_sativa_Nipponbare.fasta
        ├── Panicum_virgatum.fasta
        ├── Pharus_lappulaceus.fasta
        ├── Pharus_latifolius.fasta
        ├── Phragmites_australis.fasta
        ├── Phyllostachys_edulis.fasta
        ├── Phyllostachys_nigra_var_henonis.fasta
        ├── Phyllostachys_propinqua.fasta
        ├── Puelia_olyriformis.fasta
        ├── Rhynchoryza_subulata.fasta
        ├── Saccharum_officinarum.fasta
        ├── Secale_cereale.fasta
        ├── Setaria_italica.fasta
        ├── Sorghum_bicolor.fasta
        ├── Sorghum_timorense.fasta
        ├── Triticum_aestivum.fasta
        ├── Triticum_monococcum.fasta
        ├── Triticum_urartu.fasta
        ├── Typha_latifolia.fasta
        ├── Zea_mays.fasta
        ├── Zizania_latifolia.fasta
        └── readme
/PartitionFinderV1.1.1/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/.DS_Store
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/PartitionFinder.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, the PyParsing library, and the python-cluster library
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import sys
19 | from partfinder import main
20 |
21 | if __name__ == "__main__":
22 | # Well-behaved unix programs exit with 0 on success...
23 | sys.exit(main.main("PartitionFinder", "DNA"))
24 |
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/PartitionFinderProtein.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, the PyParsing library, and the python-cluster library
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import sys
19 | from partfinder import main
20 |
21 | if __name__ == "__main__":
22 | # Well-behaved unix programs exit with 0 on success...
23 | sys.exit(main.main("PartitionFinderProtein", "protein"))
24 |
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/README.md:
--------------------------------------------------------------------------------
1 | # PartitionFinder
2 |
3 | PartitionFinder and PartitionFinderProtein are Python programs for simultaneously
4 | choosing partitioning schemes and models of molecular evolution for sequence data.
5 | You can use them before running a phylogenetic analysis, in order
6 | to decide how to divide up your sequence data into separate blocks before
7 | analysis, and to simultaneously perform model selection on each of those
8 | blocks.
9 |
10 | # Operating System
11 |
12 | Mac and Windows are supported.
13 | All of the code was written with Linux in mind too, so if you are interested
14 | in porting it to Linux, please get in touch (or just try it out!).
15 |
16 | # Manual
17 |
18 | The manual is in the /docs folder.
19 |
20 | # Quick Start
21 |
22 | * Make sure you have Python 2.7 installed first; if not, go to www.python.org/getit/
23 |
24 | * For PartitionFinderProtein just substitute 'PartitionFinderProtein' for 'PartitionFinder' below
25 |
26 | 1. Open Terminal (on a Mac) or Command Prompt (on Windows) and cd to the directory with PartitionFinder in it
27 | 2. Run PartitionFinder by typing at the command prompt:
28 |
29 | python PartitionFinder.py example
30 |
31 | This will run the included example analysis for PartitionFinder. More generally, the command line for PartitionFinder looks like this:
32 |
33 | python <PartitionFinder.py> <foldername>
34 |
35 | where <PartitionFinder.py> is the full file-path to the PartitionFinder.py file
36 | and <foldername> is the full file-path to a folder with a phylip alignment and associated .cfg file.
37 |
38 | For more details, read the manual.
39 |
--------------------------------------------------------------------------------
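For a concrete run, the quick-start command in the README above generalises as follows. Both paths here are hypothetical placeholders, not files shipped in fixed locations; the analysis folder must contain the phylip alignment and the partition_finder.cfg that describes it.

    python ~/PartitionFinderV1.1.1/PartitionFinder.py ~/analyses/my_dataset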
/PartitionFinderV1.1.1/RELEASE-VERSION:
--------------------------------------------------------------------------------
1 | 1.1.1
2 |
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/ROGP_v2.0-backup.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | #
3 | #AUTHOR
4 | #Guiqi Bi :fenghen360@126.com
5 | #VERSION
6 | #ROGP v0.1
7 | #COPYRIGHT & LICENCE
8 | #This script is free software; you can redistribute it and/or modify it.
9 | #This script is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of merchantability or fitness for a particular purpose.
10 |
11 | my $USAGE = "\nusage: ./ROGP.pl
12 | \nparameters:
13 | -in= Genome alignment output file derived from Mauve. If you set --align, ignore this input parameter.
14 | -number= Number of taxa used in alignment (should be precise). If you set --align, ignore this input parameter.
15 | --align If you want to align sequences by mauve, add this parameter (Default: progressiveMauve).
16 | Then you should split every sequence into a single fasta file. Suffix must be .fasta
17 | --path= Absolute path to directory where you put in fasta sequences.
18 | --mauve-out= The output file produced by mauve (Absolute path). If you set --align parameter.
19 | -help Print the usage.\n";
20 |
21 |
22 | #-------------------------------------------------------------------------------------------
23 | #Parse the command-line parameters
24 | foreach my $paras (@ARGV){
25 | if ($paras=~/in/){
26 | $in=(split "=", $paras)[1];
27 | }
28 | if ($paras=~/number/){
29 | $number=(split "=", $paras)[1];
30 | }
31 | if ($paras=~/help/){
32 | print $USAGE;
33 | }
34 | if ($paras=~/align/){
35 | $align=1;
36 | }
37 | if ($paras=~/path/){
38 | $path=(split "=", $paras)[1];
39 | }
40 | if ($paras=~/mauve-out/){
41 | $mauveout=(split "=", $paras)[1];
42 | }
43 | }
44 | #-------------------------------------------------------------------------------------------
45 | #Validate the parameters
46 | if($align){
47 | undef $in; #if --align is set, clear $in and $number
48 | undef $number;
49 | if(!$mauveout){
50 | print "Please set the --mauve-out= parameter!\n";#检查是否设置了--mauve-out参数
51 | exit;
52 | }
53 | elsif(!$path){
54 | print "Please set the --path= parameter!\n";#检查是否设置了path参数
55 | exit;
56 | }
57 | else{
58 | $in=$mauveout; #hand $mauveout back to $in
59 | my @files=glob "$path*.fasta"; #collect the matching fasta files into an array
60 | my $filecom;
61 | $number=$#files+1; #store the file count in $number
62 | print "Total: $number files detected!\nThe following sequences will be aligned:\n";
63 | foreach(@files){
64 | $filecom.=" $_";
65 | print "$_\n"; #打印文件进行检验
66 | }
67 | print "<===========Please re-check.============>\n\nKeep going?\n\[Enter press/Ctrl+C\]\n";
68 | my $go=; #标准输入来决定程序是否进行下去
69 | if(!$go){exit;}
70 | else{print "Aligning, please wait.\n";
71 | `./progressiveMauve --output=$mauveout $filecom|tee mauve-screenout.log`;
72 | open MAUVE,"<","mauve-screenout.log";
73 | while(<MAUVE>){print;}
74 | }
75 |
76 | }
77 |
78 | }
79 |
80 | print "\n\nROGP started!\n\nAligned fasta file is $in\nNumber of the taxon species used in alignment is $number\n";
81 |
82 | my $taxon=1; #tracks the fasta header index being matched
83 | my $file=1; #numbers the output block header files
84 | my $module;
85 |
86 | #-------------------------------------------------------------------------------------------
87 | #First, extract the fasta headers
88 | open(HEAD, ">>head.tmp");
89 | open(IN, "<$in")||die "Can't open $in:$!\n";
90 | while(<IN>){
91 | if($_=~m/^>/){
92 | print HEAD "$_";
93 | }
94 | }
95 | close(HEAD);
96 |
97 | print "Identify the colinear blocks. Be patient\n\n\n";
98 | open(TMP, "){
100 |
101 | if($_=~m/^>\s$taxon:/){
102 | $module.="$_";
103 | $taxon++;
104 | if($taxon==$number+1){
105 | open(OUT, ">module_$file.head") ;
106 | print OUT "$module";
107 | close(OUT);
108 | undef $module;
109 | $taxon=1;
110 | $file++;
111 | }
112 | }
113 | else{
114 | undef $module;
115 | $taxon=1;
116 | }
117 | }
118 |
119 | `rm head.tmp`;
120 | print "Finished!\n\n\n";
121 |
122 |
123 | #-------------------------------------------------------------------------------------------
124 | #Extract the sequences and rename the files
125 | my @head=glob("*.head");
126 | my $temp_num=@head; #temporary record of the block count
127 | print "In total, $temp_num collinear blocks were identified!\nNow extracting these sequences!\n\n\n";
128 | foreach my $head(@head){
129 |
130 | &extract($head,$in);
131 | `rm $head`;
132 | }
133 |
134 | `rename .head.fasta .fasta *.fasta`;
135 |
136 | #A small subroutine to extract sequences, adapted directly from g.pl
137 | sub extract{
138 | my $biaotou=shift @_;
139 | my $seq=shift @_;
140 | my @liuyuan;
141 | open IN,"<",$biaotou;
142 | while(<IN>){
143 | push @liuyuan,"$_";
144 | }
145 |
146 | foreach(0...$#liuyuan){
147 |
148 | open OUT,"<",$seq;
149 | open FASTA,">>","$biaotou.fasta";
150 | my $turnoff=0;
151 | while($line=<OUT>){
152 |
153 | if($line eq $liuyuan[$_]){$turnoff=1;
154 | print FASTA "$line";
155 | next;
156 | }
157 | elsif($line ne $liuyuan[$_]&&$line=~m/>.*\n/){$turnoff=0;}
158 | if($turnoff){print FASTA "$line";}
159 |
160 |
161 | }
162 | close(OUT);
163 | close(FASTA);
164 | }
165 | }
166 | #-------------------------------------------------------------------------------------------
167 | #Shorten the header of each block's sequences (otherwise they are too long for Gblocks to handle) and strip the = signs at the end of each block's sequences
168 |
169 | my @seq=glob("*.fasta");
170 | foreach my $seq(@seq){
171 | open(TMP2IN, "<$seq")||die "Can't open $in:$!\n";
172 | open(TMP2OUT, ">>$seq.rename")||die "Can't open $in:$!\n";
173 | while(<TMP2IN>){
174 | if($_=~m/=/){next;}
175 | if($_=~m/^>/){
176 | my @array=split(/ [+|-] /,$_);
177 | my @array2=split(/\\/,@array[$#array]);
178 | my @array3=split(/\//,@array2[$#array2]);
179 | @array3[$#array3]=~s/\.fasta//g;
180 | print TMP2OUT ">@array3[$#array3]";
181 | undef @array;
182 | undef @array2;
183 | undef @array3;
184 | }
185 | else {print TMP2OUT "$_";}
186 |
187 | }
188 | close(TMP2IN);
189 | close(TMP2OUT);
190 | `rm $seq`;
191 | }
192 | `rename .fasta.rename .fasta *.rename`;
193 |
194 |
195 |
196 |
197 |
198 | #-------------------------------------------------------------------------------------------
199 | #Run the sequences through Gblocks. Gblocks needs to be on the PATH; its location could also be made a parameter. Check whether Gblocks can run non-interactively, so options can be passed in directly
200 | print "Now working with Gblocks!\n\n\n" ;
201 |
202 | my @trimed=glob("*.fasta");
203 | foreach my $trimed(@trimed){
204 | `./Gblocks $trimed out`;
205 | `rm $trimed`;
206 | }
207 |
208 |
209 | #-------------------------------------------------------------------------------------------
210 | #Process the .fasta-gb result files
211 | #perl -e '$gb=shift;open GB,$gb;while(<GB>){if(/^>/){print "$_";}if(/^[A|T|C|G|N]{10}\s/i){$_=~s/\s//g;print "$_";}}' block20.fasta-gb > block20-gb
212 |
213 | my @gb=glob("*.fasta-gb");
214 | foreach my $gb(@gb){
215 | my $delete=0; #threshold: if no ATCG lines are matched at all, skip this block and delete the file it produced
216 | open(GB, "<$gb")||die "Can't open $in:$!\n";
217 | open(GBOUT, ">>$gb.out")||die "Can't open $in:$!\n";
218 | while(<GB>){
219 |
220 | if(/^>/){print GBOUT "$_";}
221 | elsif(/^[A|T|C|G|N]{10}\s/i){
222 | $delete++;
223 | $_=~s/\s//g;
224 | print GBOUT "$_\n";
225 | }
226 |
227 | }
228 | close(GB);
229 | close(GBOUT);
230 | if ($delete==0){`rm $gb.out`;}
231 | `rm $gb`;
232 | }
233 |
234 | `rename fasta-gb.out fasta *.fasta-gb.out`;
235 |
236 |
237 |
238 | #-------------------------------------------------------------------------------------------
239 | #Finally, merge the files and report how many blocks were extracted and the total sequence length
240 | my @final=glob("*.fasta");
241 | my $f_length=@final;
242 | if ($f_length==$temp_num){
243 | print "All blocks extracted by Mauve have conserved sequences.\n\n\n";
244 | }
245 | else {print "Only $f_length blocks have conserved sequences.\n\n\n";}
246 | #First, build an array of the fasta headers
247 | my @fasta;
248 | my $hehe=1;
249 | foreach my $final(@final){
250 |
251 | if ($hehe>1){last;}
252 | else {
253 | open(HEHE, "<$final")||die "Can't open $in:$!\n";
254 | while(<HEHE>){
255 | if($_=~m/^>/){push @fasta, $_;}
256 | }
257 | close(HEHE);
258 | }
259 | $hehe++;
260 | }
261 |
262 | open(CAN,">>all-sequence.fasta")||die"Cannot open file";
263 | my $character_length;
264 |
265 | foreach $fasta(@fasta){
266 | my $can_all; #concatenates the sequence of one species
267 | print CAN "$fasta";
268 | foreach $final(@final){
269 | open(FILE,"<$final")||die"Cannot open file";
270 | my $turnoff=0; #flag that decides whether to concatenate
271 | my $can; #used to concatenate the sequence
272 | while(my $line=<FILE>){
273 | if ($line eq $fasta){$turnoff=1;next;}
274 | elsif($line ne $fasta&&$line=~m/>.*\n/){$turnoff=0;}
275 | if($turnoff){chomp $line;$can.=$line;}
276 | }
277 | $can_all.=$can;
278 |
279 | }
280 | print CAN "$can_all\n";
281 | $character_length=length($can_all);
282 | }
283 | close(CAN);
284 |
285 |
286 | print "The final concatenated sequences was writen in all-sequence.fasta\n\n";
287 | print "The concatenated length is $character_length bp\n\n";
288 | print "ROGP DATA PREPRATION COMPLETED! ENJOY IT!!\n\n\n";
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
--------------------------------------------------------------------------------
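Going by the usage string at the top of the script, an alignment-mode run might look like the line below; both paths are placeholders, not files shipped with the repository. Note that --path= is interpolated into a glob as "$path*.fasta", so it should end with a trailing slash.

    ./ROGP.pl --align --path=/data/genomes/ --mauve-out=/data/genomes/mauve.out

With a pre-computed Mauve alignment of, say, 12 taxa, the equivalent non-align invocation would be ./ROGP.pl -in=mauve.out -number=12.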
/PartitionFinderV1.1.1/circoletto.pl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/circoletto.pl
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/docs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/docs/.DS_Store
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/docs/Manual_v1.1.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/docs/Manual_v1.1.1.pdf
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/examples/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/examples/.DS_Store
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/examples/README.txt:
--------------------------------------------------------------------------------
1 | README for Examples
2 | ___________________
3 |
4 |
5 | These folders contain two simple examples.
6 |
7 | The /nucleotide folder demonstrates PartitionFinder;
8 | the /aminoacid folder demonstrates PartitionFinderProtein.
9 |
10 | Instructions on how to run these examples are provided in the manual:
11 |
12 | For Mac Users: Page 7 of the manual
13 | For Windows Users: Page 9 of the manual
14 |
15 | Please email me if you have any questions.
16 |
17 | Rob Lanfear
18 | May 2012
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/examples/aminoacid/Als_etal_2004.phy:
--------------------------------------------------------------------------------
1 | 4 949
2 | AD00P055 SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNLFFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPXXXXXXXXXXGIISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYFTSATMIIAVPTGIKIFSWLATIYGTQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLKIQFILMFIGVNXXXXXXXXXXXXXXXXXXXXXPDMXLSWNIISSLGSYMSFISMMMMMMIIWESMIKQRLILFSLNMSSSIEWLQNTPPNEHSYNELPILSNFMATWSNLNFQNSVSPLMEQIIFFHDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMIELIXXXXXXXXXXXXXXXXXRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINEYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTVPSIGVKVDANPGRLNQTSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFXDAPGHSDFIKNMITGTSQAXCAVLIVAAGTGEXEAGISKNGQTREHALXAFTLGVKQLIVGVNKMXSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEXLQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDXXSGKSXEVDPKSIKSGDDAXVNMVXSKPLXXES
3 | RV03N585 SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNLFFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGIISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYITSATMIIAVPTGIKIFSWLATIYGTQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLKIQFILMFIGVNXXXXXXXXXXXXXXXXXXXXXPDMFLSWNIISSLGSYMSFISMMMMMMIIWESMIKQRLILFSLNMSSSIEWLQNTPPNEHSYNELPILSNFMATWSNLNFQNSVSPLMEQIIFFHDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMIEXXXXXXXXIILIFIALPSLRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINKYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTVPSIGVKVDANPGRLNQTSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFIDAPGHSDFIKNMITGTSQADCAVLIVAAGTGEFEAGISKNGQTREHALLAFTLGVKQLIVGVNKMDSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEALQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDRRSGKSTEVDPKSIKSGDAAIVNLVPSKPLCVES
4 | TDA99Q996 SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNMSFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQXXXXXXXXXXXXXXXXXXXXIISXIISQESXKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYFTSATMIIAVPTGIKIFSWLATIYGSQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLXIXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXLSWNIVSSLGSYMSFISMLLMMMIIWESMIKKRLILFSLNMSSSIEWLQNTPPNEHSYNELPILNNFMATWSNLNFQNSVSPLMEQIIFFNDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMXXLIXXXXXXXXXXXXXXXSLRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINEYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTIPAIGVKVDANPGRLNQSSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFIDAPGHSDFIKNMITGTSQADCAVLIVAAGTGEFEAGISKNGQTREHALLAFTLGVKQLIVGVNKMDSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEALQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDRRSGKSTEVDPKSIKSGDAAIVNLVPSKPLCVES
5 | ZD99S305 SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNLFFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPXXXXXXXXXXXXXXXXXXXESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYFTSATMIIAVPTGIKIFSWLATIYGTQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLKIQFILMXXXXXXXXXXXXXXXXXXXXXXXXXXPDMXLSWNIISSLGSYMSFISMMMMMMIIWESMIKQRLILFSLNMSSSIEWLQNTPPNEHSYNELPILSNFMATWSNLNFQNSVSPLMEQIIFFHDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMIELIXXXXXXIILIFIALPSLRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINEYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTVPSIGVKVDANPGRLNQTSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFIDAPGHSDFIKNMITGTSQADCAVLIVAAGTGEFEAGISKNGQTREHALLAFTLGVKQLIVGVNKMDSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEALQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDRRSGKSTEVDPKSIKSGDAAIVNLVPSKPLCVES
6 |
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/examples/aminoacid/partition_finder.cfg:
--------------------------------------------------------------------------------
1 | ## ALIGNMENT FILE ##
2 | alignment = Als_etal_2004.phy;
3 |
4 | ## BRANCHLENGTHS: linked | unlinked ##
5 | branchlengths = linked;
6 |
7 | ## MODELS OF EVOLUTION for PartitionFinder: all | raxml | mrbayes | <list> ##
8 | ## for PartitionFinderProtein: all_protein | <list> ##
9 | models = all_protein;
10 |
11 | # MODEL SELECTION: AIC | AICc | BIC #
12 | model_selection = BIC;
13 |
14 | ## DATA BLOCKS: see manual for how to define ##
15 | [data_blocks]
16 | COI = 1-407;
17 | COII = 408-624;
18 | EF1a = 625-949;
19 |
20 | ## SCHEMES, search: all | user | greedy ##
21 | [schemes]
22 | search = greedy;
23 |
24 | #user schemes go here if search=user. See manual for how to define.#
25 |
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/examples/nucleotide/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/examples/nucleotide/.DS_Store
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/examples/nucleotide/partition_finder.cfg:
--------------------------------------------------------------------------------
1 | ## ALIGNMENT FILE ##
2 | alignment = test.phy;
3 |
4 | ## BRANCHLENGTHS: linked | unlinked ##
5 | branchlengths = linked;
6 |
7 | ## MODELS OF EVOLUTION for PartitionFinder: all | raxml | mrbayes | beast | <list> ##
8 | ## for PartitionFinderProtein: all_protein | <list> ##
9 | models = all;
10 |
11 | # MODEL SELECTION: AIC | AICc | BIC #
12 | model_selection = BIC;
13 |
14 | ## DATA BLOCKS: see manual for how to define ##
15 | [data_blocks]
16 | Gene1_pos1 = 1-789\3;
17 | Gene1_pos2 = 2-789\3;
18 | Gene1_pos3 = 3-789\3;
19 | Gene2_pos1 = 790-1449\3;
20 | Gene2_pos2 = 791-1449\3;
21 | Gene2_pos3 = 792-1449\3;
22 | Gene3_pos1 = 1450-2208\3;
23 | Gene3_pos2 = 1451-2208\3;
24 | Gene3_pos3 = 1452-2208\3;
25 |
26 | ## SCHEMES, search: all | greedy | rcluster | hcluster | user ##
27 | [schemes]
28 | search = greedy;
29 |
30 | #user schemes go here if search=user. See manual for how to define.#
31 |
--------------------------------------------------------------------------------
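In the [data_blocks] above, the \3 suffix steps through the range three sites at a time, so Gene1_pos1 = 1-789\3 picks out the first codon position of Gene1. A minimal sketch of that expansion in Python 2 (expand_block is a hypothetical helper for illustration, not a function from partfinder):

    def expand_block(start, stop, step=1):
        # cfg ranges are 1-based and inclusive; convert to 0-based column indices
        return range(start - 1, stop, step)

    cols = expand_block(1, 789, 3)
    print cols[:4], len(cols)   # [0, 3, 6, 9] 263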
/PartitionFinderV1.1.1/examples/nucleotide/test.phy:
--------------------------------------------------------------------------------
1 | 4 2208
2 | spp1 CTTGAGGTTCAGAATGGTAATGAA------GTGCTGGTGCTGGAAGTTCAGCAGCAGCTCGGCGGCGGTATCGTACGTACCATCGCCATGGGTTCTTCCGACGGTCTGCGTCGCGGTCTGGATGTAAAAGACCTCGAGCACCCGATCGAAGTCCCAGTTGGTAAAGCAACACTGGGTCGTATCATGAACGTACTGGGTCAGCCAGTAGACATGAAGGGCGACATCGGTGAAGAAGAGCGTTGGGCT---------------ATCCACCGTGAAGCACCATCCTATGAAGAGCTGTCAAGCTCTCAGGAACTGCTGGAAACCGGCATCAAAGTTATCGACCTGATGTGTCCGTTTGCGAAGGGCGGTAAAGTTGGTCTGTTCGGTGGTGCGGGTGTAGGTAAAACCGTAAACATGATGGAGCTTATTCGTAACATCGCGATCGAGCACTCCGGTTATTCTGTGTTTGCGGGCGTAGGTGAACGTACTCGTGAGGGTAACGACTTCTACCACGAAATGACCGACTCCAACGTTATCGAT---------------------AAAGTTTCTCTGGTTTATGGCCAGATGAACGAGCCACCAGGTAACCGTCTGCGCGTTGCGCTGACCGGTCTGACCATGGCTGAGAAGTTCCGTGACGAAGGTCGCGACGTACTGCTGTTCGTCGATAACATCTATCGTTACACCCTGGCAGGTACTGAAGTTTCAGCACTGCTGGGTCGTATGCCTTCAGCGGTAGGTTACCAGCCGACTCTGGCGGAAGAAATGGGCGTTCGCATTCCAACGCTGGAAGAGTGTGATATCTGCCACGGCAGCGGCGCTAAAGCCGGTTCGAAGCCGCAGACCTGTCCTACCTGTCACGGTGCAGGCCAGGTACAGATGCGCCAGGGCTTCTTCGCTGTACAGCAGACCTGTCCACACTGCCAGGGCCGCGGTACGCTGATCAAAGATCCGTGCAACAAATGTCACGGTCATGGTCGCGTAGAGAAAACCAAAACCCTGTCCGTAAAAATTCCGGCAGGCGTTGATACCGGCGATCGTATTCGTCTGACTGGCGAAGGTGAAGCTGGTGAGCACGGCGCACCGGCAGGCGATCTGTACGTTCAGGTGCAGGTGAAGCAGCACGCTATTTTCGAGCGTGAAGGCAACAACCTGTACTGTGAAGTGCCGATCAACTTCTCAATGGCGGCTCTTGGCGGCGAGATTGAAGTGCCGACGCTTGATGGTCGCGTGAAGCTGAAAGTTCCGGGCGAAACGCAAACTGGCAAGCTGTTCCGTATGCGTGGCAAGGGCGTGAAGTCCGTGCGCGGCGGTGCACAGGGCGACCTTCTGTGCCGCGTGGTGGTCGAGACACCGGTAGGTCTTAACGAGAAGCAGAAACAGCTGCTCAAAGATCTGCAGGAAAGTTTTGGCGGCCCAACGGGTGAAAACAACGTTGTTAACGCCCTGTCGCAGAAACTGGAATTGCTGATCCGCCGCGAAGGCAAAGTACATCAGCAAACTTATGTCCATGGTGTGCCACAGGCTCCGCTGGCGGTAACCGGTGAAACGGAAGTGACCGGTACACAGGTGCGTTTCTGGCCAAGCCACGAAACCTTCACCAACGTAATCGAATTCGAATATGAGATTCTGGCAAAACGTCTGCGCGAGCTGTCATTCCTGAACTCCGGCGTTTCCATCCGTCTGCGCGATAAGCGTGAC---GGCAAAGAAGACCATTTCCACTATGAAGGTGGTATCAAGGCGTTTATTGAGTATCTCAATAAAAATAAAACGCCTATCCACCCGAATATCTTCTACTTCTCCACCGAA---AAAGACGGTATTGGCGTAGAAGTGGCGTTGCAGTGGAACGATGGTTTCCAGGAAAACATCTACTGCTTCACCAACAACATTCCACAGCGTGATGGCGGTACTCACCTTGCAGGCTTCCGTGCGGCGATGACCCGTACGCTGAACGCTTACATGGACAAAGAAGGCTACAGCAAAAAAGCCAAA------GTCAGCGCCACCGGTGATGATGCCCGTGAAGGCCTGATTGCCGTCGTTTCCGTGAAAGTACCGGATCCGAAATTCTCCTCTCAGACTAAAGACAAACTGGTCTCTTCTGAGGTGAAAACGGCGGTAGAACAGCAGATGAATGAACTGCTGAGCGAATACCTGCTGGAAAACCCGTCTGACGCCAAAATC
3 | spp2 CTTGAGGTACAAAATGGTAATGAG------AGCCTGGTGCTGGAAGTTCAGCAGCAGCTCGGTGGTGGTATCGTACGTGCTATCGCCATGGGTTCTTCCGACGGTCTGCGTCGTGGTCTGGAAGTTAAAGACCTTGAGCACCCGATCGAAGTCCCGGTTGGTAAAGCAACGCTGGGTCGTATCATGAACGTGCTGGGTCAGCCGATCGATATGAAAGGCGACATCGGCGAAGAAGAACGTTGGGCG---------------ATTCACCGTGCAGCACCTTCCTATGAAGAGCTCTCCAGCTCTCAGGAACTGCTGGAAACCGGCATCAAAGTTATCGACCTGATGTGTCCGTTCGCGAAGGGCGGTAAAGTCGGTCTGTTCGGTGGTGCGGGTGTTGGTAAAACCGTAAACATGATGGAGCTGATCCGTAACATCGCGATCGAACACTCCGGTTACTCCGTGTTTGCTGGTGTTGGTGAGCGTACTCGTGAGGGTAACGACTTCTACCACGAAATGACCGACTCCAACGTTCTGGAT---------------------AAAGTATCCCTGGTTTACGGCCAGATGAACGAGCCGCCGGGAAACCGTCTGCGCGTTGCACTGACCGGCCTGACCATGGCTGAGAAATTCCGTGACGAAGGTCGTGACGTTCTGCTGTTCGTCGATAACATCTATCGTTATACCCTGGCCGGTACAGAAGTATCTGCACTGCTGGGTCGTATGCCTTCTGCGGTAGGTTATCAGCCGACGCTGGCGGAAGAGATGGGCGTTCGTATCCCGACGCTGGAAGAGTGCGACGTCTGCCACGGCAGCGGCGCGAAATCTGGCAGCAAACCGCAGACCTGTCCGACCTGTCATGGTCAGGGCCAGGTGCAGATGCGTCAGGGCTTCTTCGCCGTTCAGCAGACCTGTCCGCATTGTCAGGGGCGCGGTACGCTGATTAAAGATCCGTGCAACAAATGTCACGGTCACGGTCGCGTTGAGAAAACCAAAACCCTGTCGGTCAAAATCCCGGCGGGCGTGGATACCGGCGATCGTATTCGTCTGTCAGGAGAAGGCGAAGCGGGCGAACACGGTGCACCAGCAGGCGATCTGTACGTTCAGGTCCAGGTTAAGCAGCACGCCATCTTTGAGCGTGAAGGCAATAACCTGTACTGCGAAGTGCCTATTAACTTCACCATGGCAGCCCTCGGCGGCGAGATTGAAGTCCCGACGCTGGATGGCCGGGTGAATCTCAAAGTGCCTGGCGAAACGCAAACCGGCAAACTGTTCCGCATGCGCGGTAAAGGTGTGAAATCCGTGCGCGGTGGTGCTCAGGGCGACCTGCTGTGCCGCGTGGTGGTTGAAACACCAGTCGGGCTGAACGATAAGCAGAAACAGCTGCTGAAGGACCTGCAGGAAAGTTTTGGCGGACCAACGGGCGAGAAAAACGTGGTTAACGCCCTGTCGCAGAAGCTGGAGCTGGTTATTCAGCGCGACAATAAAGTTCACCGTCAGATCTATGCGCACGGTGTGCCGCAGGCTCCGCTGGCAGTGACCGGTGAGACCGAAAAAACCGGCACCATGGTACGTTTCTGGCCAAGCTATGAAACCTTCACCAACGTTGTCGAGTTCGAATACGAGATCCTGGCAAAACGTCTGCGTGAGCTGTCGTTCCTGAACTCCGGGGTTTCTATCCGTCTGCGTGACAAGCGTGAC---GGTAAAGAAGACCATTTCCACTACGAAGGCGGCATCAAGGCGTTCGTTGAGTATCTCAATAAGAACAAAACGCCGATCCACCCGAATATCTTCTACTTCTCCACCGAA---AAAGACGGTATTGGCGTCGAAGTAGCGCTGCAGTGGAACGACGGCTTCCAGGAAAACATCTACTGCTTCACCAACAACATCCCGCAGCGCGATGGCGGTACTCACCTTGCGGGCTTCCGCGCGGCGATGACCCGTACCCTGAACGCCTATATGGACAAAGAAGGCTACAGCAAAAAAGCCAAA------GTCAGCGCTACCGGCGACGATGCGCGTGAAGGCCTGATTGCCGTTGTCTCCGTGAAGGTTCCGGATCCGAAATTCTCCTCGCAGACCAAAGACAAACTGGTCTCCTCCGAGGTGAAAACCGCGGTTGAACAGCAGATGAATGAACTGCTGAACGAATACCTGCTGGAAAATCCGTCTGACGCGAAAATC
4 | spp3 CTTGAGGTACAGAATAACAGCGAG------AAGCTGGTGCTGGAAGTTCAGCAGCAGCTCGGCGGCGGTATCGTACGTACCATCGCAATGGGTTCTTCCGACGGTCTGCGTCGTGGTCTGGAAGTGAAAGACCTCGAGCACCCGATCGAAGTCCCGGTAGGTAAAGCGACCCTGGGTCGTATCATGAACGTGCTGGGTCAGCCAATCGATATGAAAGGCGACATCGGCGAAGAAGATCGTTGGGCG---------------ATTCACCGCGCAGCACCTTCCTATGAAGAGCTGTCCAGCTCTCAGGAACTGCTGGAAACCGGCATCAAAGTTATCGACCTGATTTGTCCGTTCGCTAAGGGCGGTAAAGTTGGTCTGTTCGGTGGTGCGGGCGTAGGTAAAACCGTAAACATGATGGAGCTGATCCGTAACATCGCGATCGAGCACTCCGGTTACTCCGTGTTTGCAGGCGTGGGTGAGCGTACTCGTGAGGGTAACGACTTCTACCACGAGATGACCGACTCCAACGTTCTGGAC---------------------AAAGTTGCACTGGTTTACGGCCAGATGAACGAGCCGCCAGGTAACCGTCTGCGCGTAGCGCTGACCGGTCTGACCATCGCGGAGAAATTCCGTGACGAAGGCCGTGACGTTCTGCTGTTCGTCGATAACATCTATCGTTATACCCTGGCCGGTACAGAAGTTTCTGCACTGCTGGGTCGTATGCCATCTGCGGTAGGTTATCAGCCTACTCTGGCAGAAGAGATGGGTGTTCGTATCCCGACGCTGGAAGAGTGTGAAGTTTGCCACGGCAGCGGCGCGAAAAAAGGTTCTTCTCCGCAGACCTGTCCAACCTGTCATGGACAGGGCCAGGTGCAGATGCGTCAGGGCTTCTTCACCGTGCAGCAAAGCTGCCCGCACTGCCAGGGCCGCGGTACCATCATTAAAGATCCGTGCACCAACTGTCACGGCCATGGCCGCGTAGAGAAAACCAAAACGCTGTCGGTAAAAATTCCGGCAGGCGTGGATACCGGCGATCGTATCCGCCTTTCTGGTGAAGGCGAAGCGGGCGAGCACGGCGCACCTTCAGGCGATCTGTACGTTCAGGTTCAGGTGAAACAGCACCCAATCTTCGAGCGTGAAGGCAATAACCTGTACTGCGAAGTGCCGATCAACTTTGCGATGGCTGCGCTGGGCGGGGAAATTGAAGTGCCGACCCTTGACGGCCGCGTTAAGCTGAAGGTACCGAGCGAAACGCAAACCGGCAAGCTGTTCCGCATGCGCGGTAAAGGCGTGAAATCCGTACGCGGTGGCGCGCAGGGCGATCTGCTGTGCCGCGTCGTCGTTGAAACTCCGGTTAGCCTGAACGAAAAGCAGAAGAAACTGCTGCGTGATTTGGAAGAGAGCTTTGGCGGCCCAACGGGGGCGAACAATGTTGTGAACGCCCTGTCCCAGAAGCTGGAGCTGCTGATTCGCCGCGAAGGCAAAACCCATCAGCAAACCTACGTGCACGGTGTGCCGCAGGCTCCGCTGGCGGTCACCGGTGAAACCGAACTGACCGGTACCCAGGTGCGTTTCTGGCCGAGCCATGAAACCTTCACCAACGTCACCGAATTCGAATATGACATCCTGGCTAAGCGCCTGCGTGAGCTGTCGTTCCTGAACTCCGGCGTCTCTATTCGCCTGAACGATAAGCGCGAC---GGCAAGCAGGATCACTTCCACTACGAAGGCGGCATCAAGGCGTTTGTTGAGTACCTCAACAAGAACAAAACCCCGATTCACCCGAACGTCTTCTATTTCAGCACTGAA---AAAGACGGCATCGGCGTGGAAGTGGCGCTGCAGTGGAACGACGGCTTCCAGGAAAATATCTACTGCTTTACCAACAACATTCCTCAGCGCGACGGCGGTACTCACCTTGCGGGCTTCCGCGCGGCGATGACCCGTACCCTGAACGCCTATATGGACAAAGAAGGCTACAGCAAAAAAGCCAAA------GTGAGCGCCACCGGTGACGATGCGCGTGAAGGCCTGATTGCCGTAGTGTCCGTGAAGGTGCCGGATCCGAAGTTCTCTTCCCAGACCAAAGACAAACTGGTTTCTTCGGAAGTGAAATCCGCGGTTGAACAGCAGATGAACGAACTGCTGGCTGAATACCTGCTGGAAAATCCGGGCGACGCAAAAATT
5 | spp4 CTCGAGGTGAAAAATGGTGATGCT------CGTCTGGTGCTGGAAGTTCAGCAGCAGCTGGGTGGTGGCGTGGTTCGTACCATCGCCATGGGTACTTCTGACGGCCTGAAGCGCGGTCTGGAAGTTACCGACCTGAAAAAACCTATCCAGGTTCCGGTTGGTAAAGCAACCCTCGGCCGTATCATGAACGTATTGGGTGAGCCAATCGACATGAAAGGCGACCTGCAGAATGACGACGGCACTGTAGTAGAGGTTTCCTCTATTCACCGTGCAGCACCTTCGTATGAAGATCAGTCTAACTCGCAGGAACTGCTGGAAACCGGCATCAAGGTTATCGACCTGATGTGTCCGTTCGCTAAGGGCGGTAAAGTCGGTCTGTTCGGTGGTGCGGGTGTAGGTAAAACCGTAAACATGATGGAGCTGATCCGTAACATCGCGGCTGAGCACTCAGGTTATTCGGTATTTGCTGGTGTGGGTGAGCGTACTCGTGAGGGTAACGACTTCTACCACGAAATGACTGACTCCAACGTTATCGAT---------------------AAAGTAGCGCTGGTGTATGGCCAGATGAACGAGCCGCCGGGTAACCGTCTGCGCGTAGCACTGACCGGTTTGACCATGGCGGAAAAATTCCGTGATGAAGGCCGTGACGTTCTGCTGTTCATCGACAACATCTATCGTTACACCCTGGCCGGTACTGAAGTATCAGCACTGCTGGGTCGTATGCCATCTGCGGTAGGCTATCAGCCAACGCTGGCAGAAGAGATGGGTGTGCGCATTCCAACACTGGAAGAGTGCGATGTCTGCCACGGTAGCGGCGCGAAAGCGGGGACCAAACCGCAGACCTGTCATACCTGTCATGGCGCAGGCCAGGTGCAGATGCGTCAGGGCTTCTTCACTGTGCAGCAGGCGTGTCCGACCTGTCACGGTCGCGGTTCAGTGATCAAAGATCCGTGCAATGCTTGTCATGGTCACGGTCGCGTTGAGCGCAGTAAAACCCTGTCGGTGAAAATTCCAGCAGGCGTGGATACCGGCGATCGCATTCGTCTGACCGGCGAAGGTGAAGCGGGCGAACAGGGCGCACCAGCGGGCGATCTGTACGTTCAGGTTTCGGTGAAAAAGCACCCGATCTTTGAGCGTGAAGATAACAACCTATATTGCGAAGTGCCGATTAACTTTGCGATGGCAGCATTGGGTGGCGAGATTGAAGTGCCGACGCTTGATGGGCGTGTGAACCTGAAAGTGCCTTCTGAAACGCAAACTGGCAAGCTGTTCCGCATGCGCGGTAAAGGCGTGAAATCGGTGCGTGGTGGTGCGGTAGGCGATTTGCTGTGTCGTGTGGTGGTGGAAACGCCAGTTAGCCTCAATGACAAACAGAAAGCGTTACTGCGTGAACTGGAAGAGAGTTTTGGCGGCCCGAGCGGTGAGAAAAACGTCGTAAACGCCCTGTCACAGAAGCTGGAGCTGACCATTCGCCGTGAAGGCAAAGTGCATCAGCAGGTTTATCAGCACGGCGTGCCGCAGGCACCGCTGGCGGTGTCCGGTGATACCGATGCAACCGGTACTCGCGTGCGTTTCTGGCCGAGCTACGAAACCTTCACCAATGTGATTGAGTTTGAGTACGAAATCCTGGCGAAACGCCTGCGTGAACTGTCGTTCCTGAACTCTGGCGTTTCGATTCGTCTGGAAGACAAACGCGAC---GGCAAGAACGATCACTTCCACTACGAAGGCGGCATCAAGGCGTTCGTTGAGTATCTCAACAAGAACAAAACCCCGATTCACCCAACGGTGTTCTACTTCTCGACGGAG---AAAGATGGCATTGGCGTGGAAGTGGCGCTGCAGTGGAACGATGGTTTCCAGGAAAACATCTACTGCTTCACCAACAACATTCCACAGCGCGACGGCGGTACGCACCTGGCGGGCTTCCGTGCGGCAATGACGCGTACGCTGAATGCCTACATGGATAAAGAAGGCTACAGCAAAAAAGCCAAA------GTCAGTGCGACCGGTGACGATGCGCGTGAAGGCCTGATTGCAGTGGTTTCCGTGAAAGTGCCGGATCCGAAATTCTCTTCTCAGACCAAAGATAAGCTGGTCTCTTCTGAAGTGAAATCGGCGGTTGAGCAGCAGATGAACGAACTGCTGGCGGAATACCTGCTGGAAAATCCGTCTGACGCGAAAATC
6 |
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/.DS_Store
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | #This program is free software: you can redistribute it and/or modify it
3 | #under the terms of the GNU General Public License as published by the
4 | #Free Software Foundation, either version 3 of the License, or (at your
5 | #option) any later version.
6 | #
7 | #This program is distributed in the hope that it will be useful, but
8 | #WITHOUT ANY WARRANTY; without even the implied warranty of
9 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 | #General Public License for more details. You should have received a copy
11 | #of the GNU General Public License along with this program. If not, see
12 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
13 | #program, the RAxML program, and the PyParsing library, all of which are protected by their own licenses and conditions; using PartitionFinder implies that you agree with those licences and conditions as well.
14 |
15 | import logging
16 | log = logging.getLogger("config")
17 | import config
18 |
19 | # TODO: Not currently used
20 | # Activation should chdir, and maybe do some other stuff
21 | # So maybe need an 'activate' function on the config?
22 | # Should also clear out subsets, in the cache?
23 |
24 | class Current(object):
25 | """Keep a bunch of stuff current, that can be reinitialised"""
26 | def __init__(self):
27 | self._config = None
28 |
29 | def activate_config(self, c):
30 | assert isinstance(c, config.Configuration)
31 |
32 | if self._config is not None:
33 | log.debug("Resetting old configuration...")
34 | self._config.reset()
35 |
36 | log.debug("Assigning a new configuration...")
37 | self._config = c
38 |
39 | @property
40 | def active_config(self):
41 | if self._config is None:
42 | log.error("No configuration is currently active...")
43 |
44 | return self._config
45 |
46 |
47 | current = Current()
48 |
--------------------------------------------------------------------------------
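A sketch of how the module-level current singleton above is meant to be driven. Only activate_config and active_config are shown in this file; whether config.Configuration can be built with no arguments is an assumption here, the constructor details live in config.py.

    from partfinder import current, config

    cfg = config.Configuration()     # assumption: construction details are in config.py
    current.activate_config(cfg)     # resets and replaces any previously active config
    active = current.active_config   # logs an error and returns None if nothing is active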
/PartitionFinderV1.1.1/partfinder/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/__init__.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/algorithm.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | from math import sqrt
19 | from itertools import izip
20 |
21 |
22 | def k_subsets_i(n, k):
23 | '''
24 | from http://code.activestate.com/recipes/500268-all-k-subsets-from-an-n-set/
25 | Yield each subset of size k from the set of integers 0 .. n - 1
26 | n -- an integer >= 0
27 | k -- an integer >= 0
28 | '''
29 | # Validate args
30 | if n < 0:
31 | raise ValueError('n must be >= 0, got n=%d' % n)
32 | if k < 0:
33 | raise ValueError('k must be >= 0, got k=%d' % k)
34 | # check base cases
35 | if k == 0 or n < k:
36 | yield set()
37 | elif n == k:
38 | yield set(range(n))
39 |
40 | else:
41 | # Use the recursive formula based on binomial coefficients:
42 | # choose(n, k) = choose(n - 1, k - 1) + choose(n - 1, k)
43 | for s in k_subsets_i(n - 1, k - 1):
44 | s.add(n - 1)
45 | yield s
46 | for s in k_subsets_i(n - 1, k):
47 | yield s
48 |
49 |
50 | def k_subsets(s, k):
51 | '''
52 | from http://code.activestate.com/recipes/500268-all-k-subsets-from-an-n-set/
53 | Yield all subsets of size k from set (or list) s
54 | s -- a set or list (any iterable will suffice)
55 | k -- an integer > 0
56 | '''
57 | s = list(s)
58 | n = len(s)
59 | for k_set in k_subsets_i(n, k):
60 | yield set([s[i] for i in k_set])
61 |
62 |
63 | def lumpings(scheme):
64 | """
65 | Generate all possible lumpings of a given scheme, where a lumping involves
66 | joining two partitions together. The scheme has to be a list of digits.
67 | """
68 | # Get the numbers involved in the scheme
69 | nums = set(scheme)
70 | lumpings = []
71 | for sub in k_subsets(nums, 2):
72 | lump = list(scheme)
73 | sub = list(sub)
74 | sub.sort()
75 | #now replace all the instance of one number in lump with the other in sub
76 | while lump.count(sub[1]) > 0:
77 | lump[lump.index(sub[1])] = sub[0]
78 | lumpings.append(lump)
79 |
80 | return lumpings
81 |
82 |
83 | def euclidean_distance(x, y):
84 | sum = 0
85 | for xval, yval in izip(x, y):
86 | sum += (xval - yval) ** 2
87 | dist = sqrt(sum)
88 | return dist
89 |
90 |
91 | # def getLevels(cluster, levs):
92 | # """
93 | # Returns the levels of the cluster as list.
94 | # """
95 | # levs.append(cluster.level())
96 |
97 | # left = cluster.items()[0]
98 | # right = cluster.items()[1]
99 | # if isinstance(left, Cluster):
100 | # first = getLevels(left, levs)
101 | # else:
102 | # first = left
103 | # if isinstance(right, Cluster):
104 | # second = getLevels(right, levs)
105 | # else:
106 | # second = right
107 | # return levs
108 |
109 |
110 | def levels_to_scheme(levels, namedict):
111 | """
112 | take the return value from Cluster.getlevel
113 | and return it as a nested list of partition names
114 | """
115 |
116 | levels = str(levels)
117 |
118 | for key in namedict.keys():
119 | old = str(namedict[key])
120 | new = '"%s"' % key
121 | levels = levels.replace(old, new)
122 |
123 | levels = eval(levels)
124 | return levels
125 |
--------------------------------------------------------------------------------
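To make lumpings concrete: a scheme is a list of digits assigning each data block to a partition, and each lumping merges one pair of partitions. A quick demonstration (Python 2, matching the codebase); the three results are exactly the pair merges, though their order depends on k_subsets and may vary:

    from partfinder.algorithm import lumpings

    for lump in lumpings([0, 0, 1, 2]):
        print lump
    # [0, 0, 0, 2]   partitions 0 and 1 joined
    # [0, 0, 1, 0]   partitions 0 and 2 joined
    # [0, 0, 1, 1]   partitions 1 and 2 joined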
/PartitionFinderV1.1.1/partfinder/algorithm.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/algorithm.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/alignment.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | # This program is free software: you can redistribute it and/or modify it under
4 | # the terms of the GNU General Public License as published by the Free Software
5 | # Foundation, either version 3 of the License, or (at your option) any later
6 | # version.
7 | #
8 | # This program is distributed in the hope that it will be useful, but WITHOUT
9 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
10 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
11 | # details. You should have received a copy of the GNU General Public License
12 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
13 | # PartitionFinder also includes the PhyML program, the RAxML program, and the
14 | # PyParsing library, all of which are protected by their own licenses and
15 | # conditions, using PartitionFinder implies that you agree with those licences
16 | # and conditions as well.
17 |
18 | """Loading, Saving, Parsing Alignment Files
19 |
20 | See the phyml details here:
21 | http://www.atgc-montpellier.fr/phyml/usersguide.php?type=command
22 |
23 | """
24 | import logging
25 | log = logging.getLogger("alignment")
26 |
27 | import os
28 |
29 | from pyparsing import (
30 | Word, OneOrMore, alphas, nums, Suppress, Optional, Group, stringEnd,
31 | delimitedList, ParseException, line, lineno, col, LineStart, restOfLine,
32 | LineEnd, White, Literal, Combine, Or, MatchFirst, ZeroOrMore)
33 |
34 | from util import PartitionFinderError
35 | class AlignmentError(PartitionFinderError):
36 | pass
37 |
38 | class AlignmentParser(object):
39 | """Parses an alignment and returns species sequence tuples"""
40 |
41 | # I think this covers it...
42 | BASES = Word(alphas + "?.-")
43 |
44 | def __init__(self):
45 | self.sequence_length = None
46 | self.species_count = None
47 | self.sequences = []
48 | self.current_sequence = 0
49 |
50 | self.root_parser = self.phylip_parser() + stringEnd
51 |
52 | def phylip_parser(self):
53 |
54 | INTEGER = Word(nums)
55 | INTEGER.setParseAction(lambda x: int(x[0]))
56 |
57 | header = INTEGER("species_count") + INTEGER("sequence_length") +\
58 | Suppress(restOfLine)
59 | header.setParseAction(self.set_header)
60 |
61 | sequence_name = Word(
62 | alphas + nums + "!#$%&\'*+-./;<=>?@[\\]^_`{|}~",
63 | max=100)
64 |
65 | # Take a copy and disallow line breaks in the bases
66 | bases = self.BASES.copy()
67 | bases.setWhitespaceChars(" \t")
68 | seq_start = sequence_name("species") + bases("sequence") + Suppress(LineEnd())
69 | seq_start.setParseAction(self.set_seq_start)
70 | seq_start_block = OneOrMore(seq_start)
71 | seq_start_block.setParseAction(self.set_start_block)
72 |
73 | seq_continue = bases("sequence") + Suppress(LineEnd())
74 | seq_continue.setParseAction(self.set_seq_continue)
75 |
76 | seq_continue_block = Suppress(LineEnd()) + OneOrMore(seq_continue)
77 | seq_continue_block.setParseAction(self.set_continue_block)
78 |
79 | return header + seq_start_block + ZeroOrMore(seq_continue_block)
80 |
81 | def set_header(self, text, loc, tokens):
82 | self.sequence_length = tokens.sequence_length
83 | self.species_count = tokens.species_count
84 |
85 | def set_seq_start(self, text, loc, tokens):
86 | self.sequences.append([tokens.species, tokens.sequence])
87 | self.current_sequence += 1
88 |
89 | def set_start_block(self, tokens):
90 | # End of block
91 | # Reset the counter
92 | self.current_sequence = 0
93 |
94 | def set_seq_continue(self, text, loc, tokens):
95 | append_to = self.sequences[self.current_sequence]
96 | append_to[1] += tokens.sequence
97 | self.current_sequence += 1
98 |
99 | def set_continue_block(self, tokens):
100 | self.current_sequence = 0
101 |
102 | def parse(self, s):
103 | try:
104 | defs = self.root_parser.parseString(s)
105 | except ParseException, p:
106 | log.error("Error in Alignment Parsing:" + str(p))
107 | log.error("A common cause of this error is having whitespace"
108 | ", i.e. spaces or tabs, in the species names. Please check this and remove"
109 | " all whitespace from species names, or replace them with e.g. underscores")
110 |
111 | raise AlignmentError
112 |
113 | # Check that all the sequences are equal length
114 | slen = None
115 | for nm, seq in self.sequences:
116 | if slen is None:
117 | # Use the first as the test case
118 | slen = len(seq)
119 | else:
120 | if len(seq) != slen:
121 | log.error("Bad alignment file: Not all species have the same sequences length")
122 | raise AlignmentError
123 |
124 | # Not all formats have a heading, but if we have one do some checking
125 | if self.sequence_length is not None:
126 | if self.sequence_length != slen:
127 | log.error("Bad Alignment file: sequence length count in header does not match"
128 | " sequence length in file, please check")
129 | raise AlignmentError
130 |
131 | if self.species_count is not None:
132 | if len(self.sequences) != self.species_count:
133 | log.error("Bad Alignment file: species count in header does not match"
134 | " number of sequences in file, please check")
135 | raise AlignmentError
136 |
137 | return self.sequences
138 |
139 | def parse(s):
140 | return AlignmentParser().parse(s)
141 |
142 | class Alignment(object):
143 | def __init__(self):
144 | self.species = {}
145 | self.sequence_len = 0
146 |
147 | def __str__(self):
148 | return "Alignment(%s species, %s codons)" % self.species, self.sequence_len
149 |
150 | def same_as(self, other):
151 | if self.sequence_len != other.sequence_len:
152 | log.warning("Alignments not the same, length differs %s: %s", self.sequence_len, other.sequence_len)
153 | return False
154 |
155 | if self.species != other.species:
156 | log.warning("Alignments not the same. "
157 | "This alignment has %s species, the alignment from the previous "
158 | "analysis had %s.", len(self.species), len(other.species))
159 | return False
160 |
161 | return True
162 |
163 | def from_parser_output(self, defs):
164 | """A series of species / sequences tuples
165 | e.g def = ("dog", "GATC"), ("cat", "GATT")
166 | """
167 | species = {}
168 | sequence_len = None
169 | for spec, seq in defs:
170 | # log.debug("Found Sequence for %s: %s...", spec, seq[:20])
171 | if spec in species:
172 | log.error("Repeated species name '%s' is repeated "
173 | "in alignment", spec)
174 | raise AlignmentError
175 |
176 | # Assign it
177 | species[spec] = seq
178 |
179 | if sequence_len is None:
180 | sequence_len = len(seq)
181 | else:
182 | if len(seq) != sequence_len:
183 | log.error("Sequence length of %s "
184 | "differs from previous sequences", spec)
185 | raise AlignmentError
186 | log.debug("Found %d species with sequence length %d",
187 | len(species), sequence_len)
188 |
189 | # Overwrite these
190 | self.species = species
191 | self.sequence_len = sequence_len
192 |
193 | def read(self, pth):
194 | if not os.path.exists(pth):
195 | log.error("Cannot find sequence file '%s'", pth)
196 | raise AlignmentError
197 |
198 | log.info("Reading alignment file '%s'", pth)
199 | text = open(pth, 'rU').read()
200 | self.from_parser_output(parse(text))
201 |
202 | def write(self, pth):
203 | self.write_phylip(pth)
204 |
205 | def write_phylip(self, pth):
206 | fd = open(pth, 'w')
207 | log.debug("Writing phylip file '%s'", pth)
208 |
209 | species_count = len(self.species)
210 | sequence_len = len(iter(self.species.itervalues()).next())
211 |
212 | fd.write("%d %d\n" % (species_count, sequence_len))
213 | for species, sequence in self.species.iteritems():
214 | # we use a version of phylip which can have longer species names, up to 100
215 | shortened = "%s " %(species[:99])
216 | fd.write(shortened)
217 | fd.write(sequence)
218 | fd.write("\n")
219 | fd.close()
220 |
221 | class SubsetAlignment(Alignment):
222 | """Create an alignment based on some others and a subset definition"""
223 | def __init__(self, source, subset):
224 | """create an alignment for this subset"""
225 | Alignment.__init__(self)
226 |
227 | #let's do a basic check to make sure that the specified sites aren't > alignment length
228 | site_max = max(subset.columns)+1
229 | log.debug("Max site in data_blocks: %d; max site in alignment: %d" %(site_max, source.sequence_len))
230 | if site_max>source.sequence_len:
231 | log.error("Site %d is specified in [data_blocks], but the alignment only has %d sites. Please check." %(site_max, source.sequence_len))
232 | raise AlignmentError
233 |
234 | # Pull out the columns we need
235 | for species_name, old_sequence in source.species.iteritems():
236 | new_sequence = ''.join([old_sequence[i] for i in subset.columns])
237 | self.species[species_name] = new_sequence
238 |
239 | if not self.species:
240 | log.error("No species found in %s", self)
241 | raise AlignmentError
242 |
243 | self.sequence_len = len(self.species.itervalues().next())
244 |
245 | class TestAlignment(Alignment):
246 | """Good for testing stuff"""
247 | def __init__(self, text):
248 | Alignment.__init__(self)
249 | self.from_parser_output(parse(text))
250 |
251 |
--------------------------------------------------------------------------------
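A minimal sketch of the parser in use on an inline two-species phylip string (Python 2.7, which this codebase targets); TestAlignment is the helper class defined at the bottom of the file above:

    from partfinder.alignment import TestAlignment

    text = "2 8\nspp1 GATCGATC\nspp2 GATTGATT\n"
    aln = TestAlignment(text)
    print aln.species        # {'spp1': 'GATCGATC', 'spp2': 'GATTGATT'} (dict order may vary)
    print aln.sequence_len   # 8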
/PartitionFinderV1.1.1/partfinder/alignment.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/alignment.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/analysis.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import logging
19 | log = logging.getLogger("analysis")
20 |
21 | import os
22 | import shutil
23 |
24 | from alignment import Alignment, SubsetAlignment
25 | import threadpool
26 | import scheme
27 | import subset
28 | import results
29 | import threading
30 | from util import PartitionFinderError
31 | import util
32 |
33 | class AnalysisError(PartitionFinderError):
34 | pass
35 |
36 |
37 | class Analysis(object):
38 | """Performs the analysis and collects the results"""
39 | def __init__(self, cfg, force_restart=False, threads=-1):
40 | cfg.validate()
41 | self.cfg = cfg
42 | self.threads = threads
43 |
44 | self.results = results.AnalysisResults(self.cfg.model_selection)
45 |
46 | log.info("Beginning Analysis")
47 | self.process_restart(force_restart)
48 |
49 | # Check for old analyses to see if we can use the old data
50 | self.cfg.check_for_old_config()
51 |
52 | # Make some folders for the analysis
53 | self.cfg.make_output_folders()
54 | self.make_alignment(cfg.alignment_path)
55 | self.make_tree(cfg.user_tree_topology_path)
56 |
57 | # We need this to block the threads for critical stuff
58 | self.lock = threading.Condition(threading.Lock())
59 |
60 | def process_restart(self, force_restart):
61 | if force_restart:
62 | # Remove everything
63 | if os.path.exists(self.cfg.output_path):
64 | log.warning("Deleting all previous workings in '%s'", self.cfg.output_path)
65 | shutil.rmtree(self.cfg.output_path)
66 | else:
67 | # Just remove the schemes folder
68 | if os.path.exists(self.cfg.schemes_path):
69 | log.info("Removing Schemes in '%s' (they will be recalculated from existing subset data)", self.cfg.schemes_path)
70 | shutil.rmtree(self.cfg.schemes_path)
71 |
72 | def analyse(self):
73 | self.do_analysis()
74 | return self.results
75 |
76 | def make_alignment(self, source_alignment_path):
77 | # Make the alignment
78 | self.alignment = Alignment()
79 | self.alignment.read(source_alignment_path)
80 |
81 | # We start by copying the alignment
82 | self.alignment_path = os.path.join(self.cfg.start_tree_path, 'source.phy')
83 | if os.path.exists(self.alignment_path):
84 | # Make sure it is the same
85 | old_align = Alignment()
86 | old_align.read(self.alignment_path)
87 | if not old_align.same_as(self.alignment):
88 | log.error("Alignment file has changed since previous run. You need to use the force-restart option.")
89 | raise AnalysisError
90 |
91 | else:
92 | self.alignment.write(self.alignment_path)
93 |
94 | def need_new_tree(self, tree_path):
95 | if os.path.exists(tree_path):
96 | if ';' in open(tree_path).read():
97 | log.info("Starting tree file found.")
98 | redo_tree = False
99 | else:
100 | log.info("Starting tree file found but incomplete. Re-estimating")
101 | redo_tree = True
102 | else:
103 | log.info("No starting tree file found.")
104 | redo_tree = True
105 |
106 | return redo_tree
107 |
108 | def make_tree(self, user_path):
109 | # Begin by making a filtered alignment, containing ONLY those columns
110 | # that are defined in the subsets
111 | subset_with_everything = subset.Subset(*list(self.cfg.partitions))
112 | self.filtered_alignment = SubsetAlignment(self.alignment, subset_with_everything)
113 | self.filtered_alignment_path = os.path.join(self.cfg.start_tree_path, 'filtered_source.phy')
114 | self.filtered_alignment.write(self.filtered_alignment_path)
115 |
116 | # Now we've written this alignment, we need to lock everything in
117 |         # place: no more adding or changing partitions from now on.
118 | self.cfg.partitions.check_against_alignment(self.alignment)
119 | self.cfg.partitions.finalise()
120 |
121 | # We start by copying the alignment
122 | self.alignment_path = os.path.join(self.cfg.start_tree_path, 'source.phy')
123 |
124 | # Now check for the tree
125 | tree_path = self.cfg.processor.make_tree_path(self.filtered_alignment_path)
126 |
127 |         if self.need_new_tree(tree_path):
128 | log.debug("Estimating new starting tree, no old tree found")
129 |
130 |             # If we have a user tree, then use that; otherwise create a topology
131 | util.clean_out_folder(self.cfg.start_tree_path, keep = ["filtered_source.phy", "source.phy"])
132 |
133 | if user_path is not None and user_path != "":
134 | # Copy it into the start tree folder
135 | log.info("Using user supplied topology at %s", user_path)
136 | topology_path = os.path.join(self.cfg.start_tree_path, 'user_topology.phy')
137 | self.cfg.processor.dupfile(user_path, topology_path)
138 | else:
139 | log.debug(
140 | "didn't find tree at %s, making a new one" % tree_path)
141 | topology_path = self.cfg.processor.make_topology(
142 | self.filtered_alignment_path, self.cfg.datatype, self.cfg.cmdline_extras)
143 |
144 | # Now estimate branch lengths
145 | tree_path = self.cfg.processor.make_branch_lengths(
146 | self.filtered_alignment_path,
147 | topology_path,
148 | self.cfg.datatype,
149 | self.cfg.cmdline_extras)
150 |
151 | self.tree_path = tree_path
152 | log.info("Starting tree with branch lengths is here: %s", self.tree_path)
153 |
154 | def run_task(self, m, sub):
155 | # This bit should run in parallel (forking the processor)
156 | self.cfg.processor.analyse(
157 | m,
158 | sub.alignment_path,
159 | self.tree_path,
160 | self.cfg.branchlengths,
161 | self.cfg.cmdline_extras
162 | )
163 |
164 |         # Not entirely sure that we NEED to block here, but it is safer to
165 |         # do so. It shouldn't hold things up too long...
166 | self.lock.acquire()
167 | try:
168 | sub.parse_model_result(self.cfg, m)
169 |             # Try finalising now, so the result gets written out as early as possible
170 | sub.finalise(self.cfg)
171 | finally:
172 | self.lock.release()
173 |
174 | def add_tasks_for_sub(self, tasks, sub):
175 | for m in sub.models_to_process:
176 | tasks.append((self.run_task, (m, sub)))
177 |
178 | def run_concurrent(self, tasks):
179 | for func, args in tasks:
180 | func(*args)
181 |
182 | def run_threaded(self, tasks):
183 | if not tasks:
184 | return
185 | pool = threadpool.Pool(tasks, self.threads)
186 | pool.join()
187 |
188 | def analyse_scheme(self, sch):
189 | # Progress
190 | self.cfg.progress.next_scheme()
191 |
192 | # Prepare by reading everything in first
193 | tasks = []
194 | for sub in sch:
195 | sub.prepare(self.cfg, self.alignment)
196 | self.add_tasks_for_sub(tasks, sub)
197 |
198 | # Now do the analysis
199 | if self.threads == 1:
200 | self.run_concurrent(tasks)
201 | else:
202 | self.run_threaded(tasks)
203 |
204 | # Now see if we're done
205 | for sub in sch:
206 | # ALL subsets should already be finalised in the task. We just
207 | # check again here
208 | if not sub.finalise(self.cfg):
209 | log.error("Failed to run models %s; not sure why", ", ".join(list(sub.models_to_do)))
210 | raise AnalysisError
211 |
212 | # AIC needs the number of sequences
213 | number_of_seq = len(self.alignment.species)
214 | result = scheme.SchemeResult(sch, number_of_seq, self.cfg.branchlengths, self.cfg.model_selection)
215 | self.results.add_scheme_result(sch, result)
216 |
217 | return result
218 |
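A brief aside on the pattern above: analyse_scheme builds a flat list of
(callable, args) tuples, then either loops over them itself (run_concurrent,
when threads == 1) or hands the list to threadpool.Pool (run_threaded). A
self-contained sketch, with an illustrative 'work' function standing in for
run_task:

    def illustrate_task_pattern():
        done = []

        def work(model_name):
            done.append(model_name)

        tasks = [(work, ("GTR",)), (work, ("HKY",))]
        for func, args in tasks:  # what run_concurrent does
            func(*args)
        return done  # -> ['GTR', 'HKY']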
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/analysis.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/analysis.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/analysis_method.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, the PyParsing library, and the python-cluster library,
15 | #all of which are protected by their own licenses and conditions; using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 |
19 | import logging
20 | log = logging.getLogger("method")
21 |
22 | import os
23 | import math
24 | import scheme
25 | import algorithm
26 | import submodels
27 | import subset
28 | from analysis import Analysis, AnalysisError
29 | import neighbour
30 |
31 | class UserAnalysis(Analysis):
32 |
33 | def do_analysis(self):
34 | log.info("Performing User analysis")
35 | current_schemes = [s for s in self.cfg.user_schemes]
36 | scheme_count = len(current_schemes)
37 | subset_count = subset.count_subsets()
38 |
39 | self.cfg.progress.begin(scheme_count, subset_count)
40 | if scheme_count > 0:
41 | for s in current_schemes:
42 | res = self.analyse_scheme(s)
43 |
44 | # Write out the scheme
45 | self.cfg.reporter.write_scheme_summary(s, res)
46 | else:
47 | log.error("Search set to 'user', but no user schemes detected in .cfg file. Please check.")
48 | raise AnalysisError
49 |
50 | self.cfg.progress.end()
51 |
52 | self.cfg.reporter.write_best_scheme(self.results)
53 |
54 |
55 | class StrictClusteringAnalysis(Analysis):
56 | """
57 |     This analysis uses model parameters to guess at similar partitions, then
58 |     just joins them together. This is much less accurate than other methods,
59 |     but a LOT quicker: it runs in order-N time (where N is the number of
60 |     initial datablocks), whereas the greedy algorithm is N squared.
61 | """
62 |
63 | def do_analysis(self):
64 | log.info("Performing strict clustering analysis")
65 |
66 | partnum = len(self.cfg.partitions)
67 | subset_count = 2 * partnum - 1
68 | scheme_count = partnum
69 | self.cfg.progress.begin(scheme_count, subset_count)
70 |
71 | # Start with the most partitioned scheme
72 | start_description = range(len(self.cfg.partitions))
73 | start_scheme = scheme.create_scheme(
74 | self.cfg, "start_scheme", start_description)
75 |
76 | # Analyse our first scheme
77 | log.info("Analysing starting scheme (scheme %s)" % start_scheme.name)
78 | self.analyse_scheme(start_scheme)
79 |
80 | # Current scheme number
81 | cur_s = 2
82 |
83 | # Now we try out all clusterings of the first scheme, to see if we can
84 | # find a better one
85 | while True:
86 | log.info("***Strict clustering algorithm step %d of %d***" %
87 | (cur_s - 1, partnum - 1))
88 |
89 | # Calculate the subsets which are most similar
90 | # e.g. combined rank ordering of euclidean distances
91 | # Could combine average site-rates, q matrices, and frequencies
92 | scheme_name = "step_%d" % (cur_s - 1)
93 | clustered_scheme = neighbour.get_nearest_neighbour_scheme(
94 | start_scheme, scheme_name, self.cfg)
95 |
96 | # Now analyse that new scheme
97 | cur_s += 1
98 | self.analyse_scheme(clustered_scheme)
99 |
100 |             # Stop when we've analysed the scheme with all subsets combined
101 | if len(set(clustered_scheme.subsets)) == 1: # then it's the scheme with everything together
102 | break
103 | else:
104 | start_scheme = clustered_scheme
105 |
106 | self.cfg.progress.end()
107 |
108 | self.cfg.reporter.write_best_scheme(self.results)
109 |
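    # Worked count for the progress totals above (illustrative numbers):
    # with partnum = 4 datablocks, strict clustering analyses partnum = 4
    # schemes (the start scheme plus one per merge step) and at most
    # 2*4 - 1 = 7 distinct subsets: the 4 starting subsets plus one new
    # merged subset for each of the 3 merge steps.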
110 |
111 | class AllAnalysis(Analysis):
112 |
113 | def do_analysis(self):
114 | log.info("Performing complete analysis")
115 | partnum = len(self.cfg.partitions)
116 |
117 | scheme_count = submodels.count_all_schemes(partnum)
118 | subset_count = submodels.count_all_subsets(partnum)
119 | self.cfg.progress.begin(scheme_count, subset_count)
120 |
121 | # Iterate over submodels, which we can turn into schemes afterwards in the loop
122 | model_iterator = submodels.submodel_iterator([], 1, partnum)
123 |
124 | scheme_name = 1
125 | for m in model_iterator:
126 | s = scheme.model_to_scheme(m, scheme_name, self.cfg)
127 | scheme_name = scheme_name + 1
128 | res = self.analyse_scheme(s)
129 |
130 | # Write out the scheme
131 | self.cfg.reporter.write_scheme_summary(s, res)
132 |
133 | self.cfg.reporter.write_best_scheme(self.results)
134 |
135 |
136 | class GreedyAnalysis(Analysis):
137 | def do_analysis(self):
138 | '''A greedy algorithm for heuristic partitioning searches'''
139 |
140 | log.info("Performing greedy analysis")
141 |
142 | partnum = len(self.cfg.partitions)
143 | scheme_count = submodels.count_greedy_schemes(partnum)
144 | subset_count = submodels.count_greedy_subsets(partnum)
145 |
146 | self.cfg.progress.begin(scheme_count, subset_count)
147 |
148 | # Start with the most partitioned scheme
149 | start_description = range(len(self.cfg.partitions))
150 | start_scheme = scheme.create_scheme(
151 | self.cfg, "start_scheme", start_description)
152 |
153 | log.info("Analysing starting scheme (scheme %s)" % start_scheme.name)
154 | self.analyse_scheme(start_scheme)
155 |
156 | step = 1
157 | cur_s = 2
158 |
159 | # Now we try out all lumpings of the current scheme, to see if we can
160 | # find a better one and if we do, we just keep going
161 | while True:
162 | log.info("***Greedy algorithm step %d***" % step)
163 |
164 | # Get a list of all possible lumpings of the best_scheme
165 | lumpings = algorithm.lumpings(start_description)
166 |
167 | # Save the current best score we have in results
168 | old_best_score = self.results.best_score
169 | for lumped_description in lumpings:
170 | lumped_scheme = scheme.create_scheme(self.cfg, cur_s, lumped_description)
171 | cur_s += 1
172 |                 # This just checks whether a scheme is any good; if it
173 |                 # is, we remember it and write it out later
174 | self.analyse_scheme(lumped_scheme)
175 |
176 |             # Did our best score change? (It ONLY gets better -- see
177 |             # results.py)
178 | if self.results.best_score == old_best_score:
179 | # It didn't, so we're done
180 | break
181 |
182 | # Let's look further. We use the description from our best scheme
183 | # (which will be the one that just changed in the last lumpings
184 | # iteration)
185 | start_description = self.results.best_result.scheme.description
186 |
187 | # Rename and record the best scheme for this step
188 | self.results.best_scheme.name = "step_%d" % step
189 | self.cfg.reporter.write_scheme_summary(
190 | self.results.best_scheme, self.results.best_result)
191 |
192 | # If it's the scheme with everything equal, quit
193 | if len(set(start_description)) == 1:
194 | break
195 |
196 | # Go do the next round...
197 | step += 1
198 |
199 | log.info("Greedy algorithm finished after %d steps" % step)
200 | log.info("Highest scoring scheme is scheme %s, with %s score of %.3f" %
201 | (self.results.best_scheme.name, self.cfg.model_selection,
202 | self.results.best_score))
203 |
204 | self.cfg.reporter.write_best_scheme(self.results)
205 |
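# A minimal sketch of what one greedy step enumerates -- every way of
# merging exactly two subsets of the current scheme description. This is
# an illustration only, not the original algorithm.lumpings implementation.
from itertools import combinations

def _pairwise_lumpings(description):
    groups = sorted(set(description))
    for a, b in combinations(groups, 2):
        # relabel group b as group a, i.e. merge those two subsets
        yield [a if g == b else g for g in description]

# list(_pairwise_lumpings([0, 1, 2]))
# -> [[0, 0, 2], [0, 1, 0], [0, 1, 1]], i.e. three candidate merges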
206 |
207 | class RelaxedClusteringAnalysis(Analysis):
208 | '''
209 | A relaxed clustering algorithm for heuristic partitioning searches
210 |
211 | 1. Rank subsets by their similarity (defined by clustering-weights)
212 | 2. Analyse cluster-percent of the most similar schemes
213 | 3. Take the scheme that improves the AIC/BIC score the most
214 | 4. Quit if no improvements.
215 | '''
216 |
217 | def do_analysis(self):
218 | log.info("Performing relaxed clustering analysis")
219 |
220 | stop_at = self.cfg.cluster_percent * 0.01
221 |
222 | model_selection = self.cfg.model_selection
223 | partnum = len(self.cfg.partitions)
224 |
225 | scheme_count = submodels.count_relaxed_clustering_schemes(partnum, self.cfg.cluster_percent)
226 | subset_count = submodels.count_relaxed_clustering_subsets(partnum, self.cfg.cluster_percent)
227 |
228 | self.cfg.progress.begin(scheme_count, subset_count)
229 |
230 | # Start with the most partitioned scheme, and record it.
231 | start_description = range(len(self.cfg.partitions))
232 | start_scheme = scheme.create_scheme(
233 | self.cfg, "start_scheme", start_description)
234 | log.info("Analysing starting scheme (scheme %s)" % start_scheme.name)
235 | self.analyse_scheme(start_scheme)
236 | self.cfg.reporter.write_scheme_summary(
237 | self.results.best_scheme, self.results.best_result)
238 |
239 |
240 | # Start by remembering that we analysed the starting scheme
241 | subset_counter = 1
242 | step = 1
243 | while True:
244 |
245 | log.info("***Relaxed clustering algorithm step %d of %d***" % (step, partnum - 1))
246 | name_prefix = "step_%d" % (step)
247 |
248 | # Get a list of all possible lumpings of the best_scheme, ordered
249 | # according to the clustering weights
250 | lumped_subsets = neighbour.get_ranked_clustered_subsets(
251 | start_scheme, self.cfg)
252 |
253 |             # truncate the candidate lumpings to the top cluster_percent of the list
254 |             cutoff = int(math.ceil(len(lumped_subsets) * stop_at))  # round up to avoid a zero cutoff
255 | lumped_subsets = lumped_subsets[:cutoff]
256 |
257 | # Now analyse the lumped schemes
258 | lumpings_done = 0
259 | old_best_score = self.results.best_score
260 |
261 | for subset_grouping in lumped_subsets:
262 | scheme_name = "%s_%d" % (name_prefix, lumpings_done + 1)
263 | lumped_scheme = neighbour.make_clustered_scheme(
264 | start_scheme, scheme_name, subset_grouping, self.cfg)
265 |
266 | new_result = self.analyse_scheme(lumped_scheme)
267 |
268 | log.debug("Difference in %s: %.1f", self.cfg.model_selection, (new_result.score-old_best_score))
269 |
270 | lumpings_done += 1
271 |
272 |
273 | if self.results.best_score != old_best_score:
274 | log.info("Analysed %.1f percent of the schemes for this step. The best "
275 | "scheme changed the %s score by %.1f units.",
276 | self.cfg.cluster_percent, self.cfg.model_selection,
277 | (self.results.best_score - old_best_score))
278 |
279 | #write out the best scheme
280 | self.results.best_scheme.name = "step_%d" % step
281 | self.cfg.reporter.write_scheme_summary(
282 | self.results.best_scheme, self.results.best_result)
283 |
284 |
285 | # Now we find out which is the best lumping we know of for this step
286 | start_scheme = self.results.best_scheme
287 | else:
288 | log.info("Analysed %.1f percent of the schemes for this step and found no schemes "
289 |                          "that improve the score, stopping", self.cfg.cluster_percent)
290 | break
291 |
292 | # We're done if it's the scheme with everything together
293 | if len(set(lumped_scheme.subsets)) == 1:
294 | break
295 |
296 | step += 1
297 |
298 |
299 |
300 | log.info("Relaxed clustering algorithm finished after %d steps" % step)
301 | log.info("Best scoring scheme is scheme %s, with %s score of %.3f"
302 | % (self.results.best_scheme.name, model_selection, self.results.best_score))
303 |
304 | self.cfg.reporter.write_best_scheme(self.results)
305 |
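# Worked example of the rcluster cutoff above (illustrative numbers): with
# 45 candidate lumpings and --rcluster-percent 10, stop_at = 0.1 and
# cutoff = ceil(45 * 0.1) = 5, so only the 5 most similar subset groupings
# are analysed in that step.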
306 |
307 | def choose_method(search):
308 | if search == 'all':
309 | method = AllAnalysis
310 | elif search == 'user':
311 | method = UserAnalysis
312 | elif search == 'greedy':
313 | method = GreedyAnalysis
314 | elif search == 'hcluster':
315 | method = StrictClusteringAnalysis
316 | elif search == 'rcluster':
317 | method = RelaxedClusteringAnalysis
318 | else:
319 | log.error("Search algorithm '%s' is not yet implemented", search)
320 | raise AnalysisError
321 | return method
322 |
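How these classes are driven (a sketch that mirrors main.py below; cfg is
assumed to be a fully loaded config.Configuration):

    method = choose_method(cfg.search)   # e.g. 'greedy' -> GreedyAnalysis
    anal = method(cfg, force_restart=False, threads=-1)
    results = anal.analyse()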
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/analysis_method.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/analysis_method.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/config.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/main.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, the PyParsing library, and the python-cluster library,
15 | #all of which are protected by their own licenses and conditions; using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import logging
19 | import sys
20 | import shlex
21 | import os
22 |
23 | logging.basicConfig(
24 | format="%(levelname)-8s | %(asctime)s | %(message)s",
25 | level=logging.INFO
26 | )
27 |
28 | # curdir = os.path.dirname(os.path.abspath(__file__))
29 | # rootdir, here = os.path.split(curdir)
30 | # config_path = os.path.join(rootdir, 'logging.cfg')
31 | # from logging import config as _logconfig
32 | # _logconfig.fileConfig(config_path)
33 |
34 | log = logging.getLogger("main")
35 | from optparse import OptionParser
36 |
37 | # We import everything here as it forces all of debug regions to be loaded
38 | import version
39 | import config
40 | import analysis_method
41 | import util
42 | import reporter
43 | import progress
44 | import datetime
45 | import parser
46 | import raxml
47 | import phyml
48 | from partfinder import current
49 |
50 |
51 | def debug_arg_callback(option, opt, value, parser):
52 | setattr(parser.values, option.dest, value.split(','))
53 |
54 |
55 | def get_debug_regions():
56 | mlogger = logging.Logger.manager
57 | return mlogger.loggerDict.keys()
58 |
59 |
60 | def set_debug_regions(regions):
61 | if regions is None:
62 | return
63 | valid_regions = set(get_debug_regions())
64 | if 'all' in regions:
65 | regions = valid_regions
66 | else:
67 | regions = set(regions)
68 | errors = set()
69 | for r in regions:
70 | if r not in valid_regions:
71 | log.error("'%s' is not a valid debug region", r)
72 | errors.add(r)
73 | if errors:
74 | return errors
75 |
76 | for r in regions:
77 | logging.getLogger(r).setLevel(logging.DEBUG)
78 |
79 | # Enhance the format
80 | fmt = logging.Formatter("%(levelname)-8s | %(asctime)s | %(name)-10s | %(message)s")
81 | logging.getLogger("").handlers[0].setFormatter(fmt)
82 |
83 | return None
84 |
85 | def clean_folder(folder):
86 | """ Delete all the files in a folder
87 | Thanks to StackOverflow for this:
88 | http://stackoverflow.com/questions/185936/delete-folder-contents-in-python
89 | """
90 | for the_file in os.listdir(folder):
91 | file_path = os.path.join(folder, the_file)
92 | try:
93 | if os.path.isfile(file_path):
94 | os.unlink(file_path)
95 | except Exception, e:
96 | log.error("Couldn't delete file from phylofiles folder: %s" % e)
97 |             raise util.PartitionFinderError
98 |
99 | def parse_args(datatype, cmdargs=None):
100 | usage = """usage: python %prog [options]
101 |
102 | PartitionFinder and PartitionFinderProtein are designed to discover optimal
103 | partitioning schemes for nucleotide and amino acid sequence alignments.
104 | They are also useful for finding the best model of sequence evolution for datasets.
105 |
106 | The Input: the full path to a folder containing:
107 | - A configuration file (partition_finder.cfg)
108 | - A nucleotide/aa alignment in Phylip format
109 | Take a look at the included 'example' folder for more details.
110 |
111 | The Output: A file in the same directory as the .cfg file, named
112 | 'analysis'. This file contains information on the best
113 | partitioning scheme, and the best model for each partition
114 |
115 | Usage Examples:
116 | >python %prog example
117 | Analyse what is in the 'example' sub-folder in the current folder.
118 |
119 | >python %prog -v example
120 | Analyse what is in the 'example' sub-folder in the current folder, but
121 | show all the debug output
122 |
123 | >python %prog -c ~/data/frogs
124 | Check the configuration files in the folder data/frogs in the current
125 | user's home folder.
126 |
127 | >python %prog --force-restart ~/data/frogs
128 | Deletes any data produced by the previous runs (which is in
129 | ~/data/frogs/output) and starts afresh
130 | """
131 | op = OptionParser(usage)
132 | op.add_option(
133 | "-v", "--verbose",
134 | action="store_true", dest="verbose",
135 | help="show debug logging information (equivalent to --debug-out=all)")
136 | op.add_option(
137 | "-c", "--check-only",
138 | action="store_true", dest="check_only",
139 | help="just check the configuration files, don't do any processing")
140 | op.add_option(
141 | "--force-restart",
142 | action="store_true", dest="force_restart",
143 | help="delete all previous output and start afresh (!)")
144 | op.add_option(
145 | "-p", "--processes",
146 | type="int", dest="processes", default=-1, metavar="N",
147 | help="Number of concurrent processes to use."
148 | " Use -1 to match the number of cpus on the machine."
149 | " The default is to use -1.")
150 | op.add_option(
151 | "--show-python-exceptions",
152 | action="store_true", dest="show_python_exceptions",
153 | help="If errors occur, print the python exceptions")
154 | op.add_option(
155 | "--save-phylofiles",
156 | action="store_true", dest="save_phylofiles",
157 | help="save all of the phyml or raxml output. This can take a lot of space(!)")
158 | op.add_option(
159 | "--dump-results",
160 | action="store_true", dest="dump_results",
161 | help="Dump all results to a binary file. "
162 | "This is only of use for testing purposes.")
163 | op.add_option(
164 | "--compare-results",
165 | action="store_true", dest="compare_results",
166 | help="Compare the results to previously dumped binary results. "
167 | "This is only of use for testing purposes.")
168 | op.add_option(
169 | "--raxml",
170 | action="store_true", dest="raxml",
171 | help="Use RAxML (rather than PhyML) to do the analysis. See the manual"
172 | )
173 | op.add_option(
174 | "--cmdline-extras",
175 | type="str", dest="cmdline_extras", default="", metavar="N",
176 | help="Add additional commands to the phyml or raxml commandlines that PF uses."
177 | "This can be useful e.g. if you want to change the accuracy of lnL calculations"
178 | " ('-e' option in raxml), or use multi-threaded versions of raxml that require"
179 | " you to specify the number of threads you will let raxml use ('-T' option in "
180 |              "raxml). E.g. you might specify this: --cmdline-extras ' -e 2.0 -T 10 '"
181 | " N.B. MAKE SURE YOU PUT YOUR EXTRAS IN QUOTES, and only use this command if you"
182 | " really know what you're doing and are very familiar with raxml and"
183 | " PartitionFinder"
184 | )
185 | op.add_option(
186 | "--weights",
187 | type="str", dest="cluster_weights", default=None, metavar="N",
188 | help="Mainly for algorithm development. Only use it if you know what you're doing."
189 | "A list of weights to use in the clustering algorithms. This list allows you "
190 | "to assign different weights to: the overall rate for a subset, the base/amino acid "
191 | "frequencies, model parameters, and alpha value. This will affect how subsets are "
192 |          "clustered together. For instance: --weights '1, 2, 5, 1' would weight "
193 |          "the base frequencies 2x more than the overall rate, the model parameters 5x "
194 | "more, and the alpha parameter the same as the model rate"
195 | )
196 | op.add_option(
197 | "--rcluster-percent",
198 | type="float", dest="cluster_percent", default=10.0, metavar="N",
199 | help="This defines the proportion of possible schemes that the relaxed clustering"
200 |              " algorithm will consider before it stops looking. The default is 10%,"
201 |              " e.g. --rcluster-percent 10.0"
202 |
203 | )
204 | op.add_option(
205 | '--debug-output',
206 | type='string',
207 | action='callback',
208 | dest='debug_output',
209 | metavar="REGION,REGION,...",
210 | callback=debug_arg_callback,
211 | help="(advanced option) Provide a list of debug regions to output extra "
212 | "information about what the program is doing."
213 | " Possible regions are 'all' or any of {%s}."
214 | % ",".join(get_debug_regions())
215 | )
216 |
217 | if cmdargs is None:
218 | options, args = op.parse_args()
219 | else:
220 | options, args = op.parse_args(cmdargs)
221 |
222 | options.datatype = datatype
223 | # We should have one argument: the folder to read the configuration from
224 | if not args:
225 | op.print_help()
226 | else:
227 | check_options(op, options)
228 |
229 | return options, args
230 |
231 |
232 | def check_options(op, options):
233 | # Error checking
234 | if options.dump_results and options.compare_results:
235 | op.error("options --dump_results and --compare_results are mutually exclusive!")
236 |
237 | if options.verbose:
238 | set_debug_regions(['all'])
239 | else:
240 | errors = set_debug_regions(options.debug_output)
241 | if errors is not None:
242 | bad = ",".join(list(errors))
243 | op.error("Invalid debug regions: %s" % bad)
244 |
245 | # Default to phyml
246 | if options.raxml == 1:
247 | options.phylogeny_program = 'raxml'
248 | else:
249 | options.phylogeny_program = 'phyml'
250 |
251 | #A warning for people using the Pthreads version of RAxML
252 | # if options.cmdline_extras.count("-T") > 0:
253 | # log.warning("It looks like you're using a Pthreads version of RAxML. Be aware "
254 | # "that the default behaviour of PartitionFinder is to run one version of RAxML per "
255 | # "available processor. This might not be what you want with Pthreads - since the "
256 | # "minimum number of threads per RAxML run is 2 (i.e. -T 2). Make sure to limit the "
257 | # "total number of RAxML runs you start using the -p option in PartitionFinder. "
258 | # "Specifically, the total number of processors you will use with the Pthreads "
259 | # "version is the number you set via the -T option in --cmdline-extras, multiplied "
260 | # "by the number of processors you set via the -p option in PartitionFinder. "
261 | # "You should also be aware that the Pthreads version of RAxML has a rare but "
262 | # "known bug on some platforms. This bug results in infinite liklelihood values "
263 | # "if it happens on your dataset, PartitionFinder will give an error. In that case "
264 | # "you should switch back to using a single-threaded version of RAxML, e.g. the "
265 | # "SSE3 or AVX version."
266 | # "See the manual for more info.")
267 |
268 |
269 | def check_python_version():
270 | """Check the python version is above 2.7 but lower than 3.0"""
271 |
272 | python_version = float(
273 | "%d.%d" % (sys.version_info[0], sys.version_info[1]))
274 |
275 | log.info("You have Python version %.1f" % python_version)
276 |
277 | if python_version < 2.7:
278 | log.error("Your Python version is %.1f, but this program requires Python 2.7. "
279 | "Please upgrade to version 2.7 by visiting www.python.org/getit, or by following"
280 | " the instructions in the PartitionFinder manual." % python_version)
281 | return 0
282 |
283 |     if python_version >= 3.0:
284 | log.warning("Your Python version is %.1f. This program was not built to run with "
285 | "version 3 or higher. To guarantee success, please use Python 2.7.x" % python_version)
286 |
287 |
288 | def main(name, datatype, passed_args=None):
289 | v = version.get_version()
290 |
291 | # If passed_args is None, this will use sys.argv
292 | options, args = parse_args(datatype, passed_args)
293 | if not args:
294 | # Help has already been printed
295 | return 2
296 |
297 | log.info("------------- %s %s -----------------", name, v)
298 | start_time = datetime.datetime.now().replace(microsecond=0) # start the clock ticking
299 |
300 | check_python_version()
301 |
302 | if passed_args is None:
303 | cmdline = "".join(sys.argv)
304 | else:
305 | cmdline = "".join(passed_args)
306 |
307 | log.info("Command-line arguments used: %s", cmdline)
308 |
309 | # Load, using the first argument as the folder
310 | try:
311 | cfg = config.Configuration(datatype,
312 | options.phylogeny_program,
313 | options.save_phylofiles,
314 | options.cmdline_extras,
315 | options.cluster_weights,
316 | options.cluster_percent)
317 |
318 | # Set up the progress callback
319 | progress.TextProgress(cfg)
320 | cfg.load_base_path(args[0])
321 |
322 | if options.check_only:
323 | log.info("Exiting without processing (because of the -c/--check-only option ...")
324 | else:
325 | try:
326 | # Now try processing everything....
327 | if not cfg.save_phylofiles:
328 | clean_folder(cfg.phylofiles_path)
329 | method = analysis_method.choose_method(cfg.search)
330 | reporter.TextReporter(cfg)
331 | anal = method(cfg,
332 | options.force_restart,
333 | options.processes)
334 | results = anal.analyse()
335 |
336 | if options.dump_results:
337 | results.dump(cfg)
338 | elif options.compare_results:
339 | results.compare(cfg)
340 | finally:
341 | # Make sure that we reset the configuration
342 | cfg.reset()
343 |
344 | # Successful exit
345 | end_time = datetime.datetime.now().replace(microsecond=0)
346 | processing_time = end_time - start_time
347 |
348 | log.info("Total processing time: %s (h:m:s)" % processing_time)
349 | log.info("Processing complete.")
350 |
351 | return 0
352 |
353 | except util.PartitionFinderError:
354 | log.error("Failed to run. See previous errors.")
355 | # Reraise if we were called by call_main, or if the options is set
356 | if options.show_python_exceptions or passed_args is not None:
357 | raise
358 |
359 | except KeyboardInterrupt:
360 | log.error("User interrupted the Program")
361 |
362 |
363 | return 1
364 |
365 |
366 | def call_main(datatype, cmdline):
367 | cmdargs = shlex.split(cmdline)
368 | main("" % datatype, datatype, cmdargs)
369 |
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/main.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/main.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/neighbour.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions; using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import subset
19 | import scheme
20 | from algorithm import euclidean_distance
21 |
22 | import logging
23 | log = logging.getLogger("cluster")
24 |
25 |
26 | def get_ranked_list(final_distances):
27 | """
28 |     Return the closest subsets defined by a distance matrix. Usually there
29 |     will just be a pair that's closer than all other pairs, BUT it's feasible
30 |     (if unlikely) that >2 subsets are equally close. This is possible if, e.g. all
31 | weights are zero. Then we just want to group all the equally close
32 | subsets...
33 |
34 |     So we return a list of subset groupings, ordered from closest to most distant
35 | """
36 |
37 | # Let's make a dict keyed by the distance in the matrix, using setdefault
38 | # to add things, in case there are subsets with identical pairwise
39 | # distances
40 | distances = {}
41 | for pair in final_distances:
42 | d = final_distances[pair]
43 |
44 | # Get any subs that we already know are that distance apart as a set
45 | # default to empty set if it's a new distance
46 | subs = distances.setdefault(d, set())
47 |
48 | # Add subs that correspond to this cell
49 | subs.add(pair[0])
50 | subs.add(pair[1])
51 |
52 | ordered_subsets = []
53 | unique_distances = list(distances.keys())
54 | unique_distances.sort()
55 |
56 | for d in unique_distances:
57 | ordered_subsets.append(list(distances[d]))
58 |
59 | return ordered_subsets
60 |
61 |
62 | def get_pairwise_dists(subsets, rates, freqs, model, alpha, weights):
63 |
64 | import itertools
65 | #set up all pairwise combinations as iterators
66 | s = itertools.combinations(subsets, 2)
67 | r = itertools.combinations(rates, 2)
68 | f = itertools.combinations(freqs, 2)
69 | m = itertools.combinations(model, 2)
70 | a = itertools.combinations(alpha, 2)
71 |
72 | #now we can izip over ALL of them at once (isn't python great!)
73 | subset_pairs = []
74 | r_dists = []
75 | f_dists = []
76 | m_dists = []
77 | a_dists = []
78 |
79 | for pair in itertools.izip(s, r, f, m, a):
80 | subset_pair = pair[0]
81 | subset_pairs.append(subset_pair)
82 |
83 | r_dist = euclidean_distance(pair[1][0], pair[1][1])
84 | f_dist = euclidean_distance(pair[2][0], pair[2][1])
85 | m_dist = euclidean_distance(pair[3][0], pair[3][1])
86 | a_dist = euclidean_distance(pair[4][0], pair[4][1])
87 |
88 | r_dists.append(r_dist)
89 | f_dists.append(f_dist)
90 | m_dists.append(m_dist)
91 | a_dists.append(a_dist)
92 |
93 | #print pair
94 |
95 | #and now we get the minmax values
96 | max_r = max(r_dists)
97 | max_f = max(f_dists)
98 | max_m = max(m_dists)
99 | max_a = max(a_dists)
100 |
101 | #now we go over them again, and normalise, weight, and sum
102 | final_dists = {}
103 | closest_pairs = []
104 | mindist = None
105 | for i, pair in enumerate(itertools.izip(r_dists, f_dists, m_dists, a_dists, subset_pairs)):
106 |
107 | if max_r > 0.0:
108 | r_final = pair[0] * float(weights["rate"]) / float(max_r)
109 | else:
110 | r_final = 0.0
111 | if max_f > 0.0:
112 | f_final = pair[1] * float(weights["freqs"]) / float(max_f)
113 | else:
114 | f_final = 0.0
115 | if max_m > 0.0:
116 | m_final = pair[2] * float(weights["model"]) / float(max_m)
117 | else:
118 | m_final = 0.0
119 |         if max_a > 0.0:
120 | a_final = pair[3] * float(weights["alpha"]) / float(max_a)
121 | else:
122 | a_final = 0.0
123 |
124 | #print i, pair
125 |
126 | total_dist = r_final + f_final + m_final + a_final
127 |
128 | final_dists[pair[4]] = total_dist
129 |
130 | #check to see if this is the closest
131 |         if mindist is None or total_dist < mindist:
132 | mindist = total_dist
133 | closest_pairs = pair[4] # pair[4] is the tuple of two subsets
134 | elif total_dist == mindist:
135 | #we want a tuple with all of the subsets that are equally close
136 | #with no replicates, so we use tuple(set())
137 | closest_pairs = tuple(set(closest_pairs + (pair[4])))
138 |
139 | return final_dists, closest_pairs
140 |
141 | def get_distance_matrix(start_scheme, weights):
142 | #1. get the parameter lists for each subset
143 |     subsets = [] # a list of subsets, so we know the order things appear in the list
144 | rates = [] # tree length
145 | freqs = [] # amino acid or base frequencies
146 | model = [] # model parameters e.g. A<->C
147 | alpha = [] #alpha parameter of the gamma distribution of rates across sites
148 |
149 | for s in start_scheme.subsets:
150 | param_dict = s.get_param_values()
151 | subsets.append(s)
152 | rates.append([param_dict["rate"]])
153 | freqs.append(param_dict["freqs"])
154 | model.append(param_dict["model"])
155 | alpha.append([param_dict["alpha"]])
156 |
157 | #get pairwise euclidean distances, and minmax values, for all parameters
158 | final_dists, closest_pairs = get_pairwise_dists(subsets, rates, freqs, model, alpha, weights)
159 |
160 | return final_dists, closest_pairs
161 |
162 | def get_closest_subsets(start_scheme, weights):
163 | """Find the closest subsets in a scheme
164 | """
165 | final_dists, closest_pairs = get_distance_matrix(start_scheme, weights)
166 |
167 | return closest_pairs
168 |
169 |
170 | def get_ranked_clustered_subsets(start_scheme, cfg):
171 | """
172 | The idea here is to take a scheme, and perform some analyses to find out
173 | how the subsets in that scheme cluster.
174 |
175 |     We then just return the list of subset groupings, ordered from closest
176 |     to most distant in the clustering space
177 | """
178 | final_dists, closest_pairs = get_distance_matrix(
179 | start_scheme, cfg.cluster_weights)
180 |
181 | ranked_subset_groupings = get_ranked_list(final_dists)
182 | return ranked_subset_groupings
183 |
184 |
185 | def make_clustered_scheme(start_scheme, scheme_name, subsets_to_cluster, cfg):
186 |
187 | #1. Create a new subset that merges the subsets_to_cluster
188 | newsub_parts = []
189 |
190 | #log.info("Clustering %d subsets" % len(subsets_to_cluster))
191 |
192 | for s in subsets_to_cluster:
193 | newsub_parts = newsub_parts + list(s.partitions)
194 | newsub = subset.Subset(*tuple(newsub_parts))
195 |
196 | #2. Then we define a new scheme with those merged subsets
197 | all_subs = [s for s in start_scheme.subsets]
198 |
199 | #pop out the subsets we're going to join together
200 | for s in subsets_to_cluster:
201 | all_subs.remove(s)
202 |
203 | #and now we add back in our new subset...
204 | all_subs.append(newsub)
205 |
206 | #and finally create the clustered scheme
207 | final_scheme = (scheme.Scheme(cfg, str(scheme_name), all_subs))
208 |
209 | return final_scheme
210 |
211 |
212 | def get_nearest_neighbour_scheme(start_scheme, scheme_name, cfg):
213 | """
214 | The idea here is to take a scheme, and perform some analyses to find a
215 | neighbouring scheme, where the neighbour has one less subset than the
216 | current scheme. Really this is just progressive clustering, but specified
217 | to work well with PartitionFinder
218 | """
219 |
220 | #1. First we get the closest subsets, based on some weights. This will almost always
221 | # be two subsets, but it's generalised so that it could be all of them...
222 | # cluster weights is a dictionary of weights, keyed by: rate, freqs, model
223 | # for the overall subset rate, the base/aminoacid frequencies, and the model parameters
224 | closest_subsets = get_closest_subsets(start_scheme, cfg.cluster_weights)
225 |
226 | scheme = make_clustered_scheme(
227 | start_scheme, scheme_name, closest_subsets, cfg)
228 |
229 | return scheme
230 |
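The normalise-then-weight step in get_pairwise_dists, on toy numbers (all
values below are hypothetical):

    weights = {"rate": 1.0, "freqs": 2.0, "model": 5.0, "alpha": 1.0}
    r_dists = [0.2, 0.8]        # raw euclidean rate distances for two pairs
    max_r = max(r_dists)        # 0.8
    r_final = [d * weights["rate"] / max_r for d in r_dists]  # [0.25, 1.0]
    # each parameter class is rescaled to [0, 1] before weighting, so the
    # weights -- not the parameters' raw units -- control the clustering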
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/neighbour.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/neighbour.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/parser.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions; using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import logging
19 | log = logging.getLogger("parser")
20 |
21 | from pyparsing import (
22 | Word, OneOrMore, alphas, nums, Suppress, Optional, Group, stringEnd,
23 | delimitedList, pythonStyleComment, line, lineno, col, Keyword, Or,
24 | NoMatch, CaselessKeyword, ParseException, SkipTo)
25 |
26 | # debugging
27 | # ParserElement.verbose_stacktrace = True
28 |
29 | import partition
30 | import scheme
31 | import subset
32 | import phyml_models
33 | import raxml_models
34 | import config
35 | from util import PartitionFinderError
36 |
37 | # Only used internally
38 |
39 |
40 | class ParserError(Exception):
41 | """Used for our own parsing problems"""
42 | def __init__(self, text, loc, msg):
43 | self.line = line(loc, text)
44 | self.col = col(loc, text)
45 | self.lineno = lineno(loc, text)
46 | self.msg = msg
47 |
48 | def format_message(self):
49 | return "%s at line:%s, column:%s" % (self.msg, self.lineno, self.col)
50 |
51 |
52 | class Parser(object):
53 | """Parse configuration files
54 |
55 | The results are put into the configuration object
56 | """
57 |
58 | # These will get set in the configuration passed in
59 | def __init__(self, cfg):
60 | # For adding variables
61 | self.cfg = cfg
62 |
63 | # Use these to keep track of stuff that is going on in parser
64 | self.schemes = []
65 | self.subsets = []
66 | self.init_grammar()
67 | self.ignore_schemes = False
68 | # provide useful error messages when parsing settings with limited options
69 |
70 | def init_grammar(self):
71 | """Set up the parsing classes
72 |         Any changes to the grammar of the config file should be done here.
73 | """
74 | # Some syntax that we need, but don't bother looking at
75 | SEMICOLON = (Suppress(";"))
76 | EQUALS = Suppress("=")
77 | OPENB = Suppress("(")
78 | CLOSEB = Suppress(")")
79 | BACKSLASH = Suppress("\\")
80 | DASH = Suppress("-")
81 |
82 | # Top Section
83 | FILENAME = Word(alphas + nums + '-_.')
84 | alignmentdef = Keyword('alignment') + EQUALS + FILENAME + SEMICOLON
85 | alignmentdef.setParseAction(self.set_alignment)
86 |
87 | treedef = Keyword('user_tree_topology') + EQUALS + FILENAME + SEMICOLON
88 | treedef.setParseAction(self.set_user_tree)
89 |
90 | def simple_option(name):
91 | opt = Keyword(name) + EQUALS + Word(alphas + nums + '-_') + SEMICOLON
92 | opt.setParseAction(self.set_simple_option)
93 | return opt
94 |
95 | branchdef = simple_option('branchlengths')
96 |
97 | MODELNAME = Word(alphas + nums + '+')
98 | modellist = delimitedList(MODELNAME)
99 | modeldef = Keyword("models") + EQUALS + Group(
100 | (
101 | CaselessKeyword("all") | CaselessKeyword("mrbayes") | CaselessKeyword("raxml") |
102 | CaselessKeyword("beast") | CaselessKeyword("all_protein") |
103 | CaselessKeyword(
104 | "all_protein_gamma") | CaselessKeyword("all_protein_gammaI")
105 | )("predefined") |
106 | Group(modellist)("userlist")) + SEMICOLON
107 | modeldef.setParseAction(self.set_models)
108 |
109 | modseldef = simple_option("model_selection")
110 | topsection = alignmentdef + Optional(treedef) + branchdef + \
111 | modeldef + modseldef
112 |
113 | # Partition Parsing
114 | column = Word(nums)
115 | partname = Word(alphas + '_-' + nums)
116 | partdef = column("start") +\
117 | Optional(DASH + column("end")) +\
118 | Optional(BACKSLASH + column("step"))
119 |
120 | partdef.setParseAction(self.define_range)
121 | partdeflist = Group(OneOrMore(Group(partdef)))
122 | partition = Optional("charset") + partname("name") + \
123 | EQUALS + partdeflist("parts") + SEMICOLON
124 | partition.setParseAction(self.define_partition)
125 | partlist = OneOrMore(Group(partition))
126 | partsection = Suppress("[data_blocks]") + partlist
127 |
128 | # Scheme Parsing
129 | schemename = Word(alphas + '_-' + nums)
130 | partnameref = partname.copy(
131 | ) # Make a copy, cos we set a different action on it
132 | partnameref.setParseAction(self.check_part_exists)
133 |
134 | subset = Group(OPENB + delimitedList(partnameref("name")) + CLOSEB)
135 | subset.setParseAction(self.define_subset)
136 |
137 | scheme = Group(OneOrMore(subset))
138 | schemedef = schemename("name") + \
139 | EQUALS + scheme("scheme") + SEMICOLON
140 | schemedef.setParseAction(self.define_schema)
141 |
142 | schemelist = OneOrMore(Group(schemedef))
143 |
144 | schemealgo = simple_option("search")
145 | schemesection = \
146 | Suppress("[schemes]") + schemealgo + Optional(schemelist)
147 |
148 | # We've defined the grammar for each section. Here we just put it all together
149 | self.config_parser = (
150 | topsection + partsection + schemesection + stringEnd)
151 |
152 | def set_alignment(self, text, loc, tokens):
153 | value = tokens[1]
154 | self.cfg.set_alignment_file(value)
155 | # TODO Make sure it is readable!
156 | # raise ParserError(text, loc, "No '%s' defined in the configuration" % var)
157 | #
158 |
159 | def set_user_tree(self, text, loc, tokens):
160 | self.cfg.user_tree = tokens[1]
161 | pass
162 |
163 | def set_simple_option(self, text, loc, tokens):
164 | try:
165 | self.cfg.set_option(tokens[0], tokens[1])
166 | except config.ConfigurationError:
167 | raise ParserError(text, loc, "Invalid option in .cfg file")
168 |
169 | def set_models(self, text, loc, tokens):
170 | if self.cfg.phylogeny_program == "phyml":
171 | self.phylo_models = phyml_models
172 | elif self.cfg.phylogeny_program == "raxml":
173 | self.phylo_models = raxml_models
174 |
175 | all_dna_mods = set(self.phylo_models.get_all_dna_models())
176 | all_protein_mods = set(self.phylo_models.get_all_protein_models())
177 | total_mods = all_dna_mods.union(all_protein_mods)
178 |
179 | mods = tokens[1]
180 | DNA_mods = 0
181 | prot_mods = 0
182 | if mods.userlist:
183 | modlist = mods.userlist
184 | log.info("Setting 'models' to a user-specified list")
185 | else:
186 | modsgroup = mods.predefined
187 | if modsgroup.lower() == "all":
188 | modlist = list(all_dna_mods)
189 | DNA_mods = DNA_mods + 1
190 | elif modsgroup.lower() == "mrbayes":
191 | modlist = set(phyml_models.get_mrbayes_models())
192 | DNA_mods = DNA_mods + 1
193 | elif modsgroup.lower() == "beast":
194 | modlist = set(phyml_models.get_beast_models())
195 | DNA_mods = DNA_mods + 1
196 | elif modsgroup.lower() == "raxml":
197 | modlist = set(phyml_models.get_raxml_models())
198 | DNA_mods = DNA_mods + 1
199 | elif modsgroup.lower() == "all_protein":
200 | modlist = set(self.phylo_models.get_all_protein_models())
201 | prot_mods = prot_mods + 1
202 | elif modsgroup.lower() == "all_protein_gamma":
203 | if self.cfg.phylogeny_program == "raxml":
204 | modlist = set(raxml_models.get_protein_models_gamma())
205 | prot_mods = prot_mods + 1
206 | else:
207 |                     log.error("The models option 'all_protein_gamma' is only available with raxml"
208 |                               " (the --raxml commandline option). Please check and try again")
209 |                     raise ParserError(text, loc, "'all_protein_gamma' requires the --raxml option")
210 |             elif modsgroup.lower() == "all_protein_gammai":
211 | if self.cfg.phylogeny_program == "raxml":
212 | modlist = set(raxml_models.get_protein_models_gammaI())
213 | prot_mods = prot_mods + 1
214 | else:
215 |                     log.error("The models option 'all_protein_gammaI' is only available with raxml"
216 |                               " (the --raxml commandline option). Please check and try again")
217 |                     raise ParserError(text, loc, "'all_protein_gammaI' requires the --raxml option")
218 | else:
219 | pass
220 | log.info("Setting 'models' to '%s'", modsgroup)
221 |
222 | self.cfg.models = set()
223 | for m in modlist:
224 | if m not in total_mods:
225 | raise ParserError(
226 | text, loc, "'%s' is not a valid model for phylogeny "
227 | "program %s. Please check the lists of valid models in the"
228 | " manual and try again" % (m, self.cfg.phylogeny_program))
229 |
230 | if m in all_dna_mods:
231 | DNA_mods = DNA_mods + 1
232 | if m in all_protein_mods:
233 | prot_mods = prot_mods + 1
234 |
235 | self.cfg.models.add(m)
236 |
237 | log.info("The models included in this analysis are: %s",
238 | ", ".join(self.cfg.models))
239 |
240 | #check datatype against the model list that we've got a sensible model list
241 | if DNA_mods > 0 and prot_mods == 0 and self.cfg.datatype == "DNA":
242 | log.info("Setting datatype to 'DNA'")
243 | elif DNA_mods == 0 and prot_mods > 0 and self.cfg.datatype == "protein":
244 | log.info("Setting datatype to 'protein'")
245 | elif DNA_mods == 0 and prot_mods > 0 and self.cfg.datatype == "DNA":
246 | raise ParserError(
247 | text, loc, "The models list contains only models of amino acid change."
248 | " PartitionFinder.py only works with nucleotide models (like the GTR model)."
249 | " If you're analysing an amino acid dataset, please use PartitionFinderProtein,"
250 | " which you can download here: www.robertlanfear.com/partitionfinder."
251 | " The models line in the .cfg file is")
252 | elif DNA_mods > 0 and prot_mods == 0 and self.cfg.datatype == "protein":
253 | raise ParserError(
254 |                 text, loc, "The models list contains only models of nucleotide change."
255 |                 " PartitionFinderProtein.py only works with amino acid models (like the WAG model)."
256 |                 " If you're analysing a nucleotide dataset, please use PartitionFinder.py,"
257 | " which you can download here: www.robertlanfear.com/partitionfinder"
258 | " The models line in the .cfg file is")
259 | else: # we've got a mixture of models.
260 | raise ParserError(
261 |                 text, loc, "The models list contains a mixture of protein and nucleotide models."
262 |                 " If you're analysing a nucleotide dataset, please use PartitionFinder."
263 | " If you're analysing an amino acid dataset, please use PartitionFinderProtein."
264 | " You can download both of these programs from here: www.robertlanfear.com/partitionfinder"
265 | " The models line in the .cfg file is")
266 |
267 | def define_range(self, part):
268 | """Turn the 1, 2 or 3 tokens into integers, supplying a default if needed"""
269 | fromc = int(part.start)
270 |
271 | if part.end:
272 | toc = int(part.end)
273 | else:
274 | toc = fromc
275 |
276 | if part.step:
277 | stepc = int(part.step)
278 | else:
279 | stepc = 1
280 | return [fromc, toc, stepc]
281 |
282 | def define_partition(self, text, loc, part_def):
283 | """We have everything we need here to make a partition"""
284 | try:
285 | # Creation adds it to set
286 | p = partition.Partition(
287 | self.cfg, part_def.name, *tuple(part_def.parts))
288 | except partition.PartitionError:
289 | raise ParserError(
290 |                 text, loc, "Error in the definition of partition '%s'" % part_def.name)
291 |
292 | def check_part_exists(self, text, loc, partref):
293 | if partref.name not in self.cfg.partitions:
294 | raise ParserError(text, loc, "Partition %s not defined" %
295 | partref.name)
296 |
297 | def define_subset(self, text, loc, subset_def):
298 | try:
299 | # Get the partitions from the names
300 | parts = [self.cfg.partitions[nm] for nm in subset_def[0]]
301 |
302 | # Keep a running list of these till we define the schema below
303 | self.subsets.append(subset.Subset(*tuple(parts)))
304 | except subset.SubsetError:
305 | raise ParserError(text, loc, "Error creating subset...")
306 |
307 | def define_schema(self, text, loc, scheme_def):
308 | try:
309 | # Clear out the subsets as we need to reuse it
310 | subs = tuple(self.subsets)
311 | self.subsets = []
312 |
313 |             if not self.ignore_schemes:
314 | sch = scheme.Scheme(self.cfg, scheme_def.name, subs)
315 | self.cfg.user_schemes.add_scheme(sch)
316 |
317 | except (scheme.SchemeError, subset.SubsetError):
318 |             raise ParserError(text, loc, "Error in the definition of scheme '%s'" %
319 |                               scheme_def.name)
320 |
321 | def parse_file(self, fname):
322 | #this just reads in the config file into 's'
323 | s = open(fname, 'rU').read()
324 | self.parse_configuration(s)
325 |
326 | def parse_configuration(self, s):
327 | #parse the config cfg
328 | try:
329 | self.result = self.config_parser.ignore(
330 | pythonStyleComment).parseString(s)
331 | except ParserError, p:
332 | log.error(p.format_message())
333 | raise PartitionFinderError
334 | except ParseException, p:
335 | log.error("There was a problem loading your .cfg file, please check and try again")
336 | log.error(p)
337 |
338 |         #let's see if there was something missing from the input file
339 | expectations = ["models", "search", "[schemes]", "[data_blocks]",
340 | "model_selection", "branchlengths", "alignment"]
341 | missing = None
342 | for e in expectations:
343 | if p.msg.count(e):
344 | missing = e
345 |
346 | if missing:
347 | log.info("It looks like the '%s' option might be missing or in the wrong place" % (missing))
348 | log.info("Or perhaps something is wrong in the lines just before the '%s' option" % (missing))
349 | log.info("Please double check the .cfg file and try again")
350 | else:
351 | log.info(
352 | "The line causing the problem is this: '%s'" % (p.line))
353 | log.info("Please check that line, and make sure it appears in the right place in the .cfg file.")
354 | log.info("If it looks OK, try double-checking the semi-colons on other lines in the .cfg file")
355 | raise PartitionFinderError
356 |
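For reference, a minimal partition_finder.cfg that this grammar accepts (a
sketch; the file name and data-block coordinates are illustrative):

    alignment = test.phy;
    branchlengths = linked;
    models = all;
    model_selection = BIC;

    [data_blocks]
    Gene1_pos1 = 1-789\3;
    Gene1_pos2 = 2-789\3;
    Gene1_pos3 = 3-789\3;

    [schemes]
    search = greedy;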
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/parser.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/parser.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/partition.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions; using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import logging
19 | log = logging.getLogger("partition")
20 |
21 | from util import PartitionFinderError
22 | class PartitionError(PartitionFinderError):
23 | pass
24 |
25 | def columnset_to_string(colset):
26 | s = list(colset)
27 | s.sort()
28 | # Add one, cos we converted to zero base...
29 | return ', '.join([str(x+1) for x in s])
30 |
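# Worked example: columnset_to_string(set([0, 2, 3])) returns '1, 3, 4' --
# columns are stored zero-based internally but displayed one-based.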
31 | class PartitionSet(object):
32 | """The set of all partitions loaded from a configuration file"""
33 | def __init__(self):
34 | """A set of Partitions"""
35 | self.sequence = 0
36 | self.parts_by_name = {}
37 | self.parts_by_number = {}
38 | self.partitions = set()
39 |
40 | # All of the columns
41 | self.columns = []
42 | self.columnset = set()
43 |
44 | self.finalised = False
45 |
46 | def __str__(self):
47 | return "PartitionSet(%s)" % ", ".join([str(p) for p in self.partitions])
48 |
49 | def add_partition(self, p):
50 | """Check for overlap (= intersection)"""
51 | if self.finalised:
52 | log.error("Cannot add partitions after a Scheme has been created")
53 | raise PartitionError
54 |
55 | if p.name in self.parts_by_name:
56 | log.error("Attempt to add %s when that name already exists", p)
57 | raise PartitionError
58 |
59 | overlap = []
60 | for otherp in self.partitions:
61 | if p.columnset & otherp.columnset:
62 | overlap.append(str(otherp))
63 | if overlap:
64 | log.error("%s overlaps with previously defined "
65 | "partitions: %s",
66 | p, ", ".join(overlap))
67 | raise PartitionError
68 |
69 | # Assign the partition to this set
70 | p.partition_set = self
71 |
72 | # Make sure we can look up by name
73 | self.parts_by_name[p.name] = p
74 | self.parts_by_number[self.sequence] = p
75 | p.sequence = self.sequence
76 | self.sequence += 1
77 | self.partitions.add(p)
78 |
79 | # Merge all the columns
80 | self.columns.extend(p.columns)
81 | self.columns.sort()
82 | self.columnset |= p.columnset
83 |
84 | def finalise(self):
85 | """Ensure that no more partitions can be added"""
86 | self.finalised = True
87 |
88 | def check_against_alignment(self, alignment):
89 | """Check the partition definitions against the alignment"""
90 |
91 |         # TODO: probably should check the converse too -- stuff defined that is
92 | # missing??
93 | self.fullset = set(range(0, alignment.sequence_len))
94 | leftout = self.fullset - self.columnset
95 | if leftout:
96 | # This does not raise an error, just a warning
97 | log.warn(
98 | "Columns defined in partitions range from %s to %s, "
99 | "but these columns in the alignment are missing: %s",
100 | self.columns[0]+1, self.columns[-1]+1,
101 | columnset_to_string(leftout))
102 |
103 |     # We can treat this a bit like a dictionary
104 | def __iter__(self):
105 | return iter(self.partitions)
106 |
107 | def __len__(self):
108 | return len(self.partitions)
109 |
110 | def __getitem__(self, k):
111 | if type(k) is int:
112 | return self.parts_by_number[k]
113 | return self.parts_by_name[k]
114 |
115 | def __contains__(self, k):
116 | return k in self.parts_by_name
117 |
118 | def names(self):
119 | return self.parts_by_name.keys()
120 |
121 | class Partition(object):
122 | """A set of columns from an alignment"""
123 | def __init__(self, cfg, name=None, *partlist):
124 | """A named partition
125 |
126 | """
127 | self.name = name
128 | description = []
129 |
130 | # This will get set later, when they are added to PartitionSet
131 | self.partition_set = None
132 |
133 | # We now need to convert to column definitions. Note that these are
134 | # zero based, which is not how they are specified in the config. So we
135 | # must do some fiddling to make sure they are right. In addition, we
136 | # use range(...) which excludes the final column, whereas the
137 | # definitions assume inclusive...
138 | columns = []
139 | for p in partlist:
140 |
141 | # Make sure it is sensible
142 | if len(p) < 2 or len(p) > 3:
143 |                 log.error("The Partition '%s' should contain a start, "
144 |                           "a stop, and an optional step",
145 |                           self.name)
146 | raise PartitionError
147 | if len(p) == 2:
148 | start, stop = p
149 | step = 1
150 | else:
151 | start, stop, step = p
152 | if start > stop:
153 | log.error("Partition '%s' has beginning after end (%s > %s)",
154 | name, start, stop)
155 | raise PartitionError
156 |
157 | # Actually, subtracting 1 deals with both issues...
158 | columns.extend(range(start-1, stop, step))
159 | description.append((start, stop, step))
160 |
161 | self.description = tuple(description)
162 |
163 | # Normalise it all
164 | columns.sort()
165 | columnset = set(columns)
166 |
167 | # If there was any overlap then these will differ...
168 | if len(columns) != len(columnset):
169 | log.error("Partition '%s' has internal overlap", name)
170 | raise PartitionError
171 |
172 | # Both of these are useful?
173 | self.columns = columns
174 | self.columnset = columnset
175 |
176 | cfg.partitions.add_partition(self)
177 | log.debug("Created %s", self)
178 |
179 | def __repr__(self):
180 | outlist = ", ".join(["%s-%s\\%s" % tuple(p) for p in self.description])
181 | return "Partition<%s: %s>" % (self.name, outlist)
182 |
183 | def __str__(self):
184 | outlist = ", ".join(["%s-%s\\%s" % tuple(p) for p in self.description])
185 | return "Partition(%s, %s)" % (self.name, outlist)
186 |
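A standalone sketch of the column arithmetic described in Partition.__init__ (illustrative values, not part of partition.py): the cfg notation "1-9\3" means 1-based, inclusive columns 1, 4, 7, and subtracting 1 from the start handles both the zero-basing and range()'s exclusive stop in one step.

    start, stop, step = 1, 9, 3            # the cfg definition "1-9\3"
    columns = range(start - 1, stop, step)
    print(columns)                         # [0, 3, 6] -> columns 1, 4, 7 once 1 is added back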
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/partition.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/partition.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/phyml.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/phyml.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/phyml_models.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import logging
19 | log = logging.getLogger("analysis")
20 |
21 | import config
22 |
23 | # TODO need some error checking!
24 |
25 | # number of free parameters in substitution model, listed as "model+base_frequencies"
26 | # and the model string for PhyML as the second of the tuple.
27 | _base_models = {
28 | "JC" : (0+0, "-m 000000 -f '0.25, 0.25, 0.25, 0.25'"),
29 | "K80" : (1+0, "-m 010010 -f '0.25, 0.25, 0.25, 0.25'"),
30 | "TrNef" : (2+0, "-m 010020 -f '0.25, 0.25, 0.25, 0.25'"),
31 | "K81" : (2+0, "-m 012210 -f '0.25, 0.25, 0.25, 0.25'"),
32 | "TVMef" : (4+0, "-m 012314 -f '0.25, 0.25, 0.25, 0.25'"),
33 | "TIMef" : (3+0, "-m 012230 -f '0.25, 0.25, 0.25, 0.25'"),
34 | "SYM" : (5+0, "-m 012345 -f '0.25, 0.25, 0.25, 0.25'"),
35 | "F81" : (0+3, "-m 000000 -f e"),
36 | "HKY" : (1+3, "-m 010010 -f e"),
37 | "TrN" : (2+3, "-m 010020 -f e"),
38 | "K81uf" : (2+3, "-m 012210 -f e"),
39 | "TVM" : (4+3, "-m 012314 -f e"),
40 | "TIM" : (3+3, "-m 012230 -f e"),
41 | "GTR" : (5+3, "-m 012345 -f e")
42 | }
43 |
44 | # number of free parameters in substitution model, listed as "aa_frequencies"
45 | # and the model string for PhyML as the second of the tuple
46 | _base_protein_models = {
47 | "LG" : (0, "-m LG -d aa"),
48 | "WAG" : (0, "-m WAG -d aa"),
49 | "mtREV" : (0, "-m mtREV -d aa"),
50 | "Dayhoff" : (0, "-m Dayhoff -d aa"),
51 | "DCMut" : (0, "-m DCMut -d aa"),
52 | "JTT" : (0, "-m JTT -d aa"),
53 | "VT" : (0, "-m VT -d aa"),
54 | "Blosum62" : (0, "-m Blosum62 -d aa"),
55 | "CpREV" : (0, "-m CpREV -d aa"),
56 | "RtREV" : (0, "-m RtREV -d aa"),
57 | "MtMam" : (0, "-m MtMam -d aa"),
58 | "MtArt" : (0, "-m MtArt -d aa"),
59 | "HIVb" : (0, "-m HIVb -d aa"),
60 | "HIVw" : (0, "-m HIVw -d aa"),
61 | }
62 |
63 | # All the functions in here return the same thing given the same parameters,
64 | # so this just caches the return value ...
65 | def memoize(f):
66 | cache= {}
67 | def memf(*x):
68 | if x not in cache:
69 | cache[x] = f(*x)
70 | return cache[x]
71 | return memf
72 |
73 | @memoize
74 | def get_all_dna_models():
75 | '''
76 | Return a list of all implemented _base_models
77 | '''
78 | model_list = []
79 | for model in _base_models.keys():
80 | model_list.append(model)
81 | model_list.append("%s+I" %(model))
82 | model_list.append("%s+G" %(model))
83 | model_list.append("%s+I+G" %(model))
84 | return model_list
85 |
86 | @memoize
87 | def get_all_protein_models():
88 | '''
89 |     Return a list of all implemented _base_protein_models
90 | '''
91 | model_list = []
92 | for model in _base_protein_models.keys():
93 | model_list.append(model)
94 | model_list.append("%s+F" %(model))
95 | model_list.append("%s+I" %(model))
96 | model_list.append("%s+G" %(model))
97 | model_list.append("%s+I+G" %(model))
98 | model_list.append("%s+I+F" %(model))
99 | model_list.append("%s+G+F" %(model))
100 | model_list.append("%s+I+G+F" %(model))
101 | return model_list
102 |
103 | @memoize
104 | def get_mrbayes_models():
105 | '''
106 | Return a list of all models implemented in MrBayes. Thanks to Ainsley Seago for this.
107 | '''
108 | mrbayes_base_models = ["JC", "F81", "K80", "HKY", "SYM", "GTR"]
109 | model_list = []
110 | for model in mrbayes_base_models:
111 | model_list.append(model)
112 | model_list.append("%s+I" %(model))
113 | model_list.append("%s+G" %(model))
114 | model_list.append("%s+I+G" %(model))
115 | return model_list
116 |
117 | def get_beast_models():
118 | '''
119 | Return a list of all models implemented in BEAST v1.7.2.
120 | '''
121 | beast_base_models = ["K80", "TrNef", "SYM", "HKY", "TrN", "GTR"]
122 | model_list = []
123 | for model in beast_base_models:
124 | model_list.append(model)
125 | model_list.append("%s+I" %(model))
126 | model_list.append("%s+G" %(model))
127 | model_list.append("%s+I+G" %(model))
128 | return model_list
129 |
130 |
131 | @memoize
132 | def get_raxml_models():
133 | '''
134 |     Return a list of all models implemented in RAxML. Thanks to Ainsley Seago for this.
135 | '''
136 | model_list = ["GTR+G", "GTR+I+G"]
137 | return model_list
138 |
139 | @memoize
140 | def get_protein_models():
141 | '''
142 | Return a list of all protein models implemented in PhyML
143 | '''
144 | model_list = [
145 |         "LG", "WAG", "mtREV", "Dayhoff", "DCMut", "JTT", "VT",
146 |         "Blosum62", "CpREV", "RtREV", "MtMam", "MtArt", "HIVb", "HIVw"
147 | ]
148 | return model_list
149 |
150 |
151 |
152 | @memoize
153 | def get_num_params(modelstring):
154 | '''
155 | Input a model string like HKY+I+G or LG+G+F, and get the number of parameters
156 | '''
157 | elements = modelstring.split("+")
158 | model_name = elements[0]
159 | if model_name in _base_models.keys():
160 | model_params = _base_models[model_name][0]
161 | else:
162 | model_params = _base_protein_models[model_name][0]
163 | if "F" in elements[1:]:
164 | model_params = model_params+19-1 #the -1 here is to account for the fact we add 1 for the + in '+F' below
165 |
166 | extras = modelstring.count("+")
167 | total = model_params+extras
168 | log.debug("Model: %s Params: %d" %(modelstring, total))
169 |
170 | return total
171 |
172 | @memoize
173 | def get_model_difficulty(modelstring):
174 | '''
175 |     Input a model string like HKY+I+G or LG+G+F, and get a guess at how long it takes to analyse.
176 |     Right now, this is done with a simple hack: we just return the number of params
177 |     plus a modifier for extra things like +I and +G.
178 |     The hardest models are +I+G, then +G, then +I.
179 |     The result is only a 'difficulty' score, used to rank models when ordering
180 |     the analysis.
181 | '''
182 | elements = modelstring.split("+")
183 |
184 | model_params = get_num_params(modelstring)
185 |
186 | difficulty = 0
187 | if "G" in elements[1:]:
188 | difficulty = difficulty + 2000
189 | if "I" in elements[1:]:
190 | difficulty = difficulty + 1000
191 |
192 | extras = modelstring.count("+")
193 | total = model_params+extras+difficulty
194 | log.debug("Model: %s Difficulty: %d" %(modelstring, total))
195 |
196 | return total
197 |
198 |
199 |
200 | @memoize
201 | def get_model_commandline(modelstring):
202 | '''
203 | Input a model string, and get the PhyML command line
204 | '''
205 |
206 | # This is always the same - optimise brlens and model, not tree
207 | commandline = ["-o lr "]
208 |
209 | elements = modelstring.split("+")
210 | model_name = elements[0]
211 |
212 | # Everything but the first element
213 | extras = elements[1:]
214 |
215 | if model_name in _base_models.keys(): #DNA models
216 | commandline.append(_base_models[model_name][1])
217 | else: #protein models
218 | commandline.append(_base_protein_models[model_name][1])
219 | if "F" in extras:
220 |             commandline.append("-f e") #empirical AA frequencies (+19 params)
221 |         else:
222 |             commandline.append("-f m") #AA frequencies from the model (+0 params)
223 |
224 |
225 | if "I" in extras:
226 | commandline.append("-v e")
227 | if "G" in extras:
228 | commandline.append("-a e")
229 | commandline.append("-c 4")
230 | else:
231 | commandline.append("-c 1")
232 |
233 | return " ".join(commandline)
234 |
235 | if __name__ == "__main__":
236 | print " ",
237 | print "Name".ljust(12),
238 | print "Params".ljust(10),
239 | print "CommandLine"
240 |     for i, model in enumerate(get_all_dna_models()):
241 | print str(i+1).rjust(2),
242 | print model.ljust(12),
243 | print str(get_num_params(model)).ljust(10),
244 | print get_model_commandline(model)
245 | for model in get_protein_models():
246 | print model
247 |
248 |
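A quick worked example of the bookkeeping above (a sketch assuming phyml_models is importable; the numbers follow directly from _base_models and the '+' counting in get_num_params):

    import phyml_models

    # HKY = 1 rate + 3 frequency params; "+I" and "+G" add one param each
    print(phyml_models.get_num_params("HKY+I+G"))   # (1+3) + 2 = 6
    # "+F" nets 19 params: (19-1) here plus 1 from counting its '+'
    print(phyml_models.get_num_params("LG+G+F"))    # 0 + (19-1) + 2 = 20
    # And the matching PhyML arguments:
    print(phyml_models.get_model_commandline("HKY+I+G"))
    # "-o lr  -m 010010 -f e -v e -a e -c 4"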
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/phyml_models.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/phyml_models.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/progress.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import logging
19 | log = logging.getLogger("progress")
20 |
21 |
22 | class Progress(object):
23 | def __init__(self, cfg):
24 | self.cfg = cfg
25 | self.cfg.progress = self
26 |
27 | def begin(self, scheme_count, subset_count):
28 | pass
29 |
30 | def next_scheme(self):
31 | pass
32 |
33 | def subset_begin(self, sub):
34 | pass
35 |
36 | def subset_done(self, sub):
37 | pass
38 |
39 | def end(self):
40 | pass
41 |
42 |
43 | class NoProgress(Progress):
44 | pass
45 |
46 |
47 | class TextProgress(Progress):
48 |
49 | def begin(self, scheme_count, subset_count):
50 | self.scheme_count = scheme_count
51 | self.subset_count = subset_count
52 | self.schemes_analysed = 0
53 | self.subsets_analysed = set()
54 |
55 | log.info("PartitionFinder will have to analyse %d subsets to complete this analysis", subset_count)
56 | log.info("This will result in %s schemes being created", scheme_count)
57 | if subset_count > 10000:
58 | log.warning("%d is a lot of subsets, this might take a long time to analyse", subset_count)
59 | log.warning("Perhaps consider using a different search scheme instead (see Manual)")
60 |
61 | def next_scheme(self):
62 | self.schemes_analysed += 1
63 | #log.info("Analysing scheme %d/%d", self.schemes_analysed,self.scheme_count)
64 |
65 | def subset_begin(self, sub):
66 | #log.info("Begin analysing subset %s", sub)
67 | pass
68 |
69 | def subset_done(self, sub):
70 | old_num_done = len(self.subsets_analysed)
71 | self.subsets_analysed.add(sub.name)
72 | num_subs_done = len(self.subsets_analysed)
73 | if old_num_done != num_subs_done:
74 | percent_done = (
75 | float(num_subs_done) * 100.0) / float(self.subset_count)
76 | log.info("Finished subset %d/%d, %.2f percent done", num_subs_done, self.subset_count, percent_done)
77 |
78 | def end(self):
79 | pass
80 |
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/progress.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/progress.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/pyparsing.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/pyparsing.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/raxml.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, the PyParsing library, and the python-cluster library
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | """Run raxml and parse the output"""
19 |
20 | import logging
21 | log = logging.getLogger("raxml")
22 |
23 | import subprocess
24 | import shlex
25 | import os
26 | import shutil
27 | import sys
28 | import fnmatch
29 | import util
30 |
31 | from pyparsing import (
32 | Word, Literal, nums, Suppress, ParseException,
33 | SkipTo, OneOrMore, Regex
34 | )
35 |
36 | import raxml_models as models
37 |
38 | _binary_name = 'raxml'
39 | if sys.platform == 'win32':
40 | _binary_name += ".exe"
41 |
42 | from util import PhylogenyProgramError
43 |
44 |
45 | class RaxmlError(PhylogenyProgramError):
46 | pass
47 |
48 |
49 | def find_program():
50 | """Locate the binary ..."""
51 | pth = os.path.abspath(__file__)
52 |
53 | # Split off the name and the directory...
54 | pth, notused = os.path.split(pth)
55 | pth, notused = os.path.split(pth)
56 | pth = os.path.join(pth, "programs", _binary_name)
57 | pth = os.path.normpath(pth)
58 |
59 | log.debug("Checking for program %s", _binary_name)
60 | if not os.path.exists(pth) or not os.path.isfile(pth):
61 | log.error("No such file: '%s'", pth)
62 | raise RaxmlError
63 | log.debug("Found program %s at '%s'", _binary_name, pth)
64 | return pth
65 |
66 | _raxml_binary = None
67 |
68 |
69 | def run_raxml(command):
70 | global _raxml_binary
71 | if _raxml_binary is None:
72 | _raxml_binary = find_program()
73 |
74 | # Add in the command file
75 | log.debug("Running 'raxml %s'", command)
76 | command = "\"%s\" %s" % (_raxml_binary, command)
77 |
78 | # Note: We use shlex.split as it does a proper job of handling command
79 | # lines that are complex
80 | p = subprocess.Popen(
81 | shlex.split(command),
82 | shell=False,
83 | stdout=subprocess.PIPE,
84 | stderr=subprocess.PIPE)
85 |
86 | # Capture the output, we might put it into the errors
87 | stdout, stderr = p.communicate()
88 | # p.terminate()
89 |
90 | if p.returncode != 0:
91 | log.error("RAxML did not execute successfully")
92 | log.error("RAxML output follows, in case it's helpful for finding the problem")
93 | log.error("%s", stdout)
94 | log.error("%s", stderr)
95 | raise RaxmlError
96 |
97 |
98 | def dupfile(src, dst):
99 |     # Make a copy so that we don't overwrite different model runs
100 | # of the same alignment
101 |
102 | # TODO maybe this should throw...?
103 | try:
104 | if os.path.exists(dst):
105 | os.remove(dst)
106 | shutil.copyfile(src, dst)
107 | except OSError:
108 | log.error("Cannot link/copy file %s to %s", src, dst)
109 | raise RaxmlError
110 |
111 |
112 | def make_topology(alignment_path, datatype, cmdline_extras):
113 | '''Make a MP tree to start the analysis'''
114 | log.info("Making MP tree for %s", alignment_path)
115 |
116 | cmdline_extras = check_defaults(cmdline_extras)
117 |
118 | # First get the MP topology like this (-p is a hard-coded random number seed):
119 | if datatype == "DNA":
120 | command = "-y -s '%s' -m GTRGAMMA -n MPTREE -p 123456789 %s" % (
121 | alignment_path, cmdline_extras)
122 | elif datatype == "protein":
123 | command = "-y -s '%s' -m PROTGAMMALG -n MPTREE -p 123456789 %s" % (
124 | alignment_path, cmdline_extras)
125 | else:
126 | log.error("Unrecognised datatype: '%s'" % (datatype))
127 | raise(RaxmlError)
128 |
129 | #force raxml to write to the dir with the alignment in it
130 | aln_dir, fname = os.path.split(alignment_path)
131 | command = ''.join([command, " -w '%s'" % os.path.abspath(aln_dir)])
132 |
133 | run_raxml(command)
134 | dir, aln = os.path.split(alignment_path)
135 | tree_path = os.path.join(dir, "RAxML_parsimonyTree.MPTREE")
136 | return tree_path
137 |
138 |
139 | def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
140 | #Now we re-estimate branchlengths using a GTR+G model on the (unpartitioned) dataset
141 | cmdline_extras = check_defaults(cmdline_extras)
142 | dir_path, fname = os.path.split(topology_path)
143 | tree_path = os.path.join(dir_path, 'topology_tree.phy')
144 | log.debug("Copying %s to %s", topology_path, tree_path)
145 | dupfile(topology_path, tree_path)
146 | os.remove(topology_path) # saves headaches later...
147 |
148 | if datatype == "DNA":
149 | log.info("Estimating GTR+G branch lengths on tree using RAxML")
150 | command = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s" % (
151 | alignment_path, tree_path, os.path.abspath(dir_path), cmdline_extras)
152 | run_raxml(command)
153 | if datatype == "protein":
154 | log.info("Estimating LG+G branch lengths on tree using RAxML")
155 | command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s" % (
156 | alignment_path, tree_path, os.path.abspath(dir_path), cmdline_extras)
157 | run_raxml(command)
158 |
159 | dir, aln = os.path.split(alignment_path)
160 | tree_path = os.path.join(dir, "RAxML_result.BLTREE")
161 | log.info("Branchlength estimation finished")
162 |
163 | # Now return the path of the final tree with branch lengths
164 | return tree_path
165 |
166 |
167 | def check_defaults(cmdline_extras):
168 | """We use some sensible defaults, but allow users to override them with extra cmdline options"""
169 | if cmdline_extras.count("-e") > 0:
170 | #then the user has specified a particular accuracy:
171 | accuracy = ""
172 | else:
173 | #we specify a default accuracy of 1 lnL unit
174 | accuracy = " -e 1.0 "
175 |
176 | #we set this in case people are using the PThreads version of RAxML
177 | #note that this is intentionally set to give an error if people use Pthreads, because
178 | #they will need to consider by hand what the optimal setting is. And, if we set it >1
179 | #then we risk massively slowing things down because PF's default is to use all possible
180 | #processors.
181 | if cmdline_extras.count("-T") > 0:
182 | num_threads = ""
183 |
184 | else:
185 | num_threads = " -T 1 "
186 |
187 | #and we'll specify the -O option, so that the program doesn't exit if there are undetermined seqs.
188 | #we'll put spaces at the start and end too, just in case...
189 | cmdline_extras = ''.join(
190 | [" ", cmdline_extras, accuracy, num_threads, "-O "])
191 |
192 | return cmdline_extras
193 |
194 |
195 | def analyse(model, alignment_path, tree_path, branchlengths, cmdline_extras):
196 | """Do the analysis -- this will overwrite stuff!"""
197 |
198 | # Move it to a new name to stop raxml stomping on different model analyses
199 | # dupfile(alignment_path, analysis_path)
200 | model_params = models.get_model_commandline(model)
201 |
202 | if branchlengths == 'linked':
203 | #constrain all branchlengths to be equal
204 | bl = ' -f B '
205 | elif branchlengths == 'unlinked':
206 |         #let branchlengths vary among subsets
207 | bl = ' -f e '
208 | else:
209 | # WTF?
210 | log.error("Unknown option for branchlengths: %s", branchlengths)
211 | raise RaxmlError
212 |
213 | cmdline_extras = check_defaults(cmdline_extras)
214 |
215 | #raxml doesn't append alignment names automatically, like PhyML, let's do that here
216 | analysis_ID = raxml_analysis_ID(alignment_path, model)
217 |
218 | #force raxml to write to the dir with the alignment in it
219 | #-e 1.0 sets the precision to 1 lnL unit. This is all that's required here, and helps with speed.
220 | aln_dir, fname = os.path.split(alignment_path)
221 | command = " %s -s '%s' -t '%s' %s -n %s -w '%s' %s" % (
222 | bl, alignment_path, tree_path, model_params, analysis_ID, os.path.abspath(aln_dir), cmdline_extras)
223 | run_raxml(command)
224 |
225 |
226 | def raxml_analysis_ID(alignment_path, model):
227 | dir, file = os.path.split(alignment_path)
228 | aln_name = os.path.splitext(file)[0]
229 | analysis_ID = '%s_%s.txt' % (aln_name, model)
230 | return analysis_ID
231 |
232 |
233 | def make_tree_path(alignment_path):
234 | dir, aln = os.path.split(alignment_path)
235 | tree_path = os.path.join(dir, "RAxML_result.BLTREE")
236 | return tree_path
237 |
238 |
239 | def make_output_path(alignment_path, model):
240 | analysis_ID = raxml_analysis_ID(alignment_path, model)
241 | dir, aln_file = os.path.split(alignment_path)
242 | stats_fname = "RAxML_info.%s" % (analysis_ID)
243 | stats_path = os.path.join(dir, stats_fname)
244 | tree_fname = "RAxML_result.%s" % (analysis_ID)
245 | tree_path = os.path.join(dir, tree_fname)
246 | return stats_path, tree_path
247 |
248 |
249 | def remove_files(aln_path, model):
250 | '''remove all files from the alignment directory that are produced by raxml'''
251 | dir, file = os.path.split(aln_path)
252 | analysis_ID = raxml_analysis_ID(aln_path, model)
253 | dir = os.path.abspath(dir)
254 | fs = os.listdir(dir)
255 | fnames = fnmatch.filter(fs, '*%s*' % analysis_ID)
256 | util.delete_files(fnames)
257 |
258 |
259 | class RaxmlResult(object):
260 |
261 | def __init__(self):
262 | self.rates = {}
263 | self.freqs = {}
264 |
265 | def __str__(self):
266 |         return "RaxmlResult(lnl:%s, tree_size:%s, secs:%s, alpha:%s)" % (
267 | self.lnl, self.tree_size, self.seconds, self.alpha)
268 |
269 |
270 | class Parser(object):
271 | def __init__(self, datatype):
272 |
273 | if datatype == "protein":
274 | letters = "ARNDCQEGHILKMFPSTWYV"
275 | elif datatype == "DNA":
276 | letters = "ATCG"
277 | else:
278 | log.error("Unknown datatype '%s', please check" % datatype)
279 | raise RaxmlError
280 |
281 | FLOAT = Word(nums + '.-').setParseAction(lambda x: float(x[0]))
282 |
283 | L = Word(letters, exact=1)
284 | COLON = Suppress(":")
285 |
286 | LNL_LABEL = Regex("Final GAMMA.+:") | Literal("Likelihood:")
287 | TIME_LABEL = Regex("Overall Time.+:") | Regex("Overall Time.+tion ")
288 | ALPHA_LABEL = Literal("alpha:")
289 | TREE_SIZE_LABEL = Literal("Tree-Length:")
290 |
291 | def labeled_float(label):
292 | return Suppress(SkipTo(label)) + Suppress(label) + FLOAT
293 |
294 | lnl = labeled_float(LNL_LABEL)
295 | lnl.setParseAction(self.set_lnl)
296 |
297 | seconds = labeled_float(TIME_LABEL)
298 | seconds.setParseAction(self.set_seconds)
299 |
300 | alpha = labeled_float(ALPHA_LABEL)
301 | alpha.setParseAction(self.set_alpha)
302 |
303 | tree_size = labeled_float(TREE_SIZE_LABEL)
304 | tree_size.setParseAction(self.set_tree_size)
305 |
306 | rate = Suppress("rate") + L + Suppress("<->") + L + COLON + FLOAT
307 | rate.setParseAction(self.set_rate)
308 | rates = OneOrMore(rate)
309 |
310 | freq = Suppress("freq pi(") + L + Suppress("):") + FLOAT
311 | freq.setParseAction(self.set_freq)
312 | freqs = OneOrMore(freq)
313 |
314 | # Just look for these things
315 | self.root_parser = seconds + lnl + alpha + tree_size + rates + freqs
316 |
317 | def set_seconds(self, tokens):
318 | self.result.seconds = tokens[0]
319 |
320 | def set_lnl(self, tokens):
321 | self.result.lnl = tokens[0]
322 |
323 | def set_tree_size(self, tokens):
324 | self.result.tree_size = tokens[0]
325 |
326 | def set_alpha(self, tokens):
327 | self.result.alpha = tokens[0]
328 |
329 | def set_rate(self, tokens):
330 | basefrom, baseto, rate = tokens
331 | self.result.rates[(basefrom, baseto)] = rate
332 |
333 | def set_freq(self, tokens):
334 | base, rate = tokens
335 | self.result.freqs[base] = rate
336 |
337 | def parse(self, text):
338 | log.debug("Parsing raxml output...")
339 | self.result = RaxmlResult()
340 | try:
341 | self.root_parser.parseString(text)
342 | except ParseException, p:
343 | log.error(str(p))
344 | raise RaxmlError
345 |
346 | log.debug("Result is %s", self.result)
347 | return self.result
348 |
349 |
350 | def parse(text, datatype):
351 | the_parser = Parser(datatype)
352 | return the_parser.parse(text)
353 |
354 | if __name__ == '__main__':
355 | logging.basicConfig(level=logging.DEBUG)
356 | pth = "./tests/misc/raxml_nucleotide.output"
357 | p = Parser('DNA')
358 | result = p.parse(open(pth).read())
359 |
360 | pth = "./tests/misc/raxml_aminoacid.output"
361 | p = Parser('protein')
362 | result = p.parse(open(pth).read())
363 |
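For reference, a sketch of what check_defaults() produces (assuming the raxml module is importable); user-supplied -e or -T settings suppress the corresponding default:

    import raxml

    print(raxml.check_defaults(""))
    # "  -e 1.0  -T 1 -O "  (default accuracy, single thread, -O always added)
    print(raxml.check_defaults("-e 0.1"))
    # " -e 0.1 -T 1 -O "    (the user's accuracy setting is left alone)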
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/raxml.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/raxml.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/raxml_models.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import logging
19 | log = logging.getLogger("analysis")
20 |
21 | import config
22 |
23 | # TODO need some error checking!
24 |
25 | # number of free parameters in substitution model, listed as "model+base_frequencies"
26 | _base_models = {
27 | "GTR" : (5+3, "")
28 | }
29 |
30 | # number of free parameters in substitution model, listed as "aa_frequencies"
31 | _base_protein_models = {
32 | "DAYHOFF" : (0, ""),
33 | "DCMUT" : (0, ""),
34 | "JTT" : (0, ""),
35 | "MTREV" : (0, ""),
36 | "WAG" : (0, ""),
37 | "RTREV" : (0, ""),
38 | "CPREV" : (0, ""),
39 | "VT" : (0, ""),
40 | "BLOSUM62" : (0, ""),
41 | "MTMAM" : (0, ""),
42 | "LG" : (0, ""),
43 | }
44 |
45 | # All the functions in here return the same thing given the same parameters,
46 | # so this just caches the return value ...
47 | def memoize(f):
48 | cache= {}
49 | def memf(*x):
50 | if x not in cache:
51 | cache[x] = f(*x)
52 | return cache[x]
53 | return memf
54 |
55 | @memoize
56 | def get_protein_models_gamma():
57 | '''
58 |     Return a list of all implemented _base_protein_models in RAxML
59 | NB there are NO models in RAxML without Gamma
60 | '''
61 | model_list = []
62 | for model in _base_protein_models.keys():
63 | model_list.append("%s+G" %(model))
64 | model_list.append("%s+G+F" %(model))
65 | return model_list
66 |
67 | @memoize
68 | def get_protein_models_gammaI():
69 | '''
70 |     Return a list of all implemented _base_protein_models in RAxML with invariant sites
71 | '''
72 | model_list = []
73 | for model in _base_protein_models.keys():
74 | model_list.append("%s+I+G" %(model))
75 | model_list.append("%s+I+G+F" %(model))
76 | return model_list
77 |
78 | def get_all_protein_models():
79 | model_list = get_protein_models_gamma() + get_protein_models_gammaI()
80 | return model_list
81 |
82 | @memoize
83 | def get_dna_models_gamma():
84 | '''
85 | Just one model in RAxML with +G.
86 | '''
87 | model_list = ["GTR+G"]
88 | return model_list
89 |
90 | @memoize
91 | def get_dna_models_gammaI():
92 | '''
93 | Just one model in RAxML with I+G.
94 | '''
95 | model_list = ["GTR+I+G"]
96 | return model_list
97 |
98 | @memoize
99 | def get_all_dna_models():
100 | model_list = get_dna_models_gamma() + get_dna_models_gammaI()
101 | return model_list
102 |
103 | @memoize
104 | def get_all_models():
105 |     model_list = get_all_dna_models() + get_all_protein_models()
106 | return model_list
107 |
108 | @memoize
109 | def get_model_commandline(modelstring):
110 | '''
111 | Input a model string, and get the piece of the raxml command line that defines that model
112 | '''
113 | commandline = '-m '
114 | elements = modelstring.split("+")
115 | model_name = elements[0]
116 |
117 | # Everything but the first element
118 | extras = elements[1:]
119 |
120 | if model_name in _base_models.keys(): #DNA models
121 | commandline = ''.join([commandline, "GTRGAMMA"])
122 | if "I" in extras:
123 | commandline = ''.join([commandline, "I"])
124 |     else: #protein models look like this: 'PROTGAMMAILGF'
125 | commandline = ''.join([commandline, "PROTGAMMA"])
126 | if "I" in extras:
127 | commandline = ''.join([commandline, "I"])
128 | commandline = ''.join([commandline, model_name])
129 | if "F" in extras:
130 | commandline = ''.join([commandline, "F"])
131 |
132 | return commandline
133 |
134 |
135 | @memoize
136 | def get_num_params(modelstring):
137 | '''
138 | Input a model string like HKY+I+G or LG+G+F, and get the number of parameters
139 | '''
140 | elements = modelstring.split("+")
141 | model_name = elements[0]
142 | if model_name in _base_models.keys():
143 | model_params = _base_models[model_name][0]
144 | else:
145 | model_params = _base_protein_models[model_name][0]
146 | if "F" in elements[1:]:
147 | model_params = model_params+19-1 #the -1 here is to account for the fact we add 1 for the + in '+F' below
148 |
149 | extras = modelstring.count("+")
150 | total = model_params+extras
151 | log.debug("Model: %s Params: %d" %(modelstring, total))
152 |
153 | return total
154 |
155 | @memoize
156 | def get_model_difficulty(modelstring):
157 | '''
158 |     Input a model string like HKY+I+G or LG+G+F, and get a guess at how long it takes to analyse.
159 |     Right now, this is done with a simple hack: we just return the number of params
160 |     plus a modifier for extra things like +I and +G.
161 |     The hardest models are +I+G, then +G, then +I.
162 |     The result is only a 'difficulty' score, used to rank models when ordering
163 |     the analysis.
164 | '''
165 | elements = modelstring.split("+")
166 |
167 | model_params = get_num_params(modelstring)
168 |
169 | difficulty = 0
170 | if "G" in elements[1:]:
171 | difficulty = difficulty + 2000
172 | if "I" in elements[1:]:
173 | difficulty = difficulty + 1000
174 |
175 | extras = modelstring.count("+")
176 | total = model_params+extras+difficulty
177 | log.debug("Model: %s Difficulty: %d" %(modelstring, total))
178 |
179 | return total
180 |
181 | def get_raxml_protein_modelstring(modelstring):
182 |     """Start with a model like this: LG+I+G+F, return a model in raxml format like this:
183 |     LGF. This is only used for printing out RAxML partition files"""
184 | elements = modelstring.split("+")
185 | model_name = elements[0]
186 | extras = elements[1:]
187 |
188 | raxmlstring = model_name
189 | if "F" in extras:
190 | raxmlstring = ''.join([raxmlstring, "F"])
191 |
192 | return raxmlstring
193 |
194 | if __name__ == "__main__":
195 | print " ",
196 | print "Name".ljust(15),
197 | print "Params".ljust(10),
198 | print "Diff".ljust(10),
199 | print "CommandLine"
200 |     for i, model in enumerate(get_all_dna_models()):
201 | print str(i+1).rjust(2),
202 | print model.ljust(15),
203 | print str(get_num_params(model)).ljust(10),
204 | print str(get_model_difficulty(model)).ljust(10),
205 | print get_model_commandline(model)
206 | for i, model in enumerate(get_all_protein_models()):
207 | print str(i+1).rjust(2),
208 | print model.ljust(15),
209 | print str(get_num_params(model)).ljust(10),
210 | print str(get_model_difficulty(model)).ljust(10),
211 | print get_model_commandline(model)
212 |
213 |
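A couple of worked examples of the model-string translation above (a sketch assuming raxml_models is importable):

    import raxml_models

    print(raxml_models.get_model_commandline("GTR+I+G"))   # "-m GTRGAMMAI"
    print(raxml_models.get_model_commandline("LG+I+G+F"))  # "-m PROTGAMMAILGF"
    print(raxml_models.get_raxml_protein_modelstring("LG+I+G+F"))  # "LGF"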
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/raxml_models.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/raxml_models.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/reporter.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | # This program is free software: you can redistribute it and/or modify it under
4 | # the terms of the GNU General Public License as published by the Free Software
5 | # Foundation, either version 3 of the License, or (at your option) any later
6 | # version.
7 | #
8 | # This program is distributed in the hope that it will be useful, but WITHOUT
9 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
10 | # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
11 | # details. You should have received a copy of the GNU General Public License
12 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
13 | # PartitionFinder also includes the PhyML program, the RAxML program, and the
14 | # PyParsing library, all of which are protected by their own licenses and
15 | # conditions, using PartitionFinder implies that you agree with those licences
16 | # and conditions as well.
17 |
18 | import logging
19 | log = logging.getLogger("reporter")
20 |
21 | import os
22 |
23 | scheme_header_template = "%-18s: %s\n"
24 | scheme_subset_template = "%-6s | %-10s | %-30s | %-30s | %-40s\n"
25 | subset_template = "%-15s | %-15s | %-15s | %-15s | %-15s\n"
26 |
27 |
28 | class TextReporter(object):
29 | def __init__(self, config):
30 | self.cfg = config
31 | self.cfg.reporter = self
32 |
33 | def write_subset_summary(self, sub):
34 | pth = os.path.join(self.cfg.subsets_path, sub.name + '.txt')
35 | # Sort everything
36 | model_results = [(r.bic, r) for r in sub.results.values()]
37 | model_results.sort()
38 | output = open(pth, 'w')
39 | # TODO change back to full name...
40 | # output.write("Model selection results for subset: %s\n" % sub.full_name)
41 | output.write("Model selection results for subset: %s\n" % sub.name)
42 | output.write("Subset alignment stored here: %s\n" % sub.alignment_path)
43 | output.write("This subset contains the following data_blocks: %s\n" % sub)
44 | output.write("Models are organised according to their BIC scores\n\n")
45 |         output.write(subset_template % ("Model", "lnL", "AIC", "AICc", "BIC"))
46 | for bic, r in model_results:
47 | output.write(subset_template % (r.model, r.lnl, r.aic, r.aicc, r.bic))
48 |
49 | def write_scheme_summary(self, sch, result):
50 | pth = os.path.join(self.cfg.schemes_path, sch.name + '.txt')
51 | output = open(pth, 'w')
52 | self.output_scheme(sch, result, output)
53 |
54 | def output_scheme(self, sch, result, output):
55 | self.write_scheme_header(sch, result, output)
56 | sorted_subsets = [sub for sub in sch]
57 | sorted_subsets.sort(key=lambda sub: min(sub.columns), reverse=False)
58 | self.write_subsets(sch, result, output, sorted_subsets)
59 | self.write_raxml(sch, result, output, sorted_subsets)
60 |
61 | def write_scheme_header(self, sch, result, output):
62 | output.write(scheme_header_template % ("Scheme Name", sch.name))
63 | output.write(scheme_header_template % ("Scheme lnL", result.lnl))
64 | if self.cfg.model_selection == "aic":
65 | output.write(scheme_header_template % ("Scheme AIC", result.aic))
66 | if self.cfg.model_selection == "aicc":
67 | output.write(scheme_header_template % ("Scheme AICc", result.aicc))
68 | if self.cfg.model_selection == "bic":
69 | output.write(scheme_header_template % ("Scheme BIC", result.bic))
70 | output.write(scheme_header_template % ("Number of params", result.sum_k))
71 | output.write(scheme_header_template % ("Number of sites", result.nsites))
72 | output.write(scheme_header_template % ("Number of subsets", result.nsubs))
73 | output.write("\n")
74 |
75 | def write_subsets(self, sch, result, output, sorted_subsets):
76 | output.write(scheme_subset_template % (
77 | "Subset", "Best Model", "Subset Partitions", "Subset Sites", "Alignment"))
78 | number = 1
79 |
80 | pf_scheme_description = []
81 | # a way to print out the scheme in PF format
82 |
83 | for sub in sorted_subsets:
84 | desc = {}
85 | names = []
86 | for part in sub:
87 | names.append(part.name)
88 | for subpart in part.description: # loop through each sub-part of the partition
89 | desc[subpart[0]] = subpart
90 |
91 | #pretty print the sites in the scheme
92 | desc_starts = desc.keys()
93 | desc_starts.sort()
94 | parts = []
95 | for key in desc_starts:
96 | part = desc[key]
97 | if part[2] == 1:
98 | text = "%s-%s" % (part[0], part[1])
99 | else:
100 | text = "%s-%s\\%s" % tuple(part)
101 | parts.append(text)
102 | parts = ', '.join(parts)
103 |
104 | names.sort()
105 | names = ', '.join(names)
106 |
107 | pf_scheme_description.append("(%s)" % names)
108 |
109 | output.write(scheme_subset_template % (
110 | number, sub.best_model, names, parts, sub.alignment_path))
111 | number += 1
112 |
113 | pf_scheme_description = " ".join(pf_scheme_description)
114 | output.write("\n\nScheme Description in PartitionFinder format\n")
115 | output.write("Scheme_%s = %s;" % (sch.name, pf_scheme_description))
116 |
117 | def write_raxml(self, sch, result, output, sorted_subsets):
118 |         """Print out partition definitions in RAxML-like format; this might be
119 |         useful to some people
120 | """
121 | from raxml_models import get_raxml_protein_modelstring
122 |         output.write("\n\nRAxML-style partition definitions\n")
123 | number = 1
124 | for sub in sorted_subsets:
125 |
126 | desc = {}
127 | names = []
128 | for part in sub:
129 | names.append(part.name)
130 | for subpart in part.description: # loop through each sub-part of the partition
131 | desc[subpart[0]] = subpart
132 |
133 | # Pretty print the sites in the scheme
134 | desc_starts = desc.keys()
135 | desc_starts.sort()
136 | parts = []
137 | for key in desc_starts:
138 | part = desc[key]
139 | if part[2] == 1:
140 | text = "%s-%s" % (part[0], part[1])
141 | else:
142 | text = "%s-%s\\%s" % tuple(part)
143 | parts.append(text)
144 | parts = ', '.join(parts)
145 |
146 | if self.cfg.datatype == "DNA":
147 | model = "DNA"
148 | elif self.cfg.datatype == "protein":
149 | model = get_raxml_protein_modelstring(sub.best_model)
150 | else:
151 | raise RuntimeError
152 |
153 | line = "%s, p%s = %s\n" % (model, number, parts)
154 | output.write(line)
155 |
156 | number += 1
157 |
158 | def write_best_scheme(self, result):
159 | pth = os.path.join(self.cfg.output_path, 'best_scheme.txt')
160 | output = open(pth, 'wb')
161 | output.write('Settings used\n\n')
162 | output.write(scheme_header_template % ("alignment", self.cfg.alignment_path))
163 | output.write(scheme_header_template % ("branchlengths", self.cfg.branchlengths))
164 | output.write(scheme_header_template % ("models", ', '.join(self.cfg.models)))
165 | output.write(scheme_header_template % ("model_selection",
166 | self.cfg.model_selection))
167 | output.write(scheme_header_template % ("search", self.cfg.search))
168 | if self.cfg.search in ["rcluster", "hcluster"]:
169 | pretty_weights = "rate = %s, base = %s, model = %s, alpha = %s" %(
170 | str(self.cfg.cluster_weights["rate"]),
171 | str(self.cfg.cluster_weights["freqs"]),
172 | str(self.cfg.cluster_weights["model"]),
173 | str(self.cfg.cluster_weights["alpha"]))
174 | output.write(scheme_header_template % ("weights", pretty_weights))
175 | if self.cfg.search == "rcluster":
176 | output.write(scheme_header_template % ("rcluster-percent",
177 | self.cfg.cluster_percent))
178 | output.write('\n\nBest partitioning scheme\n\n')
179 | self.output_scheme(result.best_scheme, result.best_result, output)
180 | log.info("Information on best scheme is here: %s", pth)
181 |
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/reporter.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/reporter.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/results.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import logging
19 | log = logging.getLogger("results")
20 |
21 | import os
22 | import cPickle as pickle
23 |
24 | from util import PartitionFinderError
25 |
26 | _check_fields = "lnl aic aicc bic".split()
27 |
28 |
29 | class ComparisonError(PartitionFinderError):
30 | pass
31 |
32 |
33 | class AnalysisResults(object):
34 | """
35 | This stores the results, keeping only the winning scheme.
36 | """
37 |
38 | MAX_ERROR = .1
39 |
40 | def __init__(self, model_selection):
41 | self.model_selection = model_selection
42 | self.best_score = None
43 | self.best_result = None
44 | self.best_scheme = None
45 |
46 | def add_scheme_result(self, sch, result):
47 | score = result.score
48 | if self.best_score is None or score < self.best_score:
49 | self.best_score = score
50 | self.best_result = result
51 | self.best_scheme = sch
52 |
53 | def get_dump_path(self, cfg):
54 | return os.path.join(cfg.base_path, 'results.bin')
55 |
56 | def get_result_fields(self):
57 | flds = []
58 | for k in _check_fields:
59 | flds.append(getattr(self.best_result, k))
60 | return flds
61 |
62 | def dump(self, cfg):
63 | pth = self.get_dump_path(cfg)
64 | log.info("Dumping all results to '%s'", pth)
65 | f = open(pth, 'wb')
66 | pickle.dump(self.get_result_fields(), f, -1)
67 |
68 | def compare(self, cfg):
69 | """We only compare the best result!"""
70 | pth = self.get_dump_path(cfg)
71 | if not os.path.exists(pth):
72 | log.error("Previous results file not found at '%s'. "
73 | "Did you run --dump-results previously?", pth)
74 | raise ComparisonError
75 |
76 | log.info("Loading old results from '%s'", pth)
77 | f = open(pth, 'rb')
78 | old_fields = pickle.load(f)
79 | f.close()
80 |
81 | cur_fields = self.get_result_fields()
82 |
83 | log.info("Comparing results...")
84 | # Now do the comparison
85 |
86 | errors = 0
87 | for nm, oldv, curv in zip(_check_fields, old_fields, cur_fields):
88 | if abs(oldv - curv) > self.MAX_ERROR:
89 |                 log.error("Differences were greater than the acceptable value of %s", AnalysisResults.MAX_ERROR)
90 | log.error("Old %s value: %s, new %s value %s", nm, oldv, nm, curv)
91 | errors += 1
92 |
93 | if errors > 0:
94 | raise ComparisonError
95 | else:
96 | log.info(
97 | "All results were within an acceptable %s of the dumped results",
98 | AnalysisResults.MAX_ERROR)
99 |
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/results.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/results.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/scheme.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import logging
19 | log = logging.getLogger("scheme")
20 | import subset
21 | import submodels
22 |
23 | from math import log as logarithm
24 |
25 | from util import PartitionFinderError
26 |
27 |
28 | class SchemeError(PartitionFinderError):
29 | pass
30 |
31 |
32 | class SchemeResult(object):
33 | def __init__(self, sch, nseq, branchlengths, model_selection):
34 | self.scheme_name = sch.name
35 | self.scheme = sch
36 | self.model_selection = model_selection
37 |
38 | # Calculate AIC, BIC, AICc for each scheme.
39 | # How you do this depends on whether brlens are linked or not.
40 | self.nsubs = len(sch.subsets) # number of subsets
41 | sum_subset_k = sum([s.best_params for s in sch]) # sum of number of parameters in the best model of each subset
42 |
43 | log.debug("Calculating number of parameters in scheme:")
44 | log.debug("Total parameters from subset models: %d" % (sum_subset_k))
45 |
46 | if branchlengths == 'linked': # linked brlens - only one extra parameter per subset
47 | self.sum_k = sum_subset_k + (self.nsubs - 1) + (
48 | (2 * nseq) - 3) # number of parameters in a scheme
49 | log.debug("Total parameters from brlens: %d" % ((2 * nseq) - 3))
50 | log.debug(
51 | "Parameters from subset multipliers: %d" % (self.nsubs - 1))
52 |
53 | elif branchlengths == 'unlinked': # unlinked brlens - every subset has its own set of brlens
54 | self.sum_k = sum_subset_k + (self.nsubs * (
55 | (2 * nseq) - 3)) # number of parameters in a scheme
56 |             log.debug("Total parameters from brlens: %d" % (
57 |                 ((2 * nseq) - 3) * self.nsubs))
58 |
59 | else:
60 | # WTF?
61 | log.error("Unknown option for branchlengths: %s", branchlengths)
62 |             raise SchemeError
63 |
64 | log.debug("Grand total parameters: %d" % (self.sum_k))
65 |
66 | self.lnl = sum([s.best_lnl for s in sch])
67 | self.nsites = sum([len(s.columnset) for s in sch])
68 |
69 | K = float(self.sum_k)
70 | n = float(self.nsites)
71 | lnL = float(self.lnl)
72 |
73 | log.debug("n: %d\tK: %d" % (n, K))
74 |
75 |         #here we put in a catch for small subsets, where n < (K + 2):
76 |         #the AICc correction divides by (n - K - 1), so it misbehaves
77 |         if n < (K + 2.0):
78 |             log.warning("This scheme has nearly as many parameters as "
79 |                         "sites; the AICc score may be unreliable")
80 |
81 |         self.aic = (-2.0 * lnL) + (2.0 * K)
82 |         self.aicc = self.aic + ((2.0 * K * (K + 1.0)) / (n - K - 1.0))
83 |         self.bic = (-2.0 * lnL) + (K * logarithm(n))
84 |
85 |         if model_selection == "aic":
86 |             self.score = self.aic
87 |         elif model_selection == "aicc":
88 |             self.score = self.aicc
89 |         elif model_selection == "bic":
90 |             self.score = self.bic
91 |         else:
92 |             log.error("Unknown model_selection: %s", model_selection)
93 |             raise SchemeError
94 |
95 |     def __str__(self):
96 |         return "SchemeResult<lnL:{0.lnl}, aic:{0.aic}, aicc:{0.aicc}, bic:{0.bic}>".format(self)
97 |
98 |
99 | class Scheme(object):
100 | def __init__(self, cfg, name, subsets, description=None):
101 | """A set of subsets of partitions"""
102 | self.name = name
103 | self.subsets = set()
104 | self.description = description
105 |
106 | # This one is a set of frozensets of partitions...
107 | part_subsets = set()
108 |
109 | # This is really long-winded, but it is mainly for error-checking
110 | partitions = set()
111 | duplicates = []
112 | for s in subsets:
113 | for p in s:
114 | if p in partitions:
115 | # This is an error -- we'll collect them up
116 | duplicates.append(str(p))
117 | else:
118 | partitions.add(p)
119 | self.subsets.add(s)
120 | part_subsets.add(s.partitions)
121 |
122 | self.part_subsets = frozenset(part_subsets)
123 |
124 | # Report the errors
125 | if duplicates:
126 | log.error("Scheme '%s' contains duplicate partitions: %s",
127 | name, ', '.join(duplicates))
128 | raise SchemeError
129 |
130 |         # Check the scheme against the full set of user-defined partitions,
131 |         # so we can report any that it leaves out
132 | pset = cfg.partitions
133 |
134 | # Do a set-difference to see what is missing...
135 | missing = pset.partitions - partitions
136 | if missing:
137 | log.error("Scheme '%s' is missing partitions: %s",
138 | name, ', '.join([str(p) for p in missing]))
139 | raise SchemeError
140 |
141 | # This locks down whether new partitions can be created.
142 | if not cfg.partitions.finalised:
143 | cfg.partitions.finalise()
144 |
145 | log.debug("Created %s", self)
146 |
147 | def __iter__(self):
148 | return iter(self.subsets)
149 |
150 | def __str__(self):
151 | ss = ', '.join([str(s) for s in self.subsets])
152 | return "Scheme(%s, %s)" % (self.name, ss)
153 |
154 |
155 | class SchemeSet(object):
156 | """All the schemes added, and also a list of all unique subsets"""
157 | def __init__(self):
158 | """A collection of schemes"""
159 | self.clear_schemes()
160 |
161 | def clear_schemes(self):
162 | self.schemes_by_name = {}
163 | self.schemes_by_subsets = {}
164 |
165 | def add_scheme(self, scheme):
166 | if scheme.name in self.schemes_by_name:
167 | log.error("Cannot add two schemes with same name: '%s'" %
168 | scheme.name)
169 | raise SchemeError
170 |
171 | if scheme.part_subsets in self.schemes_by_subsets:
172 | existing_scheme = \
173 | self.schemes_by_subsets[scheme.part_subsets]
174 | log.warning(
175 | "Scheme named %s being added is identical to existing %s",
176 | scheme.name, existing_scheme)
177 | # raise SchemeError
178 |
179 | self.schemes_by_name[scheme.name] = scheme
180 | self.schemes_by_subsets[scheme.part_subsets] = scheme
181 |
182 | def __len__(self):
183 | return len(self.schemes_by_name)
184 |
185 | def __iter__(self):
186 | return iter(self.schemes_by_name.itervalues())
187 |
188 |
189 | def create_scheme(cfg, scheme_name, scheme_description):
190 | """
191 | Generate a single scheme given a list of numbers that represent the
192 | indexes of the partitions e.g. [0,1,2,3,4,5,6,7]
193 | """
194 |
195 | partition_count = len(
196 | cfg.partitions) # total number of partitions defined by user
197 |
198 | # Check that the correct number of items are in the list
199 | if len(scheme_description) != partition_count:
200 | log.error("There's a problem with the description of scheme %s" %
201 | scheme_name)
202 | raise SchemeError
203 |
204 | # Now generate the pattern
205 | subs = {}
206 | # We use the numbers returned to group the different subsets
207 | for sub_index, grouping in enumerate(scheme_description):
208 | insub = subs.setdefault(grouping, [])
209 | insub.append(sub_index)
210 |
211 | # We now have what we need to create a subset. Each entry will have a
212 | # set of values which are the index for the partition
213 | created_subsets = []
214 | for sub_indexes in subs.values():
215 | sub = subset.Subset(*tuple([cfg.partitions[i] for i in sub_indexes]))
216 | created_subsets.append(sub)
217 |
218 | return Scheme(cfg, str(scheme_name), created_subsets, description=scheme_description)
219 |
220 |
221 | def model_to_scheme(model, scheme_name, cfg):
222 | """Turn a model definition e.g. [0, 1, 2, 3, 4] into a scheme"""
223 | subs = {}
224 | # We use the numbers returned to group the different subsets
225 | for sub_index, grouping in enumerate(model):
226 | insub = subs.setdefault(grouping, [])
227 | insub.append(sub_index)
228 |
229 | # We now have what we need to create a subset. Each entry will have a
230 | # set of values which are the index for the partition
231 | created_subsets = []
232 | for sub_indexes in subs.values():
233 | sub = subset.Subset(*tuple([cfg.partitions[i] for i in sub_indexes]))
234 | created_subsets.append(sub)
235 |
236 | return Scheme(cfg, str(scheme_name), created_subsets)
237 |
238 |
239 | def generate_all_schemes(cfg):
240 | """
 241 | Generate every possible scheme, i.e. every way of grouping the defined partitions
242 | """
243 |
244 | log.info("Generating all possible schemes for the partitions...")
245 |
246 | partition_count = len(
247 | cfg.partitions) # total number of partitions defined by user
248 |
249 | # Now generate the pattern for this many partitions
250 | all_schemes = submodels.get_submodels(partition_count)
251 | scheme_name = 1
252 | scheme_list = []
253 | for scheme in all_schemes:
254 | subs = {}
255 | # We use the numbers returned to group the different subsets
256 | for sub_index, grouping in enumerate(scheme):
257 | insub = subs.setdefault(grouping, [])
258 | insub.append(sub_index)
259 | # We now have what we need to create a subset. Each entry will have a
260 | # set of values which are the index for the partition
261 | created_subsets = []
262 | for sub_indexes in subs.values():
263 | sub = subset.Subset(
264 | *tuple([cfg.partitions[i] for i in sub_indexes]))
265 | created_subsets.append(sub)
266 |
267 | scheme_list.append(
268 | Scheme(cfg, str(scheme_name), created_subsets))
269 |
270 | log.debug("Created scheme %d of %d" % (scheme_name, len(all_schemes)))
271 |
272 | scheme_name += 1
273 |
274 | return scheme_list
275 |
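`create_scheme`, `model_to_scheme` and `generate_all_schemes` all repeat the same grouping step: a description assigns one group label to each partition index, and equal labels fall into the same subset. A stand-alone sketch of just that step (plain lists instead of `subset.Subset`, which needs a configured `cfg`; `group_description` is an illustrative name):

    def group_description(description):
        """One group of partition indexes per distinct label, e.g.
        [0, 0, 1, 2] -> [[0, 1], [2], [3]]."""
        subs = {}
        for sub_index, grouping in enumerate(description):
            subs.setdefault(grouping, []).append(sub_index)
        # Sort the labels so the output order is deterministic.
        return [subs[label] for label in sorted(subs)]

    print(group_description([0, 0, 1, 2]))  # [[0, 1], [2], [3]]
    print(group_description([0, 1, 0, 1]))  # [[0, 2], [1, 3]]

In the real functions each inner list is mapped through `cfg.partitions[i]` and wrapped in a `subset.Subset`; `Scheme.__init__` then rejects descriptions that duplicate or omit partitions, and `SchemeSet.add_scheme` refuses duplicate scheme names.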
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/scheme.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/scheme.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/submodels.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
 13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import math
19 | import logging
20 | log = logging.getLogger("submodels")
21 | import algorithm
22 |
23 | def submodel_generator(result, pat, current, maxn):
24 | """ result is a list to append to
25 | pat is the current pattern (starts as empty list)
 26 | current is the 1-based position in the pattern currently being filled
27 | maxn is the number of items in the pattern
28 | """
29 | if pat:
30 | curmax = max(pat)
31 | else:
32 | curmax = 0
33 | for i in range(current):
34 | if i-1 <= curmax:
35 | newpat = pat[:]
36 | newpat.append(i)
37 | if current == maxn:
38 | result.append(newpat)
39 | else:
40 | submodel_generator(result, newpat, current+1, maxn)
41 |
42 | def submodel_iterator(pat, current, maxn):
43 | """same as generator but yields instead"""
44 | if pat:
45 | curmax = max(pat)
46 | else:
47 | curmax = 0
48 | for i in range(current):
49 | if i-1 <= curmax:
50 | newpat = pat[:]
51 | newpat.append(i)
52 | if current == maxn:
53 | yield newpat
54 | else:
55 | for b in submodel_iterator(newpat, current+1, maxn):
56 | yield b
57 |
58 | def a_choose_b(n,k):
59 | return reduce(lambda a,b: a*(n-b)/(b+1),xrange(k),1)
60 |
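`a_choose_b` folds b = 0 .. k-1 through a*(n-b)/(b+1), and the division is exact at every step because each intermediate product is itself a binomial coefficient, C(n, b+1). The same trick written to run under Python 2 or 3 (`choose` is an illustrative name):

    from functools import reduce  # reduce is a builtin in Python 2

    def choose(n, k):
        # After processing b, the accumulator equals C(n, b + 1),
        # so the floor division never loses anything.
        return reduce(lambda a, b: a * (n - b) // (b + 1), range(k), 1)

    assert choose(5, 2) == 10
    assert choose(10, 3) == 120
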
61 | def count_relaxed_clustering_subsets(N, cluster_percent, output=False):
62 | #startscheme
63 | start_scheme = N
64 | #firstbatch is just cluster_percent of N choose 2
65 | step_1 = int(math.ceil(a_choose_b(N, 2)*cluster_percent*0.01))
66 | previous = step_1
67 | cumsum = start_scheme+step_1
68 | if output: print start_scheme
69 | if output: print cumsum
70 | #now for the rest
71 | for i in reversed(xrange(N)):
72 | # once we get to the all combined scheme we can stop
73 | if i == 1:
74 | break
75 | num_new_schemes = int(math.ceil((a_choose_b(i, 2))*cluster_percent*0.01))
76 | # but those new schemes include a lot we will have already analysed
77 | # so we want to subtract that many. We could have already seen up to i-1 choose 2
78 | # the worst case is that the scheme we chose knocked out the maximum number of
79 | # previously analysed schemes, which is just 2(i)-1, so:
80 | worst_case = 2*i - 1
81 | num_already_analysed = previous - worst_case
82 | if num_already_analysed <0: num_already_analysed=0
83 | # now we transfer over the 'previous' for the next round of the loop
84 | previous = num_new_schemes
85 | # now we calculate the final number of new schemes
86 | num_new_schemes = num_new_schemes - num_already_analysed
87 | cumsum += num_new_schemes
88 | if output:print cumsum
89 | return cumsum
90 |
91 | def count_relaxed_clustering_schemes(N, cluster_percent, output=False):
92 | #startscheme
93 | start_scheme = 1
94 | #firstbatch is just cluster_percent of N choose 2
95 | step_1 = int(math.ceil(a_choose_b(N, 2)*cluster_percent*0.01))
96 | previous = step_1
97 | cumsum = start_scheme+step_1
98 | if output: print start_scheme
99 | if output: print cumsum
100 | #now for the rest
101 | for i in reversed(xrange(N)):
102 | # each subsequent step is cluster_percent of i choose 2
103 | if i == 1:
104 | break
105 | num_new_schemes = int(math.ceil((a_choose_b(i, 2))*cluster_percent*0.01))
106 | cumsum += num_new_schemes
107 | if output:print cumsum
108 | return cumsum
109 |
110 | def count_greedy_schemes(N):
111 | """oeis.org reveals this is 1+(N*(N+1)*(N-1))/6"""
112 | count = 1+(N*(N+1)*(N-1))/6
113 | return count
114 |
115 | def count_greedy_subsets(N):
116 | """oeis.org says thes are Central polygonal numbers: n^2 - n + 1. """
117 | count = (N*N) - N + 1
118 | return count
119 |
120 | def bell_numbers(N):
121 | ## Return the bell number for N subsets
122 | # script modified from Wikipedia: http://en.wikipedia.org/wiki/Bell_number
123 | N = N+1 ## Bell numbers are indexed from zero
124 | t = [[1]] ## Initialize the triangle as a two-dimensional array
125 | c = 1 ## Bell numbers count
126 | while c <= N:
127 | if c >= N:
128 | return t[-1][0] ## Yield the Bell number of the previous row
129 | row = [t[-1][-1]] ## Initialize a new row
130 | for b in t[-1]:
131 | row.append(row[-1] + b) ## Populate the new row
132 | c += 1 ## We have found another Bell number
133 | t.append(row) ## Append the row to the triangle
134 |
135 |
136 | def get_submodels(N):
137 | """Return all the submodels
138 | """
139 | log.debug("Generating submodels for %s partitions", N)
140 | result = []
141 | submodel_generator(result, [], 1, N)
 142 | log.debug("Resulting number of submodels is %s", len(result))
143 | return result
144 |
145 | def count_all_schemes(N):
146 | """Count the number of submodels we"ve got"""
147 | count = bell_numbers(N)
148 | return count
149 |
150 | def count_all_subsets(N):
151 | """Count the number of subses we'll have to look at given a certain number of starting partitions"""
152 | count = (2**N) - 1
153 | return count
154 |
155 |
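A self-contained cross-check of the two counting devices in this file: `submodel_generator` emits restricted-growth strings, which are in bijection with set partitions, so their count must equal the Bell number produced by `bell_numbers`. The sketch below re-derives both independently (`rgs` and `bell` are illustrative names, not partfinder functions) and runs under Python 2 or 3:

    def rgs(n):
        """Yield restricted-growth strings of length n: the first element
        is 0 and each later element is at most max-so-far + 1."""
        def rec(pat):
            if len(pat) == n:
                yield list(pat)
                return
            cap = (max(pat) + 1) if pat else 0
            for i in range(cap + 1):
                pat.append(i)
                for full in rec(pat):
                    yield full
                pat.pop()
        return rec([])

    def bell(n):
        """Bell number B(n) via the Bell triangle, as in bell_numbers."""
        row = [1]
        for _ in range(n):
            nxt = [row[-1]]
            for x in row:
                nxt.append(nxt[-1] + x)
            row = nxt
        return row[0]

    for n in range(1, 8):
        assert len(list(rgs(n))) == bell(n)
    print("Bell numbers 1..7: %s" % [bell(n) for n in range(1, 8)])
    # -> Bell numbers 1..7: [1, 2, 5, 15, 52, 203, 877]
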
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/submodels.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/submodels.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/subset.py:
--------------------------------------------------------------------------------
1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | #This program is free software: you can redistribute it and/or modify it
4 | #under the terms of the GNU General Public License as published by the
5 | #Free Software Foundation, either version 3 of the License, or (at your
6 | #option) any later version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
 13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import logging
19 | log = logging.getLogger("subset")
20 | import os
21 | import weakref
22 |
23 | from hashlib import md5
24 |
25 | # import base64
26 | # from zlib import compress
27 |
28 | import cPickle as pickle
29 | from math import log as logarithm
30 | from alignment import Alignment, SubsetAlignment
31 | from util import PartitionFinderError, remove_runID_files
32 |
33 | FRESH, PREPARED, DONE = range(3)
34 |
35 |
36 | class SubsetError(PartitionFinderError):
37 | pass
38 |
39 |
40 | def count_subsets():
41 | return 1
42 | # return len(Subset._cache)
43 | #
44 |
45 |
46 | def clear_subsets():
47 | pass
48 | # Subset._cache.clear()
49 |
50 |
51 | class Subset(object):
52 | """A Subset of Partitions
53 | """
54 | # TODO: Move this to the config -- once we have a global one
55 | _cache = weakref.WeakValueDictionary()
56 |
57 | def __new__(cls, *parts):
58 | """Return the SAME subset if the partitions are identical. This is
59 | basically a pythonized factory. See here:
60 | http://codesnipers.com/?q=python-flyweights
61 | """
62 |
63 | cacheid = frozenset(parts)
64 | obj = Subset._cache.get(cacheid, None)
65 | # TODO Flush cache? USE MRU? functools.lrucache
66 | if not obj:
67 | obj = object.__new__(cls)
68 | Subset._cache[cacheid] = obj
69 | obj.init(cacheid, *parts)
70 |
71 | # obj = object.__new__(cls)
72 | # cacheid = frozenset(parts)
73 | # obj.init(cacheid, *parts)
74 | return obj
75 |
76 | def init(self, cacheid, *parts):
77 | # Error checking....
78 | self.status = FRESH
79 |
80 | tempparts = set()
81 | for p in parts:
82 | if p.partition_set is None:
83 | log.error("You cannot add a Partition to a Subset until "
84 | "the Partition belongs to a PartitionSet")
85 | raise SubsetError
86 |
87 | if p in tempparts:
88 | log.error("%s is duplicated in a Subset", p)
89 | raise SubsetError
90 |
91 | tempparts.add(p)
92 |
93 | self.partitions = cacheid
94 |
95 | # a list of columns in the subset
96 | self.columns = []
97 | self.columnset = set()
98 | for p in parts:
99 | self.columns += p.columns
100 | self.columnset |= p.columnset
101 | self.columns.sort()
102 |
103 | self.results = {}
104 | self.best_info_score = None # e.g. AIC, BIC, AICc
105 | self.best_model = None
106 | self.best_params = None
107 | self.best_lnl = None
108 | self.alignment_path = None
109 | log.debug("Created %s", self)
110 |
111 | def __str__(self):
112 | return "(%s)" % ", ".join([str(p.name) for p in self.partitions])
113 |
114 | @property
115 | def full_name(self):
116 | if hasattr(self, '_full_name'):
117 | nm = self._full_name
118 | else:
119 | s = sorted([p.name for p in self.partitions])
120 | nm = '-'.join(s)
121 | self._full_name = nm
122 | return nm
123 |
124 | @property
125 | def name(self):
126 | # Cache this
127 | if hasattr(self, '_name'):
128 | nm = self._name
129 | else:
130 | nm = self.full_name
131 | # This gets super long -- we can shorten it like this... This is
132 | # a slightly lazy solution. There is some vanishingly small chance
133 | # that we'll get the same thing. Google "MD5 Hash Collision"
134 | nm = md5(nm).hexdigest()
135 | self._name = nm
136 | return nm
137 |
138 | def __iter__(self):
139 | return iter(self.partitions)
140 |
141 | def add_result(self, cfg, model, result):
142 | result.model = model
143 | result.params = cfg.processor.models.get_num_params(model)
144 |
145 | K = float(result.params)
146 | n = float(len(self.columnset))
147 | lnL = float(result.lnl)
 148 | #here we put in a catch for small subsets, where n<K+2
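
The `__new__` factory in the `Subset` class above returns the same object whenever it is asked for the same set of partitions, so per-subset results are computed once and shared across schemes. A stand-alone sketch of that flyweight idiom (`Interned` is a hypothetical class, not partfinder code):

    import weakref

    class Interned(object):
        """Flyweight: equal constructor arguments yield the same object."""
        _cache = weakref.WeakValueDictionary()

        def __new__(cls, *parts):
            key = frozenset(parts)
            obj = Interned._cache.get(key)
            if obj is None:
                obj = object.__new__(cls)
                obj.parts = key
                Interned._cache[key] = obj
            return obj

    a = Interned(1, 2, 3)
    b = Interned(3, 2, 1)  # same frozenset of parts -> the cached object
    assert a is b

Note that `Subset` calls its own `init` method by hand inside `__new__` instead of defining `__init__`: Python re-runs `__init__` on every construction, which would wipe the cached results on each cache hit.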
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/subset.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/subset.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/threadpool.py:
--------------------------------------------------------------------------------
 17 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
 18 | #program, the RAxML program, the PyParsing library, and the python-cluster library
19 | #all of which are protected by their own licenses and conditions, using
20 | #PartitionFinder implies that you agree with those licences and conditions as well.
21 |
22 | import logging
23 | log = logging.getLogger("threadpool")
24 | import threading
25 | from time import sleep
26 | import multiprocessing
27 |
28 | _cpus = None
29 |
30 |
31 | def get_cpu_count():
32 | global _cpus
33 | if _cpus is not None:
34 | return _cpus
35 |
36 | try:
37 | _cpus = multiprocessing.cpu_count()
38 | except:
39 | _cpus = 1
40 | log.info("I cannot detect the number of processors...")
41 |
42 | log.info("Found %s cpus", _cpus)
43 | return _cpus
44 |
45 |
46 | class Pool(object):
47 | def __init__(self, tasks, numthreads=-1):
48 | """Initialize the thread pool with numthreads workers and all tasks"""
49 | self.more_tasks = True
50 | self.tasks = tasks
51 | self.task_lock = threading.Condition(threading.Lock())
52 | self.threads = []
53 | self.failed = False
54 |
55 | numtasks = len(tasks)
56 | if numtasks == 0:
57 | log.warning("You did not give any tasks to do...")
58 | self.more_tasks = False
59 | return
60 |
61 | if numthreads <= 1:
62 | numthreads = get_cpu_count()
63 | if numtasks < numthreads:
64 | numthreads = numtasks
65 |
66 | log.debug("Creating %s threads for %s tasks", numthreads, numtasks)
67 | for i in range(numthreads):
68 | t = Thread(self)
69 | self.threads.append(t)
70 | t.start()
71 |
72 | def next_task(self):
73 | self.task_lock.acquire()
74 | try:
75 | if self.tasks == []:
76 | self.more_tasks = False
77 | return None, None
78 | else:
79 | return self.tasks.pop(0)
80 | finally:
81 | self.task_lock.release()
82 |
83 | def kill(self, e):
84 | self.task_lock.acquire()
85 | self.tasks = []
86 | self.more_tasks = False
87 | self.failed = True
88 | self.exception = e
89 | self.task_lock.release()
90 |
91 | def join(self):
92 | # TODO: I don't think we need this bit....
93 | # Wait till all tasks have been taken
94 | while self.more_tasks:
95 | sleep(.1)
96 | # ... now wait for them all to finish
97 | for t in self.threads:
98 | t.join()
99 |
100 | if self.failed:
101 | raise self.exception
102 |
103 |
104 | class Thread(threading.Thread):
105 | def __init__(self, pool):
106 | threading.Thread.__init__(self)
107 | self.pool = pool
108 |
109 | def run(self):
110 | while 1:
111 | cmd, args = self.pool.next_task()
112 | # If there's nothing to do, return
113 | if cmd is None:
114 | break
115 | try:
116 | cmd(*args)
117 | except Exception as e:
118 | # The error should already have been reported.
119 | # Stop operation and kill the entire pool. Then reraise the
120 | # error
121 | self.pool.kill(e)
122 | break
123 |
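`Thread.run` unpacks each task as `cmd, args = self.pool.next_task()`, so the task list handed to `Pool` is a list of `(callable, argument_tuple)` pairs. A minimal driver under that assumption (`greet` is invented for illustration, and the import presumes the partfinder package is on `sys.path`):

    import logging
    logging.basicConfig()  # let the pool's log messages show up

    from partfinder.threadpool import Pool  # assumed import path

    def greet(name, punctuation):
        print("hello, %s%s" % (name, punctuation))

    tasks = [(greet, ("phyml", "!")), (greet, ("raxml", "?"))]
    pool = Pool(tasks, numthreads=2)  # worker threads start immediately
    pool.join()  # blocks until all tasks finish; re-raises a worker's exception
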
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/threadpool.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/threadpool.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/util.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2012 Robert Lanfear and Brett Calcott
2 | #
3 | # This program is free software: you can redistribute it and/or modify it under
4 | # the terms of the GNU General Public License as published by the Free Software
5 | # Foundation, either version 3 of the License, or (at your option) any later
6 | # version.
7 | #
8 | #This program is distributed in the hope that it will be useful, but
9 | #WITHOUT ANY WARRANTY; without even the implied warranty of
10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 | #General Public License for more details. You should have received a copy
12 | #of the GNU General Public License along with this program. If not, see
 13 | #<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
14 | #program, the RAxML program, and the PyParsing library,
15 | #all of which are protected by their own licenses and conditions, using
16 | #PartitionFinder implies that you agree with those licences and conditions as well.
17 |
18 | import logging
19 | log = logging.getLogger("util")
20 | import os
21 | import sys
22 | import fnmatch
23 |
24 |
25 | # Base error class
26 | class PartitionFinderError(Exception):
27 | pass
28 |
29 |
30 | class PhylogenyProgramError(PartitionFinderError):
31 | pass
32 |
33 |
34 | def check_file_exists(pth):
35 | if not os.path.exists(pth) or not os.path.isfile(pth):
36 | if pth.count("partition_finder.cfg") > 0:
37 | log.error("Failed to find configuration file: '%s'. "
38 | "For PartitionFinder to run, there must be a file called 'partition_finder.cfg' "
39 | "located in the same folder as your alignment. Please check and try again.", pth)
40 | raise PartitionFinderError
41 | else:
42 | log.error(
43 | "Failed to find file: '%s'. Please check and try again.", pth)
44 | raise PartitionFinderError
45 |
46 | def delete_files(pths):
47 | """ delete files, but watch out for a WindowsError that crops up sometimes with threading
48 | oddly, this error occurs, but the files get deleted anyway. So we ignore it for now
49 | """
50 | for f in pths:
51 | try:
52 | os.remove(f)
53 | except OSError:
54 | log.debug("Found and ignored Error when deleting file %s" % f)
55 | pass
56 | log.debug("deleted %d files" % len(pths))
57 |
58 |
59 | def check_folder_exists(pth):
60 | if not os.path.exists(pth) or not os.path.isdir(pth):
61 | log.error("No such folder: '%s'", pth)
62 | raise PartitionFinderError
63 |
64 | def clean_out_folder(folder, keep = []):
65 | """Hat Tip: http://stackoverflow.com/questions/185936/delete-folder-contents-in-python
66 | """
67 | for the_file in os.listdir(folder):
68 | if the_file not in keep:
69 | file_path = os.path.join(folder, the_file)
70 | try:
71 | if os.path.isfile(file_path):
72 | os.unlink(file_path)
73 | except Exception, e:
74 | log.error(e)
75 | raise PartitionFinderError
76 |
77 |
78 | def make_dir(pth):
79 | if os.path.exists(pth):
80 | if not os.path.isdir(pth):
81 | log.error("Cannot create folder '%s'", pth)
82 | raise PartitionFinderError
83 | else:
84 | os.mkdir(pth)
85 |
86 |
87 | def remove_runID_files(aln_pth):
88 | """remove all files that match a particular run_ID. Useful for cleaning out directories
89 | but ONLY after a whole analysis of a subset is completely finished, be careful!"""
90 | head, tail = os.path.split(aln_pth)
91 | run_ID = os.path.splitext(tail)[0]
92 | head = os.path.abspath(head)
93 | fnames = os.listdir(head)
94 | fs = fnmatch.filter(fnames, '*%s*' % run_ID)
95 | for f in fs:
96 | try:
97 | os.remove(os.path.join(head, f))
98 | except OSError:
99 | # Don't complain if you can't delete them
100 | # (This is here because we sometimes try and delete things twice in
101 | # the threading).
102 | pass
103 |
104 |
105 | # def we_are_frozen():
106 | # # All of the modules are built-in to the interpreter, e.g., by py2exe
107 | # return hasattr(sys, "frozen")
108 |
109 |
110 | # def get_root_install_path():
111 | # pth = os.path.abspath(__file__)
112 | # # Split off the name and the directory...
113 | # pth, not_used = os.path.split(pth)
114 | # pth, not_used = os.path.split(pth)
115 | # return pth
116 |
117 | # def module_path():
118 | # encoding = sys.getfilesystemencoding()
119 | # if we_are_frozen():
120 | # return os.path.dirname(unicode(sys.executable, encoding))
121 | # return os.path.abspath(os.path.dirname(unicode(__file__, encoding)))
122 |
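`remove_runID_files` takes the alignment filename, strips its extension to get the run ID, and deletes every sibling file whose name contains that ID. The matching rule, demonstrated on made-up filenames without touching the filesystem:

    import fnmatch
    import os

    aln_pth = "/tmp/analysis/subset_1a2b.phy"  # hypothetical path
    run_ID = os.path.splitext(os.path.split(aln_pth)[1])[0]  # 'subset_1a2b'
    names = ["subset_1a2b.phy", "subset_1a2b_phyml_stats.txt",
             "RAxML_info.subset_1a2b", "other_subset.phy"]
    print(fnmatch.filter(names, '*%s*' % run_ID))
    # -> ['subset_1a2b.phy', 'subset_1a2b_phyml_stats.txt', 'RAxML_info.subset_1a2b']
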
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/util.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/util.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/version.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Author: Douglas Creager
3 | # This file is placed into the public domain.
4 | #
5 | # Minor Modifications have been made by Brett Calcott
6 | # * Write the VERSION into the current folder
7 |
8 | # Calculates the current version number. If possible, this is the
9 | # output of “git describe”, modified to conform to the versioning
10 | # scheme that setuptools uses. If “git describe” returns an error
11 | # (most likely because we're in an unpacked copy of a release tarball,
12 | # rather than in a git working copy), then we fall back on reading the
13 | # contents of the RELEASE-VERSION file.
14 | #
 15 | # To use this script, simply import it in your setup.py file, and use the
16 | # results of get_git_version() as your package version:
17 | #
18 | # from version import *
19 | #
20 | # setup(
21 | # version=get_git_version(),
22 | # .
23 | # .
24 | # .
25 | # )
26 | #
27 | # This will automatically update the RELEASE-VERSION file, if
28 | # necessary. Note that the RELEASE-VERSION file should *not* be
29 | # checked into git; please add it to your top-level .gitignore file.
30 | #
31 | # You'll probably want to distribute the RELEASE-VERSION file in your
32 | # sdist tarballs; to do this, just create a MANIFEST.in file that
33 | # contains the following line:
34 | #
35 | # include RELEASE-VERSION
36 | from subprocess import Popen, PIPE
37 | import os
38 |
39 |
40 | def get_version_path():
41 | # Get current dir, then parent dir
42 | pth = os.path.dirname(os.path.abspath(__file__))
43 | pth, here = os.path.split(pth)
44 | return os.path.join(pth, "RELEASE-VERSION")
45 |
46 |
47 | def call_git_describe(abbrev=4):
48 | try:
49 | p = Popen(['git', 'describe', '--abbrev=%d' % abbrev],
50 | stdout=PIPE, stderr=PIPE)
51 | p.stderr.close()
52 | line = p.stdout.readlines()[0]
53 | return line.strip()
54 |
55 | except:
56 | return None
57 |
58 |
59 | def read_release_version():
60 | try:
61 | f = open(get_version_path(), "r")
62 |
63 | try:
64 | version = f.readlines()[0]
65 | return version.strip()
66 |
67 | finally:
68 | f.close()
69 |
70 | except:
71 | return None
72 |
73 |
74 | def write_release_version(version):
75 | f = open(get_version_path(), "w")
76 | f.write("%s\n" % version)
77 | f.close()
78 |
79 |
80 |
81 | def get_git_version(abbrev=4):
82 | # Read in the version that's currently in RELEASE-VERSION.
83 |
84 | release_version = read_release_version()
85 |
86 | # First try to get the current version using “git describe”.
87 |
88 | version = call_git_describe(abbrev)
89 |
90 | # If that doesn't work, fall back on the value that's in
91 | # RELEASE-VERSION.
92 |
93 | if version is None:
94 | version = release_version
95 |
96 | # If we still don't have anything, that's an error.
97 |
98 | if version is None:
99 | raise ValueError("Cannot find the version number!")
100 |
101 | # If the current version is different from what's in the
102 | # RELEASE-VERSION file, update the file to be current.
103 |
104 | if version != release_version:
105 | write_release_version(version)
106 |
107 | # Finally, return the current version.
108 |
109 | return version
110 |
111 | def get_version():
112 | version = read_release_version()
113 | if version is None:
114 | raise ValueError("Cannot find the version number!")
115 | return version
116 |
117 | if __name__ == "__main__":
118 | print get_version()
119 |
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/partfinder/version.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/version.pyc
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/programs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/programs/.DS_Store
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/programs/phyml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/programs/phyml
--------------------------------------------------------------------------------
/PartitionFinderV1.1.1/programs/raxml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/programs/raxml
--------------------------------------------------------------------------------
/Xenarthrans/fasta/readme:
--------------------------------------------------------------------------------
 1 | This is the test dataset of xenarthran mitochondrial genomes
2 |
--------------------------------------------------------------------------------
/bin/BMGE.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/BMGE.jar
--------------------------------------------------------------------------------
/bin/Gblocks:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/Gblocks
--------------------------------------------------------------------------------
/bin/blastall:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/blastall
--------------------------------------------------------------------------------
/bin/formatdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/formatdb
--------------------------------------------------------------------------------
/bin/noisy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/noisy
--------------------------------------------------------------------------------
/bin/progressiveMauve:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/progressiveMauve
--------------------------------------------------------------------------------
/bin/readal:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/readal
--------------------------------------------------------------------------------
/bin/trimal:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/trimal
--------------------------------------------------------------------------------
/plant/fasta/readme:
--------------------------------------------------------------------------------
1 | This is the test data of 52 higher plant chloroplast genomes
2 |
--------------------------------------------------------------------------------