├── HomBlocks.pl ├── PartitionFinderV1.1.1 ├── .DS_Store ├── PartitionFinder.py ├── PartitionFinderProtein.py ├── README.md ├── RELEASE-VERSION ├── ROGP_v2.0-backup.pl ├── circoletto.pl ├── docs │ ├── .DS_Store │ └── Manual_v1.1.1.pdf ├── examples │ ├── .DS_Store │ ├── README.txt │ ├── aminoacid │ │ ├── Als_etal_2004.phy │ │ └── partition_finder.cfg │ └── nucleotide │ │ ├── .DS_Store │ │ ├── partition_finder.cfg │ │ └── test.phy ├── partfinder │ ├── .DS_Store │ ├── __init__.py │ ├── __init__.pyc │ ├── algorithm.py │ ├── algorithm.pyc │ ├── alignment.py │ ├── alignment.pyc │ ├── analysis.py │ ├── analysis.pyc │ ├── analysis_method.py │ ├── analysis_method.pyc │ ├── config.py │ ├── config.pyc │ ├── main.py │ ├── main.pyc │ ├── neighbour.py │ ├── neighbour.pyc │ ├── parser.py │ ├── parser.pyc │ ├── partition.py │ ├── partition.pyc │ ├── phyml.py │ ├── phyml.pyc │ ├── phyml_models.py │ ├── phyml_models.pyc │ ├── progress.py │ ├── progress.pyc │ ├── pyparsing.py │ ├── pyparsing.pyc │ ├── raxml.py │ ├── raxml.pyc │ ├── raxml_models.py │ ├── raxml_models.pyc │ ├── reporter.py │ ├── reporter.pyc │ ├── results.py │ ├── results.pyc │ ├── scheme.py │ ├── scheme.pyc │ ├── submodels.py │ ├── submodels.pyc │ ├── subset.py │ ├── subset.pyc │ ├── threadpool.py │ ├── threadpool.pyc │ ├── util.py │ ├── util.pyc │ ├── version.py │ └── version.pyc └── programs │ ├── .DS_Store │ ├── phyml │ └── raxml ├── README.md ├── Xenarthrans └── fasta │ ├── Bradypus_pygmaeus.fasta │ ├── Bradypus_torquatus.fasta │ ├── Bradypus_tridactylus.fasta │ ├── Bradypus_variegatus.fasta │ ├── Bradypus_variegatus_old.fasta │ ├── Cabassous_centralis.fasta │ ├── Cabassous_chacoensis.fasta │ ├── Cabassous_tatouay.fasta │ ├── Cabassous_unicinctus_ISEM_T-2291.fasta │ ├── Cabassous_unicinctus_MNHN_1999-1068.fasta │ ├── Calyptophractus_retusus.fasta │ ├── Chaetophractus_vellerosus.fasta │ ├── Chaetophractus_villosus.fasta │ ├── Chlamyphorus_truncatus.fasta │ ├── Choloepus_didactylus.fasta │ ├── 
Choloepus_didactylus_old.fasta │ ├── Choloepus_hoffmanni.fasta │ ├── Cyclopes_didactylus.fasta │ ├── Dasypus_hybridus.fasta │ ├── Dasypus_kappleri.fasta │ ├── Dasypus_novemcinctus.fasta │ ├── Dasypus_novemcinctus_old.fasta │ ├── Dasypus_pilosus_LSUMZ_21888.fasta │ ├── Dasypus_pilosus_MSB_49990.fasta │ ├── Dasypus_sabanicola.fasta │ ├── Dasypus_septemcinctus.fasta │ ├── Dasypus_yepesi.fasta │ ├── Euphractus_sexcinctus.fasta │ ├── Myrmecophaga_tridactyla.fasta │ ├── Priodontes_maximus.fasta │ ├── Tamandua_mexicana.fasta │ ├── Tamandua_tetradactyla.fasta │ ├── Tamandua_tetradactyla_old.fasta │ ├── Tolypeutes_matacus.fasta │ ├── Tolypeutes_tricinctus.fasta │ ├── Zaedyus_pichiy.fasta │ └── readme ├── bin ├── BMGE.jar ├── Gblocks ├── blastall ├── formatdb ├── noisy ├── progressiveMauve ├── readal └── trimal └── plant └── fasta ├── Acidosasa_purpurea.fasta ├── Aegilops_cylindrica.fasta ├── Aegilops_geniculata.fasta ├── Aegilops_speltoides_SPE0661.fasta ├── Aegilops_tauschii.fasta ├── Agrostis_stolonifera.fasta ├── Anomochloa_marantoidea.fasta ├── Arundinaria_appalachiana.fasta ├── Arundinaria_gigantea.fasta ├── Arundinaria_tecta.fasta ├── Bambusa_emeiensis.fasta ├── Bambusa_multiplex.fasta ├── Bambusa_oldhamii.fasta ├── Brachypodium_distachyon.fasta ├── Coix_lacryma-jobi.fasta ├── Dendrocalamus_latiflorus.fasta ├── Deschampsia_antarctica.fasta ├── Ferrocalamus_rimosivaginus.fasta ├── Festuca_altissima.fasta ├── Festuca_arundinacea.fasta ├── Festuca_ovina.fasta ├── Festuca_pratensis.fasta ├── Hordeum_vulgare_sub_vulgare.fasta ├── Indocalamus_longiauritus.fasta ├── Leersia_tisserantii.fasta ├── Lolium_multiflorum.fasta ├── Lolium_perenne.fasta ├── Oryza_meridionalis.fasta ├── Oryza_nivara.fasta ├── Oryza_rufipogon.fasta ├── Oryza_sativa_93-11.fasta ├── Oryza_sativa_Nipponbare.fasta ├── Panicum_virgatum.fasta ├── Pharus_lappulaceus.fasta ├── Pharus_latifolius.fasta ├── Phragmites_australis.fasta ├── Phyllostachys_edulis.fasta ├── Phyllostachys_nigra_var_henonis.fasta ├── 
Phyllostachys_propinqua.fasta ├── Puelia_olyriformis.fasta ├── Rhynchoryza_subulata.fasta ├── Saccharum_officinarum.fasta ├── Secale_cereale.fasta ├── Setaria_italica.fasta ├── Sorghum_bicolor.fasta ├── Sorghum_timorense.fasta ├── Triticum_aestivum.fasta ├── Triticum_monococcum.fasta ├── Triticum_urartu.fasta ├── Typha_latifolia.fasta ├── Zea_mays.fasta ├── Zizania_latifolia.fasta └── readme /PartitionFinderV1.1.1/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/.DS_Store -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/PartitionFinder.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | #This program is free software: you can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, the PyParsing library, and the python-cluster library 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import sys 19 | from partfinder import main 20 | 21 | if __name__ == "__main__": 22 | # Well behaved unix programs exits with 0 on success... 
23 | sys.exit(main.main("PartitionFinder", "DNA")) 24 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/PartitionFinderProtein.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | #This program is free software: you can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, the PyParsing library, and the python-cluster library 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import sys 19 | from partfinder import main 20 | 21 | if __name__ == "__main__": 22 | # Well behaved unix programs exits with 0 on success... 23 | sys.exit(main.main("PartitionFinderProtein", "protein")) 24 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/README.md: -------------------------------------------------------------------------------- 1 | # PartitionFinder 2 | 3 | PartitionFinder and PartitionFinderProtein are Python programs for simultaneously 4 | choosing partitioning schemes and models of molecular evolution for sequence data. 
5 | You can use them before running a phylogenetic analysis, in order 6 | to decide how to divide up your sequence data into separate blocks before 7 | analysis, and to simultaneously perform model selection on each of those 8 | blocks. 9 | 10 | # Operating System 11 | 12 | Mac and Windows are supported. 13 | All of the code was written with Linux in mind too, so if you are interested 14 | in porting it to Linux, please get in touch (or just try it out!). 15 | 16 | # Manual 17 | 18 | is in the /docs folder. 19 | 20 | # Quick Start 21 | 22 | * Make sure you have Python 2.7 installed first, if not, go to www.python.org/getit/ 23 | 24 | * For PartitionFinderProtein just substitute 'PartitionFinderProtein' for 'PartitionFinder' below 25 | 26 | 1. Open Terminal (on a Mac) or Command Prompt (on Windows) and cd to the directory with PartitionFinder in it 27 | 2. Run PartitionFinder by typing at the command prompt: 28 | 29 | python PartitionFinder.py example 30 | 31 | This will run the included example analysis for PartitionFinder. More generally, the command line for PartitionFinder looks like this: 32 | 33 | python 34 | 35 | where is the full file-path to the PartitionFinder.py file 36 | and is the full filepath to a folder with a phylip alignemnt and associated .cfg file. 37 | 38 | For more details, read the manual. 39 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/RELEASE-VERSION: -------------------------------------------------------------------------------- 1 | 1.1.1 2 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/ROGP_v2.0-backup.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # 3 | #AUTHOR 4 | #Guiqi Bi :fenghen360@126.com 5 | #VERSION 6 | #ROGP v0.1 7 | #COPYRIGHT & LICENCE 8 | #This script is free software; you can redistribute it and/or modify it. 
9 | #This script is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of merchantability or fitness for a particular purpose. 10 | 11 | my $USAGE = "\nusage: ./ROGP.pl 12 | \nparameters: 13 | -in= Genome alignment outputfile derived from Muave. If you set --align, ignore this input parameter. 14 | -number= Number of taxa used in aliggment (should be precious). If you set --align, ignore this input parameter. 15 | --align If you want to align sequences by mauve, add this parameter (Default: progressiveMauve). 16 | Then you should split every sequence into a single fasta file. Suffix must be .fasta 17 | --path= Absolute path to directory where you put in fasta sequences. 18 | --mauve-out= The output file produced by mauve (Absolute path). If you set --align parameter. 19 | -help Print the usage.\n"; 20 | 21 | 22 | #------------------------------------------------------------------------------------------- 23 | #提取参数 24 | foreach my $paras (@ARGV){ 25 | if ($paras=~/in/){ 26 | $in=(split "=", $paras)[1]; 27 | } 28 | if ($paras=~/number/){ 29 | $number=(split "=", $paras)[1]; 30 | } 31 | if ($paras=~/help/){ 32 | print $USAGE; 33 | } 34 | if ($paras=~/align/){ 35 | $align=1; 36 | } 37 | if ($paras=~/path/){ 38 | $path=(split "=", $paras)[1]; 39 | } 40 | if ($paras=~/mauve-out/){ 41 | $mauveout=(split "=", $paras)[1]; 42 | } 43 | } 44 | #------------------------------------------------------------------------------------------- 45 | #参数检验 46 | if($align){ 47 | undef $in; #如果设置了align,就清空$in和$number 48 | undef $number; 49 | if(!$mauveout){ 50 | print "Please set the --mauve-out= parameter!\n";#检查是否设置了--mauve-out参数 51 | exit; 52 | } 53 | elsif(!$path){ 54 | print "Please set the --path= parameter!\n";#检查是否设置了path参数 55 | exit; 56 | } 57 | else{ 58 | $in=$mauveout; #把mauveout回传给$in 59 | my @files=glob "$path*.fasta"; #匹配fasta文件并保存到数组里 60 | my $filecom; 61 | $number=$#files+1; #回传文件数量给number变量 62 | print "Totla $number 
files detected!\nThe list of sequences will be aligned:\n"; 63 | foreach(@files){ 64 | $filecom.=" $_"; 65 | print "$_\n"; #打印文件进行检验 66 | } 67 | print "<===========Please re-check.============>\n\nKeep going?\n\[Enter press/Ctrl+C\]\n"; 68 | my $go=; #标准输入来决定程序是否进行下去 69 | if(!$go){exit;} 70 | else{print "Aligning, please wait.\n"; 71 | `./progressiveMauve --output=$mauveout $filecom|tee mauve-screenout.log`; 72 | open MAUVE,"<","mauve-screenout.log"; 73 | while(){print;} 74 | } 75 | 76 | } 77 | 78 | } 79 | 80 | print "\n\nROGP started!\n\nAligned fasta file is $in\nNumber of the taxon species used in alignment is $number\n"; 81 | 82 | my $taxon=1; #记录fasta标头识别数字 83 | my $file=1; #用于输出模块标头的head 84 | my $module; 85 | 86 | #------------------------------------------------------------------------------------------- 87 | #首先提取fasta标头 88 | open(HEAD, ">>head.tmp"); 89 | open(IN, "<$in")||die "Can't open $in:$!\n"; 90 | while(){ 91 | if($_=~m/^>/){ 92 | print HEAD "$_"; 93 | } 94 | } 95 | close(HEAD); 96 | 97 | print "Identify the colinear blocks. 
Be patient\n\n\n"; 98 | open(TMP, "){ 100 | 101 | if($_=~m/^>\s$taxon:/){ 102 | $module.="$_"; 103 | $taxon++; 104 | if($taxon==$number+1){ 105 | open(OUT, ">module_$file.head") ; 106 | print OUT "$module"; 107 | close(OUT); 108 | undef $module; 109 | $taxon=1; 110 | $file++; 111 | } 112 | } 113 | else{ 114 | undef $module; 115 | $taxon=1; 116 | } 117 | } 118 | 119 | `rm head.tmp`; 120 | print "Finished!\n\n\n"; 121 | 122 | 123 | #------------------------------------------------------------------------------------------- 124 | #抽取序列并修改文件名 125 | my @head=glob("*.head"); 126 | my $temp_num=@head; #temp_num临时记录 127 | print "$temp_num colinear blocks were identified totally!\nNow extracting these sequences!\n\n\n"; 128 | foreach my $head(@head){ 129 | 130 | &extract($head,$in); 131 | `rm $head`; 132 | } 133 | 134 | `rename .head.fasta .fasta *.fasta`; 135 | 136 | #写了个子程序用来提取序列,直接拿的g.pl进行的修改 137 | sub extract{ 138 | my $biaotou=shift @_; 139 | my $seq=shift @_; 140 | my @liuyuan; 141 | open IN,"<",$biaotou; 142 | while(){ 143 | push @liuyuan,"$_"; 144 | } 145 | 146 | foreach(0...$#liuyuan){ 147 | 148 | open OUT,"<",$seq; 149 | open FASTA,">>","$biaotou.fasta"; 150 | my $turnoff=0; 151 | while($line=){ 152 | 153 | if($line eq $liuyuan[$_]){$turnoff=1; 154 | print FASTA "$line"; 155 | next; 156 | } 157 | elsif($line ne $liuyuan[$_]&&$line=~m/>.*\n/){$turnoff=0;} 158 | if($turnoff){print FASTA "$line";} 159 | 160 | 161 | } 162 | close(OUT); 163 | close(FASTA); 164 | } 165 | } 166 | #------------------------------------------------------------------------------------------- 167 | #改每个模块序列的标头,否则太长,Gblock无法处理,并处理每个模块序列尾部的=号 168 | 169 | my @seq=glob("*.fasta"); 170 | foreach my $seq(@seq){ 171 | open(TMP2IN, "<$seq")||die "Can't open $in:$!\n"; 172 | open(TMP2OUT, ">>$seq.rename")||die "Can't open $in:$!\n"; 173 | while(){ 174 | if($_=~m/=/){next;} 175 | if($_=~m/^>/){ 176 | my @array=split(/ [+|-] /,$_); 177 | my @array2=split(/\\/,@array[$#array]); 178 | my 
@array3=split(/\//,@array2[$#array2]); 179 | @array3[$#array3]=~s/\.fasta//g; 180 | print TMP2OUT ">@array3[$#array3]"; 181 | undef @array; 182 | undef @array2; 183 | undef @array3; 184 | } 185 | else {print TMP2OUT "$_";} 186 | 187 | } 188 | close(TMP2IN); 189 | close(TMP2OU); 190 | `rm $seq`; 191 | } 192 | `rename .fasta.rename .fasta *.rename`; 193 | 194 | 195 | 196 | 197 | 198 | #------------------------------------------------------------------------------------------- 199 | #使用Gblocks进行序列处理,需要将Gblocks添加至环境变量,也可以再设置参数,填写Gblock的位置,等去看看Gblock是否有没有交互的,好直接加参数进去 200 | print "Now work with Gblock!\n\n\n" ; 201 | 202 | my @trimed=glob("*.fasta"); 203 | foreach my $trimed(@trimed){ 204 | `./Gblocks $trimed out`; 205 | `rm $trimed`; 206 | } 207 | 208 | 209 | #------------------------------------------------------------------------------------------- 210 | #处理gb后缀的结果文件 211 | #perl -e '$gb=shift;open GB,$gb;while(){if(/^>/){print "$_";}if(/^[A|T|C|G|N]{10}\s/i){$_=~s/\s//g;print "$_";}}' block20.fasta-gb > block20-gb 212 | 213 | my @gb=glob("*.fasta-gb"); 214 | foreach my $gb(@gb){ 215 | my $delete=0; #设置个阈值,如果没有匹配到任何ATCG的话,就直接略过,并删掉产生文件 216 | open(GB, "<$gb")||die "Can't open $in:$!\n"; 217 | open(GBOUT, ">>$gb.out")||die "Can't open $in:$!\n"; 218 | while(){ 219 | 220 | if(/^>/){print GBOUT "$_";} 221 | elsif(/^[A|T|C|G|N]{10}\s/i){ 222 | $delete++; 223 | $_=~s/\s//g; 224 | print GBOUT "$_\n"; 225 | } 226 | 227 | } 228 | close(GB); 229 | close(GBOUT); 230 | if ($delete==0){`rm $gb.out`;} 231 | `rm $gb`; 232 | } 233 | 234 | `rename fasta-gb.out fasta *.fasta-gb.out`; 235 | 236 | 237 | 238 | #------------------------------------------------------------------------------------------- 239 | #最后合并文件,报告提取出多少模块,和总序列长度 240 | my @final=glob("*.fasta"); 241 | my $f_length=@final; 242 | if ($f_length==$temp_num){ 243 | print "All blocks extracted by Mauve have conserved sequences.\n\n\n"; 244 | } 245 | else {print "Only $f_length blocks have conserved sequences.\n\n\n";} 246 | 
#先建一个数组 247 | my @fasta; 248 | my $hehe=1; 249 | foreach my $final(@final){ 250 | 251 | if ($hehe>1){last;} 252 | else { 253 | open(HEHE, "<$final")||die "Can't open $in:$!\n"; 254 | while(){ 255 | if($_=~m/^>/){push @fasta, $_;} 256 | } 257 | close(HEHE); 258 | } 259 | $hehe++; 260 | } 261 | 262 | open(CAN,">>all-sequence.fasta")||die"Can'not open file"; 263 | my $character_length; 264 | 265 | foreach $fasta(@fasta){ 266 | my $can_all; #用来连接一个物种的序列 267 | print CAN "$fasta"; 268 | foreach $final(@final){ 269 | open(FILE,"<$final")||die"Can'not open file"; 270 | my $turnoff=0; #用来判断是否连接 271 | my $can; #用来连接序列 272 | while(my $line=){ 273 | if ($line eq $fasta){$turnoff=1;next;} 274 | elsif($line ne $fasta&&$line=~m/>.*\n/){$turnoff=0;} 275 | if($turnoff){chomp $line;$can.=$line;} 276 | } 277 | $can_all.=$can; 278 | 279 | } 280 | print CAN "$can_all\n"; 281 | $character_length=length($can_all); 282 | } 283 | close(CAN); 284 | 285 | 286 | print "The final concatenated sequences was writen in all-sequence.fasta\n\n"; 287 | print "The concatenated length is $character_length bp\n\n"; 288 | print "ROGP DATA PREPRATION COMPLETED! 
ENJOY IT!!\n\n\n"; 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/circoletto.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/circoletto.pl -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/docs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/docs/.DS_Store -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/docs/Manual_v1.1.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/docs/Manual_v1.1.1.pdf -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/examples/.DS_Store -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/README.txt: -------------------------------------------------------------------------------- 1 | README for Examples 2 | ___________________ 3 | 4 | 5 | These folders contain two simple examples. 
6 | 7 | The /nucleotide folder demonstrates PartitionFinder 8 | the /aminoacid folder demonstrates PartitionFinderProtein 9 | 10 | Instructions on how to run these examples are provided in the manual: 11 | 12 | For Mac Users: Page 7 of the manual 13 | For Windows Users: Page 9 of the manual 14 | 15 | Please email me if you have any questions. 16 | 17 | Rob Lanfear 18 | May 2012 -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/aminoacid/Als_etal_2004.phy: -------------------------------------------------------------------------------- 1 | 4 949 2 | AD00P055 SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNLFFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPXXXXXXXXXXGIISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYFTSATMIIAVPTGIKIFSWLATIYGTQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLKIQFILMFIGVNXXXXXXXXXXXXXXXXXXXXXPDMXLSWNIISSLGSYMSFISMMMMMMIIWESMIKQRLILFSLNMSSSIEWLQNTPPNEHSYNELPILSNFMATWSNLNFQNSVSPLMEQIIFFHDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMIELIXXXXXXXXXXXXXXXXXRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINEYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTVPSIGVKVDANPGRLNQTSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFXDAPGHSDFIKNMITGTSQAXCAVLIVAAGTGEXEAGISKNGQTREHALXAFTLGVKQLIVGVNKMXSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEXLQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDXXSGKSXEVDPKSIKSGDDAXVNMVXSKPLXXES 3 | RV03N585 
SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNLFFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGIISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYITSATMIIAVPTGIKIFSWLATIYGTQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLKIQFILMFIGVNXXXXXXXXXXXXXXXXXXXXXPDMFLSWNIISSLGSYMSFISMMMMMMIIWESMIKQRLILFSLNMSSSIEWLQNTPPNEHSYNELPILSNFMATWSNLNFQNSVSPLMEQIIFFHDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMIEXXXXXXXXIILIFIALPSLRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINKYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTVPSIGVKVDANPGRLNQTSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFIDAPGHSDFIKNMITGTSQADCAVLIVAAGTGEFEAGISKNGQTREHALLAFTLGVKQLIVGVNKMDSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEALQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDRRSGKSTEVDPKSIKSGDAAIVNLVPSKPLCVES 4 | TDA99Q996 SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNMSFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQXXXXXXXXXXXXXXXXXXXXIISXIISQESXKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYFTSATMIIAVPTGIKIFSWLATIYGSQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLXIXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXLSWNIVSSLGSYMSFISMLLMMMIIWESMIKKRLILFSLNMSSSIEWLQNTPPNEHSYNELPILNNFMATWSNLNFQNSVSPLMEQIIFFNDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMXXLIXXXXXXXXXXXXXXXSLRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINEYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTIPAIGVKVDANPGRLNQSSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFIDAPGHSDFIKNMITGTSQADCAVLIVAAGTGEFEAGISKNGQTREHALLAFTLGVKQLIVGVNKMDSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEALQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDRRSGKSTEVDPKSIKSGDAAIVNLVPSKPLCVES 5 | ZD99S305 
SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNLFFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPXXXXXXXXXXXXXXXXXXXESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYFTSATMIIAVPTGIKIFSWLATIYGTQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLKIQFILMXXXXXXXXXXXXXXXXXXXXXXXXXXPDMXLSWNIISSLGSYMSFISMMMMMMIIWESMIKQRLILFSLNMSSSIEWLQNTPPNEHSYNELPILSNFMATWSNLNFQNSVSPLMEQIIFFHDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMIELIXXXXXXIILIFIALPSLRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINEYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTVPSIGVKVDANPGRLNQTSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFIDAPGHSDFIKNMITGTSQADCAVLIVAAGTGEFEAGISKNGQTREHALLAFTLGVKQLIVGVNKMDSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEALQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDRRSGKSTEVDPKSIKSGDAAIVNLVPSKPLCVES 6 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/aminoacid/partition_finder.cfg: -------------------------------------------------------------------------------- 1 | ## ALIGNMENT FILE ## 2 | alignment = Als_etal_2004.phy; 3 | 4 | ## BRANCHLENGTHS: linked | unlinked ## 5 | branchlengths = linked; 6 | 7 | ## MODELS OF EVOLUTION for PartitionFinder: all | raxml | mrbayes | ## 8 | ## for PartitionFinderProtein: all_protein | ## 9 | models = all_protein; 10 | 11 | # MODEL SELECCTION: AIC | AICc | BIC # 12 | model_selection = BIC; 13 | 14 | ## DATA BLOCKS: see manual for how to define ## 15 | [data_blocks] 16 | COI = 1-407; 17 | COII = 408-624; 18 | EF1a = 625-949; 19 | 20 | ## SCHEMES, search: all | user | greedy ## 21 | [schemes] 22 | search = greedy; 23 | 24 | #user schemes go here if search=user. 
See manual for how to define.# 25 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/nucleotide/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/examples/nucleotide/.DS_Store -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/nucleotide/partition_finder.cfg: -------------------------------------------------------------------------------- 1 | ## ALIGNMENT FILE ## 2 | alignment = test.phy; 3 | 4 | ## BRANCHLENGTHS: linked | unlinked ## 5 | branchlengths = linked; 6 | 7 | ## MODELS OF EVOLUTION for PartitionFinder: all | raxml | mrbayes | beast | ## 8 | ## for PartitionFinderProtein: all_protein | ## 9 | models = all; 10 | 11 | # MODEL SELECCTION: AIC | AICc | BIC # 12 | model_selection = BIC; 13 | 14 | ## DATA BLOCKS: see manual for how to define ## 15 | [data_blocks] 16 | Gene1_pos1 = 1-789\3; 17 | Gene1_pos2 = 2-789\3; 18 | Gene1_pos3 = 3-789\3; 19 | Gene2_pos1 = 790-1449\3; 20 | Gene2_pos2 = 791-1449\3; 21 | Gene2_pos3 = 792-1449\3; 22 | Gene3_pos1 = 1450-2208\3; 23 | Gene3_pos2 = 1451-2208\3; 24 | Gene3_pos3 = 1452-2208\3; 25 | 26 | ## SCHEMES, search: all | greedy | rcluster | hcluster | user ## 27 | [schemes] 28 | search = greedy; 29 | 30 | #user schemes go here if search=user. 
See manual for how to define.# 31 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/nucleotide/test.phy: -------------------------------------------------------------------------------- 1 | 4 2208 2 | spp1 CTTGAGGTTCAGAATGGTAATGAA------GTGCTGGTGCTGGAAGTTCAGCAGCAGCTCGGCGGCGGTATCGTACGTACCATCGCCATGGGTTCTTCCGACGGTCTGCGTCGCGGTCTGGATGTAAAAGACCTCGAGCACCCGATCGAAGTCCCAGTTGGTAAAGCAACACTGGGTCGTATCATGAACGTACTGGGTCAGCCAGTAGACATGAAGGGCGACATCGGTGAAGAAGAGCGTTGGGCT---------------ATCCACCGTGAAGCACCATCCTATGAAGAGCTGTCAAGCTCTCAGGAACTGCTGGAAACCGGCATCAAAGTTATCGACCTGATGTGTCCGTTTGCGAAGGGCGGTAAAGTTGGTCTGTTCGGTGGTGCGGGTGTAGGTAAAACCGTAAACATGATGGAGCTTATTCGTAACATCGCGATCGAGCACTCCGGTTATTCTGTGTTTGCGGGCGTAGGTGAACGTACTCGTGAGGGTAACGACTTCTACCACGAAATGACCGACTCCAACGTTATCGAT---------------------AAAGTTTCTCTGGTTTATGGCCAGATGAACGAGCCACCAGGTAACCGTCTGCGCGTTGCGCTGACCGGTCTGACCATGGCTGAGAAGTTCCGTGACGAAGGTCGCGACGTACTGCTGTTCGTCGATAACATCTATCGTTACACCCTGGCAGGTACTGAAGTTTCAGCACTGCTGGGTCGTATGCCTTCAGCGGTAGGTTACCAGCCGACTCTGGCGGAAGAAATGGGCGTTCGCATTCCAACGCTGGAAGAGTGTGATATCTGCCACGGCAGCGGCGCTAAAGCCGGTTCGAAGCCGCAGACCTGTCCTACCTGTCACGGTGCAGGCCAGGTACAGATGCGCCAGGGCTTCTTCGCTGTACAGCAGACCTGTCCACACTGCCAGGGCCGCGGTACGCTGATCAAAGATCCGTGCAACAAATGTCACGGTCATGGTCGCGTAGAGAAAACCAAAACCCTGTCCGTAAAAATTCCGGCAGGCGTTGATACCGGCGATCGTATTCGTCTGACTGGCGAAGGTGAAGCTGGTGAGCACGGCGCACCGGCAGGCGATCTGTACGTTCAGGTGCAGGTGAAGCAGCACGCTATTTTCGAGCGTGAAGGCAACAACCTGTACTGTGAAGTGCCGATCAACTTCTCAATGGCGGCTCTTGGCGGCGAGATTGAAGTGCCGACGCTTGATGGTCGCGTGAAGCTGAAAGTTCCGGGCGAAACGCAAACTGGCAAGCTGTTCCGTATGCGTGGCAAGGGCGTGAAGTCCGTGCGCGGCGGTGCACAGGGCGACCTTCTGTGCCGCGTGGTGGTCGAGACACCGGTAGGTCTTAACGAGAAGCAGAAACAGCTGCTCAAAGATCTGCAGGAAAGTTTTGGCGGCCCAACGGGTGAAAACAACGTTGTTAACGCCCTGTCGCAGAAACTGGAATTGCTGATCCGCCGCGAAGGCAAAGTACATCAGCAAACTTATGTCCATGGTGTGCCACAGGCTCCGCTGGCGGTAACCGGTGAAACGGAAGTGACCGGTACACAGGTGCGTTTCTGGCCAAGCCACGAAACCTTCACCAACGTAATCGAATTCGAATATGAGATTCTGGCAAAACGTCTGCGCGAGCTGTCATTCCTGAACTCCGGCGTTTCCATCCGTCTGCGCGATAAGCGTGAC---GGCAAAGAAG
ACCATTTCCACTATGAAGGTGGTATCAAGGCGTTTATTGAGTATCTCAATAAAAATAAAACGCCTATCCACCCGAATATCTTCTACTTCTCCACCGAA---AAAGACGGTATTGGCGTAGAAGTGGCGTTGCAGTGGAACGATGGTTTCCAGGAAAACATCTACTGCTTCACCAACAACATTCCACAGCGTGATGGCGGTACTCACCTTGCAGGCTTCCGTGCGGCGATGACCCGTACGCTGAACGCTTACATGGACAAAGAAGGCTACAGCAAAAAAGCCAAA------GTCAGCGCCACCGGTGATGATGCCCGTGAAGGCCTGATTGCCGTCGTTTCCGTGAAAGTACCGGATCCGAAATTCTCCTCTCAGACTAAAGACAAACTGGTCTCTTCTGAGGTGAAAACGGCGGTAGAACAGCAGATGAATGAACTGCTGAGCGAATACCTGCTGGAAAACCCGTCTGACGCCAAAATC 3 | spp2 CTTGAGGTACAAAATGGTAATGAG------AGCCTGGTGCTGGAAGTTCAGCAGCAGCTCGGTGGTGGTATCGTACGTGCTATCGCCATGGGTTCTTCCGACGGTCTGCGTCGTGGTCTGGAAGTTAAAGACCTTGAGCACCCGATCGAAGTCCCGGTTGGTAAAGCAACGCTGGGTCGTATCATGAACGTGCTGGGTCAGCCGATCGATATGAAAGGCGACATCGGCGAAGAAGAACGTTGGGCG---------------ATTCACCGTGCAGCACCTTCCTATGAAGAGCTCTCCAGCTCTCAGGAACTGCTGGAAACCGGCATCAAAGTTATCGACCTGATGTGTCCGTTCGCGAAGGGCGGTAAAGTCGGTCTGTTCGGTGGTGCGGGTGTTGGTAAAACCGTAAACATGATGGAGCTGATCCGTAACATCGCGATCGAACACTCCGGTTACTCCGTGTTTGCTGGTGTTGGTGAGCGTACTCGTGAGGGTAACGACTTCTACCACGAAATGACCGACTCCAACGTTCTGGAT---------------------AAAGTATCCCTGGTTTACGGCCAGATGAACGAGCCGCCGGGAAACCGTCTGCGCGTTGCACTGACCGGCCTGACCATGGCTGAGAAATTCCGTGACGAAGGTCGTGACGTTCTGCTGTTCGTCGATAACATCTATCGTTATACCCTGGCCGGTACAGAAGTATCTGCACTGCTGGGTCGTATGCCTTCTGCGGTAGGTTATCAGCCGACGCTGGCGGAAGAGATGGGCGTTCGTATCCCGACGCTGGAAGAGTGCGACGTCTGCCACGGCAGCGGCGCGAAATCTGGCAGCAAACCGCAGACCTGTCCGACCTGTCATGGTCAGGGCCAGGTGCAGATGCGTCAGGGCTTCTTCGCCGTTCAGCAGACCTGTCCGCATTGTCAGGGGCGCGGTACGCTGATTAAAGATCCGTGCAACAAATGTCACGGTCACGGTCGCGTTGAGAAAACCAAAACCCTGTCGGTCAAAATCCCGGCGGGCGTGGATACCGGCGATCGTATTCGTCTGTCAGGAGAAGGCGAAGCGGGCGAACACGGTGCACCAGCAGGCGATCTGTACGTTCAGGTCCAGGTTAAGCAGCACGCCATCTTTGAGCGTGAAGGCAATAACCTGTACTGCGAAGTGCCTATTAACTTCACCATGGCAGCCCTCGGCGGCGAGATTGAAGTCCCGACGCTGGATGGCCGGGTGAATCTCAAAGTGCCTGGCGAAACGCAAACCGGCAAACTGTTCCGCATGCGCGGTAAAGGTGTGAAATCCGTGCGCGGTGGTGCTCAGGGCGACCTGCTGTGCCGCGTGGTGGTTGAAACACCAGTCGGGCTGAACGATAAGCAGAAACAGCTGCTGAAGGACCTGCAGGAAAGTTTTGGCGGACCAACGGGCGAGAAAAACGTGGTTAACGCCCTGTCGCAGAAGCTGGAGCTGGTTATTCAGCGCGACAATAAAGTTCACCG
TCAGATCTATGCGCACGGTGTGCCGCAGGCTCCGCTGGCAGTGACCGGTGAGACCGAAAAAACCGGCACCATGGTACGTTTCTGGCCAAGCTATGAAACCTTCACCAACGTTGTCGAGTTCGAATACGAGATCCTGGCAAAACGTCTGCGTGAGCTGTCGTTCCTGAACTCCGGGGTTTCTATCCGTCTGCGTGACAAGCGTGAC---GGTAAAGAAGACCATTTCCACTACGAAGGCGGCATCAAGGCGTTCGTTGAGTATCTCAATAAGAACAAAACGCCGATCCACCCGAATATCTTCTACTTCTCCACCGAA---AAAGACGGTATTGGCGTCGAAGTAGCGCTGCAGTGGAACGACGGCTTCCAGGAAAACATCTACTGCTTCACCAACAACATCCCGCAGCGCGATGGCGGTACTCACCTTGCGGGCTTCCGCGCGGCGATGACCCGTACCCTGAACGCCTATATGGACAAAGAAGGCTACAGCAAAAAAGCCAAA------GTCAGCGCTACCGGCGACGATGCGCGTGAAGGCCTGATTGCCGTTGTCTCCGTGAAGGTTCCGGATCCGAAATTCTCCTCGCAGACCAAAGACAAACTGGTCTCCTCCGAGGTGAAAACCGCGGTTGAACAGCAGATGAATGAACTGCTGAACGAATACCTGCTGGAAAATCCGTCTGACGCGAAAATC 4 | spp3 CTTGAGGTACAGAATAACAGCGAG------AAGCTGGTGCTGGAAGTTCAGCAGCAGCTCGGCGGCGGTATCGTACGTACCATCGCAATGGGTTCTTCCGACGGTCTGCGTCGTGGTCTGGAAGTGAAAGACCTCGAGCACCCGATCGAAGTCCCGGTAGGTAAAGCGACCCTGGGTCGTATCATGAACGTGCTGGGTCAGCCAATCGATATGAAAGGCGACATCGGCGAAGAAGATCGTTGGGCG---------------ATTCACCGCGCAGCACCTTCCTATGAAGAGCTGTCCAGCTCTCAGGAACTGCTGGAAACCGGCATCAAAGTTATCGACCTGATTTGTCCGTTCGCTAAGGGCGGTAAAGTTGGTCTGTTCGGTGGTGCGGGCGTAGGTAAAACCGTAAACATGATGGAGCTGATCCGTAACATCGCGATCGAGCACTCCGGTTACTCCGTGTTTGCAGGCGTGGGTGAGCGTACTCGTGAGGGTAACGACTTCTACCACGAGATGACCGACTCCAACGTTCTGGAC---------------------AAAGTTGCACTGGTTTACGGCCAGATGAACGAGCCGCCAGGTAACCGTCTGCGCGTAGCGCTGACCGGTCTGACCATCGCGGAGAAATTCCGTGACGAAGGCCGTGACGTTCTGCTGTTCGTCGATAACATCTATCGTTATACCCTGGCCGGTACAGAAGTTTCTGCACTGCTGGGTCGTATGCCATCTGCGGTAGGTTATCAGCCTACTCTGGCAGAAGAGATGGGTGTTCGTATCCCGACGCTGGAAGAGTGTGAAGTTTGCCACGGCAGCGGCGCGAAAAAAGGTTCTTCTCCGCAGACCTGTCCAACCTGTCATGGACAGGGCCAGGTGCAGATGCGTCAGGGCTTCTTCACCGTGCAGCAAAGCTGCCCGCACTGCCAGGGCCGCGGTACCATCATTAAAGATCCGTGCACCAACTGTCACGGCCATGGCCGCGTAGAGAAAACCAAAACGCTGTCGGTAAAAATTCCGGCAGGCGTGGATACCGGCGATCGTATCCGCCTTTCTGGTGAAGGCGAAGCGGGCGAGCACGGCGCACCTTCAGGCGATCTGTACGTTCAGGTTCAGGTGAAACAGCACCCAATCTTCGAGCGTGAAGGCAATAACCTGTACTGCGAAGTGCCGATCAACTTTGCGATGGCTGCGCTGGGCGGGGAAATTGAAGTGCCGACCCTTGACGGCCGCGTTAAGCTGAAGGTACCGAGCGAAACGCAAACCGGCAAGCTGTTCCGC
ATGCGCGGTAAAGGCGTGAAATCCGTACGCGGTGGCGCGCAGGGCGATCTGCTGTGCCGCGTCGTCGTTGAAACTCCGGTTAGCCTGAACGAAAAGCAGAAGAAACTGCTGCGTGATTTGGAAGAGAGCTTTGGCGGCCCAACGGGGGCGAACAATGTTGTGAACGCCCTGTCCCAGAAGCTGGAGCTGCTGATTCGCCGCGAAGGCAAAACCCATCAGCAAACCTACGTGCACGGTGTGCCGCAGGCTCCGCTGGCGGTCACCGGTGAAACCGAACTGACCGGTACCCAGGTGCGTTTCTGGCCGAGCCATGAAACCTTCACCAACGTCACCGAATTCGAATATGACATCCTGGCTAAGCGCCTGCGTGAGCTGTCGTTCCTGAACTCCGGCGTCTCTATTCGCCTGAACGATAAGCGCGAC---GGCAAGCAGGATCACTTCCACTACGAAGGCGGCATCAAGGCGTTTGTTGAGTACCTCAACAAGAACAAAACCCCGATTCACCCGAACGTCTTCTATTTCAGCACTGAA---AAAGACGGCATCGGCGTGGAAGTGGCGCTGCAGTGGAACGACGGCTTCCAGGAAAATATCTACTGCTTTACCAACAACATTCCTCAGCGCGACGGCGGTACTCACCTTGCGGGCTTCCGCGCGGCGATGACCCGTACCCTGAACGCCTATATGGACAAAGAAGGCTACAGCAAAAAAGCCAAA------GTGAGCGCCACCGGTGACGATGCGCGTGAAGGCCTGATTGCCGTAGTGTCCGTGAAGGTGCCGGATCCGAAGTTCTCTTCCCAGACCAAAGACAAACTGGTTTCTTCGGAAGTGAAATCCGCGGTTGAACAGCAGATGAACGAACTGCTGGCTGAATACCTGCTGGAAAATCCGGGCGACGCAAAAATT 5 | spp4 CTCGAGGTGAAAAATGGTGATGCT------CGTCTGGTGCTGGAAGTTCAGCAGCAGCTGGGTGGTGGCGTGGTTCGTACCATCGCCATGGGTACTTCTGACGGCCTGAAGCGCGGTCTGGAAGTTACCGACCTGAAAAAACCTATCCAGGTTCCGGTTGGTAAAGCAACCCTCGGCCGTATCATGAACGTATTGGGTGAGCCAATCGACATGAAAGGCGACCTGCAGAATGACGACGGCACTGTAGTAGAGGTTTCCTCTATTCACCGTGCAGCACCTTCGTATGAAGATCAGTCTAACTCGCAGGAACTGCTGGAAACCGGCATCAAGGTTATCGACCTGATGTGTCCGTTCGCTAAGGGCGGTAAAGTCGGTCTGTTCGGTGGTGCGGGTGTAGGTAAAACCGTAAACATGATGGAGCTGATCCGTAACATCGCGGCTGAGCACTCAGGTTATTCGGTATTTGCTGGTGTGGGTGAGCGTACTCGTGAGGGTAACGACTTCTACCACGAAATGACTGACTCCAACGTTATCGAT---------------------AAAGTAGCGCTGGTGTATGGCCAGATGAACGAGCCGCCGGGTAACCGTCTGCGCGTAGCACTGACCGGTTTGACCATGGCGGAAAAATTCCGTGATGAAGGCCGTGACGTTCTGCTGTTCATCGACAACATCTATCGTTACACCCTGGCCGGTACTGAAGTATCAGCACTGCTGGGTCGTATGCCATCTGCGGTAGGCTATCAGCCAACGCTGGCAGAAGAGATGGGTGTGCGCATTCCAACACTGGAAGAGTGCGATGTCTGCCACGGTAGCGGCGCGAAAGCGGGGACCAAACCGCAGACCTGTCATACCTGTCATGGCGCAGGCCAGGTGCAGATGCGTCAGGGCTTCTTCACTGTGCAGCAGGCGTGTCCGACCTGTCACGGTCGCGGTTCAGTGATCAAAGATCCGTGCAATGCTTGTCATGGTCACGGTCGCGTTGAGCGCAGTAAAACCCTGTCGGTGAAAATTCCAGCAGGCGTGGATACCGGCGATCGCATTCGTCTGACCGGCGAAG
GTGAAGCGGGCGAACAGGGCGCACCAGCGGGCGATCTGTACGTTCAGGTTTCGGTGAAAAAGCACCCGATCTTTGAGCGTGAAGATAACAACCTATATTGCGAAGTGCCGATTAACTTTGCGATGGCAGCATTGGGTGGCGAGATTGAAGTGCCGACGCTTGATGGGCGTGTGAACCTGAAAGTGCCTTCTGAAACGCAAACTGGCAAGCTGTTCCGCATGCGCGGTAAAGGCGTGAAATCGGTGCGTGGTGGTGCGGTAGGCGATTTGCTGTGTCGTGTGGTGGTGGAAACGCCAGTTAGCCTCAATGACAAACAGAAAGCGTTACTGCGTGAACTGGAAGAGAGTTTTGGCGGCCCGAGCGGTGAGAAAAACGTCGTAAACGCCCTGTCACAGAAGCTGGAGCTGACCATTCGCCGTGAAGGCAAAGTGCATCAGCAGGTTTATCAGCACGGCGTGCCGCAGGCACCGCTGGCGGTGTCCGGTGATACCGATGCAACCGGTACTCGCGTGCGTTTCTGGCCGAGCTACGAAACCTTCACCAATGTGATTGAGTTTGAGTACGAAATCCTGGCGAAACGCCTGCGTGAACTGTCGTTCCTGAACTCTGGCGTTTCGATTCGTCTGGAAGACAAACGCGAC---GGCAAGAACGATCACTTCCACTACGAAGGCGGCATCAAGGCGTTCGTTGAGTATCTCAACAAGAACAAAACCCCGATTCACCCAACGGTGTTCTACTTCTCGACGGAG---AAAGATGGCATTGGCGTGGAAGTGGCGCTGCAGTGGAACGATGGTTTCCAGGAAAACATCTACTGCTTCACCAACAACATTCCACAGCGCGACGGCGGTACGCACCTGGCGGGCTTCCGTGCGGCAATGACGCGTACGCTGAATGCCTACATGGATAAAGAAGGCTACAGCAAAAAAGCCAAA------GTCAGTGCGACCGGTGACGATGCGCGTGAAGGCCTGATTGCAGTGGTTTCCGTGAAAGTGCCGGATCCGAAATTCTCTTCTCAGACCAAAGATAAGCTGGTCTCTTCTGAAGTGAAATCGGCGGTTGAGCAGCAGATGAACGAACTGCTGGCGGAATACCTGCTGGAAAATCCGTCTGACGCGAAAATC 6 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/.DS_Store -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | #This program is free software: you can redistribute it and/or modify it 3 | #under the terms of the GNU General Public License as published by the 4 | #Free Software Foundation, either version 3 of the License, or (at your 5 | #option) any later version. 
import logging
log = logging.getLogger("config")
import config

# TODO: Not currently used
# Activation should chdir, and maybe do some other stuff
# So maybe need an 'activate' function on the config?
# Should also clear out subsets, in the cache?

class Current(object):
    """Keep a bunch of stuff current, that can be reinitialised"""

    def __init__(self):
        # The one piece of "current" state: the active Configuration,
        # or None when nothing has been activated yet.
        self._config = None

    def activate_config(self, c):
        """Make `c` the active configuration, resetting any previous one."""
        assert isinstance(c, config.Configuration)

        previous = self._config
        if previous is not None:
            log.debug("Resetting old configuration...")
            previous.reset()

        log.debug("Assigning a new configuration...")
        self._config = c

    @property
    def active_config(self):
        """The currently active Configuration (None if none is active)."""
        if self._config is None:
            log.error("No configuration is currently active...")

        return self._config


current = Current()
#Copyright (C) 2012 Robert Lanfear and Brett Calcott
#
#This program is free software: you can redistribute it and/or modify it
#under the terms of the GNU General Public License as published by the
#Free Software Foundation, either version 3 of the License, or (at your
#option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details. You should have received a copy
#of the GNU General Public License along with this program. If not, see
#<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
#program, the RAxML program, and the PyParsing library,
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.

from math import sqrt


def k_subsets_i(n, k):
    '''
    from http://code.activestate.com/recipes/500268-all-k-subsets-from-an-n-set/
    Yield each subset of size k from the set of integers 0 .. n - 1
    n -- an integer >= 0
    k -- an integer >= 0
    '''
    # Validate args.  Zero IS accepted (k == 0 yields the empty subset and
    # n < k yields nothing useful but is well-defined), so the guard and the
    # message both say ">= 0" -- the original message claimed "> 0" while
    # the code only rejected negatives.
    if n < 0:
        raise ValueError('n must be >= 0, got n=%d' % n)
    if k < 0:
        raise ValueError('k must be >= 0, got k=%d' % k)
    # check base cases
    if k == 0 or n < k:
        yield set()
    elif n == k:
        yield set(range(n))
    else:
        # Use recursive formula based on binomial coefficients:
        # choose(n, k) = choose(n - 1, k - 1) + choose(n - 1, k)
        for s in k_subsets_i(n - 1, k - 1):
            s.add(n - 1)
            yield s
        for s in k_subsets_i(n - 1, k):
            yield s


def k_subsets(s, k):
    '''
    from http://code.activestate.com/recipes/500268-all-k-subsets-from-an-n-set/
    Yield all subsets of size k from set (or list) s
    s -- a set or list (any iterable will suffice)
    k -- an integer > 0
    '''
    s = list(s)
    n = len(s)
    for k_set in k_subsets_i(n, k):
        yield set([s[i] for i in k_set])


def lumpings(scheme):
    """
    generate all possible lumpings of a given scheme, where a lumping involves
    joining two partitions together. scheme has to be a list of digits

    Returns a list of schemes (lists), one per unordered pair of distinct
    partition numbers, with the larger number of each pair merged into the
    smaller one.
    """
    # Get the numbers involved in the scheme
    nums = set(scheme)
    # Renamed from 'lumpings' -- the original local shadowed this function.
    lumps = []
    for sub in k_subsets(nums, 2):
        lump = list(scheme)
        sub = list(sub)
        sub.sort()
        # now replace all the instances of one number in lump with the other
        # in sub (every occurrence of the larger becomes the smaller)
        while lump.count(sub[1]) > 0:
            lump[lump.index(sub[1])] = sub[0]
        lumps.append(lump)

    return lumps


def euclidean_distance(x, y):
    """Return the Euclidean distance between two equal-length numeric
    sequences (pairs beyond the shorter sequence are ignored, as before)."""
    # The builtin zip produces the same pairs as the Py2-only itertools.izip
    # did, and also works on Python 3; 'total' avoids shadowing builtin sum.
    total = 0
    for xval, yval in zip(x, y):
        total += (xval - yval) ** 2
    return sqrt(total)


# def getLevels(cluster, levs):
#     """
#     Returns the levels of the cluster as list.
#     """
#     levs.append(cluster.level())

#     left = cluster.items()[0]
#     right = cluster.items()[1]
#     if isinstance(left, Cluster):
#         first = getLevels(left, levs)
#     else:
#         first = left
#     if isinstance(right, Cluster):
#         second = getLevels(right, levs)
#     else:
#         second = right
#     return levs


def levels_to_scheme(levels, namedict):
    """
    take the return from Cluster.getlevel
    and return it as a list of partition names description

    levels -- a (possibly nested) list structure of partition numbers
    namedict -- maps partition name -> partition number
    """

    levels = str(levels)

    # BUG FIX: substitute longer numbers first.  The original iterated the
    # dict in arbitrary order, so with >= 11 partitions the value 1 could be
    # replaced inside the text of the value 11, corrupting the result.
    keys = sorted(namedict.keys(),
                  key=lambda k: len(str(namedict[k])), reverse=True)
    for key in keys:
        old = str(namedict[key])
        new = '"%s"' % key
        levels = levels.replace(old, new)

    # NOTE(review): eval of a string we built ourselves from repr() output;
    # inputs come from this program, not untrusted users, but
    # ast.literal_eval would be safer if the structure is always
    # lists/strings -- confirm before changing.
    levels = eval(levels)
    return levels
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# PartitionFinder also includes the PhyML program, the RAxML program, and the
# PyParsing library, all of which are protected by their own licenses and
# conditions, using PartitionFinder implies that you agree with those licences
# and conditions as well.

"""Loading, Saving, Parsing Alignment Files

See the phyml details here:
http://www.atgc-montpellier.fr/phyml/usersguide.php?type=command

"""
import logging
log = logging.getLogger("alignment")

import os

from pyparsing import (
    Word, OneOrMore, alphas, nums, Suppress, Optional, Group, stringEnd,
    delimitedList, ParseException, line, lineno, col, LineStart, restOfLine,
    LineEnd, White, Literal, Combine, Or, MatchFirst, ZeroOrMore)

from util import PartitionFinderError


class AlignmentError(PartitionFinderError):
    pass


class AlignmentParser(object):
    """Parses an alignment and returns species sequence tuples"""

    # I think this covers it...
    BASES = Word(alphas + "?.-")

    def __init__(self):
        # Filled in from the phylip header (if present) by set_header.
        self.sequence_length = None
        self.species_count = None
        # List of [name, sequence] pairs, in file order.
        self.sequences = []
        # Index of the sequence the next continuation line belongs to.
        self.current_sequence = 0

        self.root_parser = self.phylip_parser() + stringEnd

    def phylip_parser(self):
        """Build the pyparsing grammar for (possibly interleaved) phylip."""

        INTEGER = Word(nums)
        INTEGER.setParseAction(lambda x: int(x[0]))

        header = INTEGER("species_count") + INTEGER("sequence_length") +\
            Suppress(restOfLine)
        header.setParseAction(self.set_header)

        sequence_name = Word(
            alphas + nums + "!#$%&\'*+-./;<=>?@[\\]^_`{|}~",
            max=100)

        # Take a copy and disallow line breaks in the bases
        bases = self.BASES.copy()
        bases.setWhitespaceChars(" \t")
        seq_start = sequence_name("species") + bases("sequence") + Suppress(LineEnd())
        seq_start.setParseAction(self.set_seq_start)
        seq_start_block = OneOrMore(seq_start)
        seq_start_block.setParseAction(self.set_start_block)

        seq_continue = bases("sequence") + Suppress(LineEnd())
        seq_continue.setParseAction(self.set_seq_continue)

        seq_continue_block = Suppress(LineEnd()) + OneOrMore(seq_continue)
        seq_continue_block.setParseAction(self.set_continue_block)

        return header + seq_start_block + ZeroOrMore(seq_continue_block)

    def set_header(self, text, loc, tokens):
        self.sequence_length = tokens.sequence_length
        self.species_count = tokens.species_count

    def set_seq_start(self, text, loc, tokens):
        # First block: each line introduces a new species.
        self.sequences.append([tokens.species, tokens.sequence])
        self.current_sequence += 1

    def set_start_block(self, tokens):
        # End of block
        # Reset the counter
        self.current_sequence = 0

    def set_seq_continue(self, text, loc, tokens):
        # Later (interleaved) blocks: append bases to the species at the
        # same position in the block.
        append_to = self.sequences[self.current_sequence]
        append_to[1] += tokens.sequence
        self.current_sequence += 1

    def set_continue_block(self, tokens):
        self.current_sequence = 0

    def parse(self, s):
        """Parse alignment text `s`, returning [name, sequence] pairs.

        Raises AlignmentError on a parse failure or any inconsistency
        between the header counts and the actual sequences.
        """
        try:
            defs = self.root_parser.parseString(s)
        # 'as' form works on the Python 2.7 this code targets and is also
        # valid Python 3 syntax (the original 'except E, p' is not).
        except ParseException as p:
            log.error("Error in Alignment Parsing:" + str(p))
            log.error("A common cause of this error is having whitespace"
                      ", i.e. spaces or tabs, in the species names. Please check this and remove"
                      " all whitespace from species names, or replace them with e.g. underscores")

            raise AlignmentError

        # Check that all the sequences are equal length
        slen = None
        for nm, seq in self.sequences:
            if slen is None:
                # Use the first as the test case
                slen = len(seq)
            else:
                if len(seq) != slen:
                    # (typo fix: "sequences length" -> "sequence length")
                    log.error("Bad alignment file: Not all species have the same sequence length")
                    raise AlignmentError

        # Not all formats have a heading, but if we have one do some checking
        if self.sequence_length is not None:
            if self.sequence_length != slen:
                log.error("Bad Alignment file: sequence length count in header does not match"
                          " sequence length in file, please check")
                raise AlignmentError

        if self.species_count is not None:
            if len(self.sequences) != self.species_count:
                log.error("Bad Alignment file: species count in header does not match"
                          " number of sequences in file, please check")
                raise AlignmentError

        return self.sequences


def parse(s):
    """Module-level convenience wrapper around AlignmentParser.parse."""
    return AlignmentParser().parse(s)


class Alignment(object):
    def __init__(self):
        # Maps species name -> sequence string.
        self.species = {}
        self.sequence_len = 0

    def __str__(self):
        # BUG FIX: the original read  "..." % self.species, self.sequence_len
        # -- without parentheses the % bound only to self.species, so the
        # expression was a tuple and raised a TypeError (two %s against one
        # mapping operand) whenever str() was called on an Alignment.
        return "Alignment(%s species, %s codons)" % (
            len(self.species), self.sequence_len)

    def same_as(self, other):
        """Return True if `other` has identical length and species data."""
        if self.sequence_len != other.sequence_len:
            log.warning("Alignments not the same, length differs %s: %s", self.sequence_len, other.sequence_len)
            return False

        if self.species != other.species:
            log.warning("Alignments not the same. "
                        "This alignment has %s species, the alignment from the previous "
                        "analysis had %s.", len(self.species), len(other.species))
            return False

        return True

    def from_parser_output(self, defs):
        """A series of species / sequences tuples
        e.g def = ("dog", "GATC"), ("cat", "GATT")
        """
        species = {}
        sequence_len = None
        for spec, seq in defs:
            # log.debug("Found Sequence for %s: %s...", spec, seq[:20])
            if spec in species:
                # (message fix: the original said "Repeated ... is repeated")
                log.error("Species name '%s' is repeated "
                          "in alignment", spec)
                raise AlignmentError

            # Assign it
            species[spec] = seq

            if sequence_len is None:
                sequence_len = len(seq)
            else:
                if len(seq) != sequence_len:
                    log.error("Sequence length of %s "
                              "differs from previous sequences", spec)
                    raise AlignmentError
        log.debug("Found %d species with sequence length %d",
                  len(species), sequence_len)

        # Overwrite these
        self.species = species
        self.sequence_len = sequence_len

    def read(self, pth):
        """Read and parse the alignment file at `pth`."""
        if not os.path.exists(pth):
            log.error("Cannot find sequence file '%s'", pth)
            raise AlignmentError

        log.info("Reading alignment file '%s'", pth)
        text = open(pth, 'rU').read()
        self.from_parser_output(parse(text))

    def write(self, pth):
        self.write_phylip(pth)

    def write_phylip(self, pth):
        """Write the alignment to `pth` in phylip format."""
        fd = open(pth, 'w')
        log.debug("Writing phylip file '%s'", pth)

        species_count = len(self.species)
        sequence_len = len(iter(self.species.itervalues()).next())

        fd.write("%d %d\n" % (species_count, sequence_len))
        for species, sequence in self.species.iteritems():
            # we use a version of phylip which can have longer species names, up to 100
            shortened = "%s    " % (species[:99])
            fd.write(shortened)
            fd.write(sequence)
            fd.write("\n")
        fd.close()
class SubsetAlignment(Alignment):
    """Create an alignment based on some others and a subset definition"""
    def __init__(self, source, subset):
        """create an alignment for this subset

        source -- an Alignment to take columns from
        subset -- provides .columns, the site indices to extract
                  (presumably 0-based -- the +1 below converts to a
                  1-based site number for the error message; verify
                  against the subset module)
        """
        Alignment.__init__(self)

        #let's do a basic check to make sure that the specified sites aren't > alignment length
        site_max = max(subset.columns)+1
        log.debug("Max site in data_blocks: %d; max site in alignment: %d" %(site_max, source.sequence_len))
        if site_max>source.sequence_len:
            log.error("Site %d is specified in [data_blocks], but the alignment only has %d sites. Please check." %(site_max, source.sequence_len))
            raise AlignmentError

        # Pull out the columns we need
        for species_name, old_sequence in source.species.iteritems():
            new_sequence = ''.join([old_sequence[i] for i in subset.columns])
            self.species[species_name] = new_sequence

        # An empty source alignment would leave us with nothing -- fail loudly
        if not self.species:
            log.error("No species found in %s", self)
            raise AlignmentError

        # All sequences are the same length, so measure any one of them
        self.sequence_len = len(self.species.itervalues().next())

class TestAlignment(Alignment):
    """Good for testing stuff"""
    def __init__(self, text):
        # Build an alignment directly from an in-memory string instead of a file
        Alignment.__init__(self)
        self.from_parser_output(parse(text))
#under the terms of the GNU General Public License as published by the
#Free Software Foundation, either version 3 of the License, or (at your
#option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details. You should have received a copy
#of the GNU General Public License along with this program. If not, see
#<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
#program, the RAxML program, and the PyParsing library,
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.

import logging
log = logging.getLogger("analysis")

import os
import shutil

from alignment import Alignment, SubsetAlignment
import threadpool
import scheme
import subset
import results
import threading
from util import PartitionFinderError
import util

class AnalysisError(PartitionFinderError):
    pass


class Analysis(object):
    """Performs the analysis and collects the results"""
    def __init__(self, cfg, force_restart=False, threads=-1):
        # cfg is a partfinder Configuration; validate() raises on bad config.
        cfg.validate()
        self.cfg = cfg
        # threads == 1 means run serially; anything else uses the thread pool
        # (see analyse_scheme).  -1 presumably means "auto" -- the pool
        # decides; confirm in threadpool.py.
        self.threads = threads

        self.results = results.AnalysisResults(self.cfg.model_selection)

        log.info("Beginning Analysis")
        self.process_restart(force_restart)

        # Check for old analyses to see if we can use the old data
        self.cfg.check_for_old_config()

        # Make some folders for the analysis
        self.cfg.make_output_folders()
        self.make_alignment(cfg.alignment_path)
        self.make_tree(cfg.user_tree_topology_path)

        # We need this to block the threads for critical stuff
        self.lock = threading.Condition(threading.Lock())

    def process_restart(self, force_restart):
        """Clear previous output: everything if force_restart, otherwise only
        the schemes folder (subset results are kept and reused)."""
        if force_restart:
            # Remove everything
            if os.path.exists(self.cfg.output_path):
                log.warning("Deleting all previous workings in '%s'", self.cfg.output_path)
                shutil.rmtree(self.cfg.output_path)
        else:
            # Just remove the schemes folder
            if os.path.exists(self.cfg.schemes_path):
                log.info("Removing Schemes in '%s' (they will be recalculated from existing subset data)", self.cfg.schemes_path)
                shutil.rmtree(self.cfg.schemes_path)

    def analyse(self):
        """Run the subclass-provided do_analysis() and return the results."""
        self.do_analysis()
        return self.results

    def make_alignment(self, source_alignment_path):
        """Load the source alignment and copy it into the start-tree folder,
        refusing to continue if a different copy is already there."""
        # Make the alignment
        self.alignment = Alignment()
        self.alignment.read(source_alignment_path)

        # We start by copying the alignment
        self.alignment_path = os.path.join(self.cfg.start_tree_path, 'source.phy')
        if os.path.exists(self.alignment_path):
            # Make sure it is the same
            old_align = Alignment()
            old_align.read(self.alignment_path)
            if not old_align.same_as(self.alignment):
                log.error("Alignment file has changed since previous run. You need to use the force-restart option.")
                raise AnalysisError

        else:
            self.alignment.write(self.alignment_path)

    def need_new_tree(self, tree_path):
        """Return True if no usable starting tree exists at tree_path.

        A file counts as usable if it contains a ';' -- a complete newick
        tree; a file without one is assumed to be a truncated write.
        """
        if os.path.exists(tree_path):
            if ';' in open(tree_path).read():
                log.info("Starting tree file found.")
                redo_tree = False
            else:
                log.info("Starting tree file found but incomplete. Re-estimating")
                redo_tree = True
        else:
            log.info("No starting tree file found.")
            redo_tree = True

        return redo_tree

    def make_tree(self, user_path):
        """Create (or reuse) the starting tree with branch lengths.

        user_path -- optional path to a user-supplied topology; when empty
        or None a topology is estimated by the configured processor
        (phyml or raxml).
        """
        # Begin by making a filtered alignment, containing ONLY those columns
        # that are defined in the subsets
        subset_with_everything = subset.Subset(*list(self.cfg.partitions))
        self.filtered_alignment = SubsetAlignment(self.alignment, subset_with_everything)
        self.filtered_alignment_path = os.path.join(self.cfg.start_tree_path, 'filtered_source.phy')
        self.filtered_alignment.write(self.filtered_alignment_path)

        # Now we've written this alignment, we need to lock everything in
        # place, no more adding partitions, or changing them from now on.
        self.cfg.partitions.check_against_alignment(self.alignment)
        self.cfg.partitions.finalise()

        # We start by copying the alignment
        self.alignment_path = os.path.join(self.cfg.start_tree_path, 'source.phy')

        # Now check for the tree
        tree_path = self.cfg.processor.make_tree_path(self.filtered_alignment_path)

        if self.need_new_tree(tree_path) == True:
            log.debug("Estimating new starting tree, no old tree found")

            # If we have a user tree, then use that, otherwise, create a topology
            util.clean_out_folder(self.cfg.start_tree_path, keep = ["filtered_source.phy", "source.phy"])

            if user_path is not None and user_path != "":
                # Copy it into the start tree folder
                log.info("Using user supplied topology at %s", user_path)
                topology_path = os.path.join(self.cfg.start_tree_path, 'user_topology.phy')
                self.cfg.processor.dupfile(user_path, topology_path)
            else:
                log.debug(
                    "didn't find tree at %s, making a new one" % tree_path)
                topology_path = self.cfg.processor.make_topology(
                    self.filtered_alignment_path, self.cfg.datatype, self.cfg.cmdline_extras)

            # Now estimate branch lengths
            tree_path = self.cfg.processor.make_branch_lengths(
                self.filtered_alignment_path,
                topology_path,
                self.cfg.datatype,
                self.cfg.cmdline_extras)

        self.tree_path = tree_path
        log.info("Starting tree with branch lengths is here: %s", self.tree_path)

    def run_task(self, m, sub):
        """Analyse model `m` on subset `sub`; may run on a worker thread."""
        # This bit should run in parallel (forking the processor)
        self.cfg.processor.analyse(
            m,
            sub.alignment_path,
            self.tree_path,
            self.cfg.branchlengths,
            self.cfg.cmdline_extras
        )

        # Not entirely sure that WE NEED to block here, but it is safer to do
        # It shouldn't hold things up toooo long...
        self.lock.acquire()
        try:
            sub.parse_model_result(self.cfg, m)
            # Try finalising, then the result will get written out earlier...
            sub.finalise(self.cfg)
        finally:
            self.lock.release()

    def add_tasks_for_sub(self, tasks, sub):
        # One (function, args) task per model still to be processed.
        for m in sub.models_to_process:
            tasks.append((self.run_task, (m, sub)))

    def run_concurrent(self, tasks):
        # Despite the name, this is the SERIAL path (threads == 1):
        # run each task in order on this thread.
        for func, args in tasks:
            func(*args)

    def run_threaded(self, tasks):
        # Fan the tasks out over the thread pool and wait for completion.
        if not tasks:
            return
        pool = threadpool.Pool(tasks, self.threads)
        pool.join()

    def analyse_scheme(self, sch):
        """Analyse every subset in scheme `sch` and record a SchemeResult."""
        # Progress
        self.cfg.progress.next_scheme()

        # Prepare by reading everything in first
        tasks = []
        for sub in sch:
            sub.prepare(self.cfg, self.alignment)
            self.add_tasks_for_sub(tasks, sub)

        # Now do the analysis
        if self.threads == 1:
            self.run_concurrent(tasks)
        else:
            self.run_threaded(tasks)

        # Now see if we're done
        for sub in sch:
            # ALL subsets should already be finalised in the task. We just
            # check again here
            if not sub.finalise(self.cfg):
                log.error("Failed to run models %s; not sure why", ", ".join(list(sub.models_to_do)))
                raise AnalysisError

        # AIC needs the number of sequences
        number_of_seq = len(self.alignment.species)
        result = scheme.SchemeResult(sch, number_of_seq, self.cfg.branchlengths, self.cfg.model_selection)
        self.results.add_scheme_result(sch, result)

        return result
#PartitionFinder also includes the PhyML
#program, the RAxML program, the PyParsing library, and the python-cluster library
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.


import logging
log = logging.getLogger("method")

import os
import math
import scheme
import algorithm
import submodels
import subset
from analysis import Analysis, AnalysisError
import neighbour

class UserAnalysis(Analysis):
    """Analyse exactly the schemes the user listed in the .cfg file."""

    def do_analysis(self):
        log.info("Performing User analysis")
        current_schemes = [s for s in self.cfg.user_schemes]
        scheme_count = len(current_schemes)
        subset_count = subset.count_subsets()

        self.cfg.progress.begin(scheme_count, subset_count)
        if scheme_count > 0:
            for s in current_schemes:
                res = self.analyse_scheme(s)

                # Write out the scheme
                self.cfg.reporter.write_scheme_summary(s, res)
        else:
            log.error("Search set to 'user', but no user schemes detected in .cfg file. Please check.")
            raise AnalysisError

        self.cfg.progress.end()

        self.cfg.reporter.write_best_scheme(self.results)


class StrictClusteringAnalysis(Analysis):
    """
    This analysis uses model parameters to guess at similar partitions, then
    just joins them together this is much less accurate than other methods, but
    a LOT quicker - it runs in order N time (where N is the number of initial
    datablocks), whereas the greedy algorithm is still N squared.
    """

    def do_analysis(self):
        log.info("Performing strict clustering analysis")

        partnum = len(self.cfg.partitions)
        # N starting subsets merge pairwise down to 1: 2N-1 subsets and N
        # schemes are analysed in total.
        subset_count = 2 * partnum - 1
        scheme_count = partnum
        self.cfg.progress.begin(scheme_count, subset_count)

        # Start with the most partitioned scheme
        start_description = range(len(self.cfg.partitions))
        start_scheme = scheme.create_scheme(
            self.cfg, "start_scheme", start_description)

        # Analyse our first scheme
        log.info("Analysing starting scheme (scheme %s)" % start_scheme.name)
        self.analyse_scheme(start_scheme)

        # Current scheme number
        cur_s = 2

        # Now we try out all clusterings of the first scheme, to see if we can
        # find a better one
        while True:
            log.info("***Strict clustering algorithm step %d of %d***" %
                     (cur_s - 1, partnum - 1))

            # Calculate the subsets which are most similar
            # e.g. combined rank ordering of euclidean distances
            # Could combine average site-rates, q matrices, and frequencies
            scheme_name = "step_%d" % (cur_s - 1)
            clustered_scheme = neighbour.get_nearest_neighbour_scheme(
                start_scheme, scheme_name, self.cfg)

            # Now analyse that new scheme
            cur_s += 1
            self.analyse_scheme(clustered_scheme)

            # Stop when we've analysed the scheme with all subsets combined
            if len(set(clustered_scheme.subsets)) == 1:  # then it's the scheme with everything together
                break
            else:
                start_scheme = clustered_scheme

        self.cfg.progress.end()

        self.cfg.reporter.write_best_scheme(self.results)


class AllAnalysis(Analysis):
    """Exhaustively analyse every possible partitioning scheme."""

    def do_analysis(self):
        log.info("Performing complete analysis")
        partnum = len(self.cfg.partitions)

        scheme_count = submodels.count_all_schemes(partnum)
        subset_count = submodels.count_all_subsets(partnum)
        self.cfg.progress.begin(scheme_count, subset_count)

        # Iterate over submodels, which we can turn into schemes afterwards in the loop
        model_iterator = submodels.submodel_iterator([], 1, partnum)

        # Schemes are simply numbered 1, 2, 3, ...
        scheme_name = 1
        for m in model_iterator:
            s = scheme.model_to_scheme(m, scheme_name, self.cfg)
            scheme_name = scheme_name + 1
            res = self.analyse_scheme(s)

            # Write out the scheme
            self.cfg.reporter.write_scheme_summary(s, res)

        self.cfg.reporter.write_best_scheme(self.results)


class GreedyAnalysis(Analysis):
    def do_analysis(self):
        '''A greedy algorithm for heuristic partitioning searches'''

        log.info("Performing greedy analysis")

        partnum = len(self.cfg.partitions)
        scheme_count = submodels.count_greedy_schemes(partnum)
        subset_count = submodels.count_greedy_subsets(partnum)

        self.cfg.progress.begin(scheme_count, subset_count)

        # Start with the most partitioned scheme
        start_description = range(len(self.cfg.partitions))
        start_scheme = scheme.create_scheme(
            self.cfg, "start_scheme", start_description)

        log.info("Analysing starting scheme (scheme %s)" % start_scheme.name)
        self.analyse_scheme(start_scheme)

        step = 1
        cur_s = 2

        # Now we try out all lumpings of the current scheme, to see if we can
        # find a better one and if we do, we just keep going
        while True:
            log.info("***Greedy algorithm step %d***" % step)

            # Get a list of all possible lumpings of the best_scheme
            lumpings = algorithm.lumpings(start_description)

            # Save the current best score we have in results
            old_best_score = self.results.best_score
            for lumped_description in lumpings:
                lumped_scheme = scheme.create_scheme(self.cfg, cur_s, lumped_description)
                cur_s += 1
                # This is just checking to see if a scheme is any good, if it
                # is, we remember and write it later
                self.analyse_scheme(lumped_scheme)

            # Did our best score change (It ONLY gets better -- see in
            # results.py)
            if self.results.best_score == old_best_score:
                # It didn't, so we're done
                break

            # Let's look further. We use the description from our best scheme
            # (which will be the one that just changed in the last lumpings
            # iteration)
            start_description = self.results.best_result.scheme.description

            # Rename and record the best scheme for this step
            self.results.best_scheme.name = "step_%d" % step
            self.cfg.reporter.write_scheme_summary(
                self.results.best_scheme, self.results.best_result)

            # If it's the scheme with everything equal, quit
            if len(set(start_description)) == 1:
                break

            # Go do the next round...
            step += 1

        log.info("Greedy algorithm finished after %d steps" % step)
        log.info("Highest scoring scheme is scheme %s, with %s score of %.3f" %
                 (self.results.best_scheme.name, self.cfg.model_selection,
                  self.results.best_score))

        self.cfg.reporter.write_best_scheme(self.results)


class RelaxedClusteringAnalysis(Analysis):
    '''
    A relaxed clustering algorithm for heuristic partitioning searches

    1. Rank subsets by their similarity (defined by clustering-weights)
    2. Analyse cluster-percent of the most similar schemes
    3. Take the scheme that improves the AIC/BIC score the most
    4. Quit if no improvements.
215 | ''' 216 | 217 | def do_analysis(self): 218 | log.info("Performing relaxed clustering analysis") 219 | 220 | stop_at = self.cfg.cluster_percent * 0.01 221 | 222 | model_selection = self.cfg.model_selection 223 | partnum = len(self.cfg.partitions) 224 | 225 | scheme_count = submodels.count_relaxed_clustering_schemes(partnum, self.cfg.cluster_percent) 226 | subset_count = submodels.count_relaxed_clustering_subsets(partnum, self.cfg.cluster_percent) 227 | 228 | self.cfg.progress.begin(scheme_count, subset_count) 229 | 230 | # Start with the most partitioned scheme, and record it. 231 | start_description = range(len(self.cfg.partitions)) 232 | start_scheme = scheme.create_scheme( 233 | self.cfg, "start_scheme", start_description) 234 | log.info("Analysing starting scheme (scheme %s)" % start_scheme.name) 235 | self.analyse_scheme(start_scheme) 236 | self.cfg.reporter.write_scheme_summary( 237 | self.results.best_scheme, self.results.best_result) 238 | 239 | 240 | # Start by remembering that we analysed the starting scheme 241 | subset_counter = 1 242 | step = 1 243 | while True: 244 | 245 | log.info("***Relaxed clustering algorithm step %d of %d***" % (step, partnum - 1)) 246 | name_prefix = "step_%d" % (step) 247 | 248 | # Get a list of all possible lumpings of the best_scheme, ordered 249 | # according to the clustering weights 250 | lumped_subsets = neighbour.get_ranked_clustered_subsets( 251 | start_scheme, self.cfg) 252 | 253 | # reduce the size of the lumped subsets to cluster_percent long 254 | cutoff = int(math.ceil(len(lumped_subsets)*stop_at)) #round up to stop zeros 255 | lumped_subsets = lumped_subsets[:cutoff] 256 | 257 | # Now analyse the lumped schemes 258 | lumpings_done = 0 259 | old_best_score = self.results.best_score 260 | 261 | for subset_grouping in lumped_subsets: 262 | scheme_name = "%s_%d" % (name_prefix, lumpings_done + 1) 263 | lumped_scheme = neighbour.make_clustered_scheme( 264 | start_scheme, scheme_name, subset_grouping, self.cfg) 
265 | 266 | new_result = self.analyse_scheme(lumped_scheme) 267 | 268 | log.debug("Difference in %s: %.1f", self.cfg.model_selection, (new_result.score-old_best_score)) 269 | 270 | lumpings_done += 1 271 | 272 | 273 | if self.results.best_score != old_best_score: 274 | log.info("Analysed %.1f percent of the schemes for this step. The best " 275 | "scheme changed the %s score by %.1f units.", 276 | self.cfg.cluster_percent, self.cfg.model_selection, 277 | (self.results.best_score - old_best_score)) 278 | 279 | #write out the best scheme 280 | self.results.best_scheme.name = "step_%d" % step 281 | self.cfg.reporter.write_scheme_summary( 282 | self.results.best_scheme, self.results.best_result) 283 | 284 | 285 | # Now we find out which is the best lumping we know of for this step 286 | start_scheme = self.results.best_scheme 287 | else: 288 | log.info("Analysed %.1f percent of the schemes for this step and found no schemes " 289 | "that improve the score, stopping" , self.cfg.cluster_percent) 290 | break 291 | 292 | # We're done if it's the scheme with everything together 293 | if len(set(lumped_scheme.subsets)) == 1: 294 | break 295 | 296 | step += 1 297 | 298 | 299 | 300 | log.info("Relaxed clustering algorithm finished after %d steps" % step) 301 | log.info("Best scoring scheme is scheme %s, with %s score of %.3f" 302 | % (self.results.best_scheme.name, model_selection, self.results.best_score)) 303 | 304 | self.cfg.reporter.write_best_scheme(self.results) 305 | 306 | 307 | def choose_method(search): 308 | if search == 'all': 309 | method = AllAnalysis 310 | elif search == 'user': 311 | method = UserAnalysis 312 | elif search == 'greedy': 313 | method = GreedyAnalysis 314 | elif search == 'hcluster': 315 | method = StrictClusteringAnalysis 316 | elif search == 'rcluster': 317 | method = RelaxedClusteringAnalysis 318 | else: 319 | log.error("Search algorithm '%s' is not yet implemented", search) 320 | raise AnalysisError 321 | return method 322 | 
-------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/analysis_method.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/analysis_method.pyc -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/config.pyc -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/main.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | #This program is free software: you can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, the PyParsing library, and the python-cluster library 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 
import logging
import sys
import shlex
import os

logging.basicConfig(
    format="%(levelname)-8s | %(asctime)s | %(message)s",
    level=logging.INFO
)

# curdir = os.path.dirname(os.path.abspath(__file__))
# rootdir, here = os.path.split(curdir)
# config_path = os.path.join(rootdir, 'logging.cfg')
# from logging import config as _logconfig
# _logconfig.fileConfig(config_path)

log = logging.getLogger("main")
from optparse import OptionParser

# We import everything here as it forces all of debug regions to be loaded
import version
import config
import analysis_method
import util
import reporter
import progress
import datetime
import parser
import raxml
import phyml
from partfinder import current


def debug_arg_callback(option, opt, value, parser):
    """optparse callback: split a comma-separated region list into a list."""
    setattr(parser.values, option.dest, value.split(','))


def get_debug_regions():
    """Return the names of every logger currently registered -- these are
    the valid targets for --debug-output."""
    mlogger = logging.Logger.manager
    return mlogger.loggerDict.keys()


def set_debug_regions(regions):
    """Switch the given logger regions to DEBUG level.

    Returns None on success, or the set of invalid region names if any
    were unknown (in which case no levels are changed).
    """
    if regions is None:
        return
    valid_regions = set(get_debug_regions())
    if 'all' in regions:
        regions = valid_regions
    else:
        regions = set(regions)
        errors = set()
        for r in regions:
            if r not in valid_regions:
                log.error("'%s' is not a valid debug region", r)
                errors.add(r)
        if errors:
            return errors

    for r in regions:
        logging.getLogger(r).setLevel(logging.DEBUG)

    # Enhance the format so debug output shows the region name too
    fmt = logging.Formatter("%(levelname)-8s | %(asctime)s | %(name)-10s | %(message)s")
    logging.getLogger("").handlers[0].setFormatter(fmt)

    return None


def clean_folder(folder):
    """Delete all the files in a folder
    Thanks to StackOverflow for this:
    http://stackoverflow.com/questions/185936/delete-folder-contents-in-python
    """
    for the_file in os.listdir(folder):
        file_path = os.path.join(folder, the_file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            log.error("Couldn't delete file from phylofiles folder: %s" % e)
            # BUGFIX: the exception class lives in the util module; the bare
            # name raised a NameError here instead of the intended error
            raise util.PartitionFinderError


def parse_args(datatype, cmdargs=None):
    """Build the option parser and parse the command line.

    Returns (options, args); if no folder argument was given, help is
    printed and args is empty.
    """
    usage = """usage: python %prog [options]

    PartitionFinder and PartitionFinderProtein are designed to discover optimal
    partitioning schemes for nucleotide and amino acid sequence alignments.
    They are also useful for finding the best model of sequence evolution for datasets.

    The Input: : the full path to a folder containing:
        - A configuration file (partition_finder.cfg)
        - A nucleotide/aa alignment in Phylip format
    Take a look at the included 'example' folder for more details.

    The Output: A file in the same directory as the .cfg file, named
    'analysis' This file contains information on the best
    partitioning scheme, and the best model for each partition

    Usage Examples:
        >python %prog example
        Analyse what is in the 'example' sub-folder in the current folder.

        >python %prog -v example
        Analyse what is in the 'example' sub-folder in the current folder, but
        show all the debug output

        >python %prog -c ~/data/frogs
        Check the configuration files in the folder data/frogs in the current
        user's home folder.

        >python %prog --force-restart ~/data/frogs
        Deletes any data produced by the previous runs (which is in
        ~/data/frogs/output) and starts afresh
    """
    op = OptionParser(usage)
    op.add_option(
        "-v", "--verbose",
        action="store_true", dest="verbose",
        help="show debug logging information (equivalent to --debug-out=all)")
    op.add_option(
        "-c", "--check-only",
        action="store_true", dest="check_only",
        help="just check the configuration files, don't do any processing")
    op.add_option(
        "--force-restart",
        action="store_true", dest="force_restart",
        help="delete all previous output and start afresh (!)")
    op.add_option(
        "-p", "--processes",
        type="int", dest="processes", default=-1, metavar="N",
        help="Number of concurrent processes to use."
        " Use -1 to match the number of cpus on the machine."
        " The default is to use -1.")
    op.add_option(
        "--show-python-exceptions",
        action="store_true", dest="show_python_exceptions",
        help="If errors occur, print the python exceptions")
    op.add_option(
        "--save-phylofiles",
        action="store_true", dest="save_phylofiles",
        help="save all of the phyml or raxml output. This can take a lot of space(!)")
    op.add_option(
        "--dump-results",
        action="store_true", dest="dump_results",
        help="Dump all results to a binary file. "
        "This is only of use for testing purposes.")
    op.add_option(
        "--compare-results",
        action="store_true", dest="compare_results",
        help="Compare the results to previously dumped binary results. "
        "This is only of use for testing purposes.")
    op.add_option(
        "--raxml",
        action="store_true", dest="raxml",
        help="Use RAxML (rather than PhyML) to do the analysis. See the manual"
    )
    op.add_option(
        "--cmdline-extras",
        type="str", dest="cmdline_extras", default="", metavar="N",
        help="Add additional commands to the phyml or raxml commandlines that PF uses."
        "This can be useful e.g. if you want to change the accuracy of lnL calculations"
        " ('-e' option in raxml), or use multi-threaded versions of raxml that require"
        " you to specify the number of threads you will let raxml use ('-T' option in "
        "raxml. E.g. you might specify this: --cmdline-extras ' -e 2.0 -T 10 '"
        " N.B. MAKE SURE YOU PUT YOUR EXTRAS IN QUOTES, and only use this command if you"
        " really know what you're doing and are very familiar with raxml and"
        " PartitionFinder"
    )
    op.add_option(
        "--weights",
        type="str", dest="cluster_weights", default=None, metavar="N",
        help="Mainly for algorithm development. Only use it if you know what you're doing."
        "A list of weights to use in the clustering algorithms. This list allows you "
        "to assign different weights to: the overall rate for a subset, the base/amino acid "
        "frequencies, model parameters, and alpha value. This will affect how subsets are "
        "clustered together. For instance: --cluster_weights '1, 2, 5, 1', would weight "
        "the base frequencies 2x more than the overall rate, the model parameters 5x "
        "more, and the alpha parameter the same as the model rate"
    )
    op.add_option(
        "--rcluster-percent",
        type="float", dest="cluster_percent", default=10.0, metavar="N",
        help="This defines the proportion of possible schemes that the relaxed clustering"
        " algorithm will consider before it stops looking. The default is 10%."
        "e.g. --cluster-percent 10.0"
    )
    op.add_option(
        '--debug-output',
        type='string',
        action='callback',
        dest='debug_output',
        metavar="REGION,REGION,...",
        callback=debug_arg_callback,
        help="(advanced option) Provide a list of debug regions to output extra "
        "information about what the program is doing."
        " Possible regions are 'all' or any of {%s}."
        % ",".join(get_debug_regions())
    )

    if cmdargs is None:
        options, args = op.parse_args()
    else:
        options, args = op.parse_args(cmdargs)

    options.datatype = datatype
    # We should have one argument: the folder to read the configuration from
    if not args:
        op.print_help()
    else:
        check_options(op, options)

    return options, args


def check_options(op, options):
    """Validate parsed options, configure debug regions, and derive the
    phylogeny program from the --raxml flag."""
    # Error checking
    if options.dump_results and options.compare_results:
        op.error("options --dump_results and --compare_results are mutually exclusive!")

    if options.verbose:
        set_debug_regions(['all'])
    else:
        errors = set_debug_regions(options.debug_output)
        if errors is not None:
            bad = ",".join(list(errors))
            op.error("Invalid debug regions: %s" % bad)

    # Default to phyml (store_true gives True/None, so test truthiness
    # rather than comparing to 1)
    if options.raxml:
        options.phylogeny_program = 'raxml'
    else:
        options.phylogeny_program = 'phyml'

    #A warning for people using the Pthreads version of RAxML
    # if options.cmdline_extras.count("-T") > 0:
    #     log.warning("It looks like you're using a Pthreads version of RAxML. Be aware "
    #         "that the default behaviour of PartitionFinder is to run one version of RAxML per "
    #         "available processor. This might not be what you want with Pthreads - since the "
    #         "minimum number of threads per RAxML run is 2 (i.e. -T 2). Make sure to limit the "
    #         "total number of RAxML runs you start using the -p option in PartitionFinder. "
    #         "Specifically, the total number of processors you will use with the Pthreads "
    #         "version is the number you set via the -T option in --cmdline-extras, multiplied "
    #         "by the number of processors you set via the -p option in PartitionFinder. "
    #         "You should also be aware that the Pthreads version of RAxML has a rare but "
    #         "known bug on some platforms. This bug results in infinite liklelihood values "
    #         "if it happens on your dataset, PartitionFinder will give an error. In that case "
    #         "you should switch back to using a single-threaded version of RAxML, e.g. the "
    #         "SSE3 or AVX version."
    #         "See the manual for more info.")


def check_python_version():
    """Check the python version is above 2.7 but lower than 3.0

    Returns 0 when the version is too old (the caller does not currently
    act on this), logs a warning for Python 3+.
    """
    python_version = float(
        "%d.%d" % (sys.version_info[0], sys.version_info[1]))

    log.info("You have Python version %.1f" % python_version)

    # BUGFIX: compare version tuples, not floats -- float("2.10") == 2.1
    # would wrongly fail, and 3.0 exactly escaped the old `> 3.0` test
    if sys.version_info[:2] < (2, 7):
        log.error("Your Python version is %.1f, but this program requires Python 2.7. "
                  "Please upgrade to version 2.7 by visiting www.python.org/getit, or by following"
                  " the instructions in the PartitionFinder manual." % python_version)
        return 0

    if sys.version_info[0] >= 3:
        log.warning("Your Python version is %.1f. This program was not built to run with "
                    "version 3 or higher. To guarantee success, please use Python 2.7.x" % python_version)


def main(name, datatype, passed_args=None):
    """Top-level driver: parse arguments, load configuration, run the
    chosen analysis method, and report timings.

    Returns 0 on success, 1 on failure, 2 if help was printed.
    """
    v = version.get_version()

    # If passed_args is None, this will use sys.argv
    options, args = parse_args(datatype, passed_args)
    if not args:
        # Help has already been printed
        return 2

    log.info("------------- %s %s -----------------", name, v)
    start_time = datetime.datetime.now().replace(microsecond=0)  # start the clock ticking

    check_python_version()

    # BUGFIX: join with spaces, otherwise the logged command line runs all
    # the arguments together into one word
    if passed_args is None:
        cmdline = " ".join(sys.argv)
    else:
        cmdline = " ".join(passed_args)

    log.info("Command-line arguments used: %s", cmdline)

    # Load, using the first argument as the folder
    try:
        cfg = config.Configuration(datatype,
                                   options.phylogeny_program,
                                   options.save_phylofiles,
                                   options.cmdline_extras,
                                   options.cluster_weights,
                                   options.cluster_percent)

        # Set up the progress callback
        progress.TextProgress(cfg)
        cfg.load_base_path(args[0])

        if options.check_only:
            log.info("Exiting without processing (because of the -c/--check-only option) ...")
        else:
            try:
                # Now try processing everything....
                if not cfg.save_phylofiles:
                    clean_folder(cfg.phylofiles_path)
                method = analysis_method.choose_method(cfg.search)
                reporter.TextReporter(cfg)
                anal = method(cfg,
                              options.force_restart,
                              options.processes)
                results = anal.analyse()

                if options.dump_results:
                    results.dump(cfg)
                elif options.compare_results:
                    results.compare(cfg)
            finally:
                # Make sure that we reset the configuration
                cfg.reset()

        # Successful exit
        end_time = datetime.datetime.now().replace(microsecond=0)
        processing_time = end_time - start_time

        log.info("Total processing time: %s (h:m:s)" % processing_time)
        log.info("Processing complete.")

        return 0

    except util.PartitionFinderError:
        log.error("Failed to run. See previous errors.")
        # Reraise if we were called by call_main, or if the options is set
        if options.show_python_exceptions or passed_args is not None:
            raise

    except KeyboardInterrupt:
        log.error("User interrupted the Program")

    return 1


def call_main(datatype, cmdline):
    """Run main() from a single command-line string (used by the
    PartitionFinder / PartitionFinderProtein entry scripts)."""
    cmdargs = shlex.split(cmdline)
    # BUGFIX: the original `"" % datatype` raised TypeError (not all
    # arguments converted). NOTE(review): the program-name template was
    # lost in this copy of the file -- reconstructed as the datatype name;
    # confirm against the original release.
    main("%s" % datatype, datatype, cmdargs)


#Copyright (C) 2012 Robert Lanfear and Brett Calcott
#
#This program is free software: you can redistribute it and/or modify it
#under the terms of the GNU General
#Public License as published by the
#Free Software Foundation, either version 3 of the License, or (at your
#option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details. You should have received a copy
#of the GNU General Public License along with this program. If not, see
#. PartitionFinder also includes the PhyML
#program, the RAxML program, and the PyParsing library,
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.

import subset
import scheme
from algorithm import euclidean_distance

import logging
log = logging.getLogger("cluster")


def get_ranked_list(final_distances):
    """
    Return the closest subsets defined by a distance matrix usually there will
    just be a pair that's closer than all other pairs BUT, it's feasible (if
    unlikely) that >2 subsets are equally close. This is possible if, e.g. all
    weights are zero. Then we just want to group all the equally close
    subsets...

    So, we return a list of all the closest subsets
    """

    # Let's make a dict keyed by the distance in the matrix, using setdefault
    # to add things, in case there are subsets with identical pairwise
    # distances
    distances = {}
    for pair in final_distances:
        d = final_distances[pair]

        # Get any subs that we already know are that distance apart as a set
        # default to empty set if it's a new distance
        subs = distances.setdefault(d, set())

        # Add subs that correspond to this cell
        subs.add(pair[0])
        subs.add(pair[1])

    # Produce the groups ordered from smallest to largest distance
    ordered_subsets = []
    unique_distances = list(distances.keys())
    unique_distances.sort()

    for d in unique_distances:
        ordered_subsets.append(list(distances[d]))

    return ordered_subsets


def get_pairwise_dists(subsets, rates, freqs, model, alpha, weights):
    """Compute weighted, normalised euclidean distances between all pairs
    of subsets.

    The parallel lists rates/freqs/model/alpha hold the parameter values
    for each entry of `subsets`. Each parameter's pairwise distances are
    normalised by their maximum, weighted by `weights` (a dict keyed by
    "rate", "freqs", "model", "alpha"), and summed.

    Returns (final_dists, closest_pairs) where final_dists maps each
    subset pair to its total distance, and closest_pairs is a tuple of
    the subsets at the minimum distance. Assumes at least two subsets.
    """
    import itertools
    #set up all pairwise combinations as iterators
    s = itertools.combinations(subsets, 2)
    r = itertools.combinations(rates, 2)
    f = itertools.combinations(freqs, 2)
    m = itertools.combinations(model, 2)
    a = itertools.combinations(alpha, 2)

    #now we can izip over ALL of them at once (isn't python great!)
    subset_pairs = []
    r_dists = []
    f_dists = []
    m_dists = []
    a_dists = []

    for pair in itertools.izip(s, r, f, m, a):
        subset_pair = pair[0]
        subset_pairs.append(subset_pair)

        r_dist = euclidean_distance(pair[1][0], pair[1][1])
        f_dist = euclidean_distance(pair[2][0], pair[2][1])
        m_dist = euclidean_distance(pair[3][0], pair[3][1])
        a_dist = euclidean_distance(pair[4][0], pair[4][1])

        r_dists.append(r_dist)
        f_dists.append(f_dist)
        m_dists.append(m_dist)
        a_dists.append(a_dist)

    #and now we get the minmax values, used to normalise each parameter's
    #distances onto the same scale
    max_r = max(r_dists)
    max_f = max(f_dists)
    max_m = max(m_dists)
    max_a = max(a_dists)

    #now we go over them again, and normalise, weight, and sum
    final_dists = {}
    closest_pairs = []
    mindist = None
    for i, pair in enumerate(itertools.izip(r_dists, f_dists, m_dists, a_dists, subset_pairs)):

        # A zero maximum means all pairs are identical for that parameter,
        # so its contribution is zero (and we avoid dividing by zero)
        if max_r > 0.0:
            r_final = pair[0] * float(weights["rate"]) / float(max_r)
        else:
            r_final = 0.0
        if max_f > 0.0:
            f_final = pair[1] * float(weights["freqs"]) / float(max_f)
        else:
            f_final = 0.0
        if max_m > 0.0:
            m_final = pair[2] * float(weights["model"]) / float(max_m)
        else:
            m_final = 0.0
        if max_a > 0:
            a_final = pair[3] * float(weights["alpha"]) / float(max_a)
        else:
            a_final = 0.0

        total_dist = r_final + f_final + m_final + a_final

        final_dists[pair[4]] = total_dist

        #check to see if this is the closest
        #BUGFIX: test `mindist is None` first -- the old order compared a
        #number against None, which only worked through Python 2's
        #arbitrary cross-type ordering
        if mindist is None or total_dist < mindist:
            mindist = total_dist
            closest_pairs = pair[4]  # pair[4] is the tuple of two subsets
        elif total_dist == mindist:
            #we want a tuple with all of the subsets that are equally close
            #with no replicates, so we use tuple(set())
            closest_pairs = tuple(set(closest_pairs + pair[4]))

    return final_dists, closest_pairs


def get_distance_matrix(start_scheme, weights):
    """Collect each subset's estimated parameters from `start_scheme` and
    return (final_dists, closest_pairs) as computed by
    get_pairwise_dists()."""
    #1. get the parameter lists for each subset
    subsets = []  # a list of subsets, so we know the order things appear in the lists
    rates = []    # tree length
    freqs = []    # amino acid or base frequencies
    model = []    # model parameters e.g. A<->C
    alpha = []    # alpha parameter of the gamma distribution of rates across sites

    for s in start_scheme.subsets:
        param_dict = s.get_param_values()
        subsets.append(s)
        rates.append([param_dict["rate"]])
        freqs.append(param_dict["freqs"])
        model.append(param_dict["model"])
        alpha.append([param_dict["alpha"]])

    #get pairwise euclidean distances, and minmax values, for all parameters
    final_dists, closest_pairs = get_pairwise_dists(
        subsets, rates, freqs, model, alpha, weights)

    return final_dists, closest_pairs


def get_closest_subsets(start_scheme, weights):
    """Find the closest subsets in a scheme
    """
    final_dists, closest_pairs = get_distance_matrix(start_scheme, weights)

    return closest_pairs


def get_ranked_clustered_subsets(start_scheme, cfg):
    """
    The idea here is to take a scheme, and perform some analyses to find out
    how the subsets in that scheme cluster.

    We then just return the list of schemes, ordered by closest to most distant
    in the clustering space
    """
    final_dists, closest_pairs = get_distance_matrix(
        start_scheme, cfg.cluster_weights)

    ranked_subset_groupings = get_ranked_list(final_dists)
    return ranked_subset_groupings


def make_clustered_scheme(start_scheme, scheme_name, subsets_to_cluster, cfg):
    """Return a new Scheme in which the subsets in `subsets_to_cluster`
    are merged into a single subset; all other subsets are unchanged."""

    #1. Create a new subset that merges the subsets_to_cluster
    newsub_parts = []

    for s in subsets_to_cluster:
        newsub_parts = newsub_parts + list(s.partitions)
    newsub = subset.Subset(*tuple(newsub_parts))

    #2. Then we define a new scheme with those merged subsets
    all_subs = [s for s in start_scheme.subsets]

    #pop out the subsets we're going to join together
    for s in subsets_to_cluster:
        all_subs.remove(s)

    #and now we add back in our new subset...
    all_subs.append(newsub)

    #and finally create the clustered scheme
    final_scheme = scheme.Scheme(cfg, str(scheme_name), all_subs)

    return final_scheme


def get_nearest_neighbour_scheme(start_scheme, scheme_name, cfg):
    """
    The idea here is to take a scheme, and perform some analyses to find a
    neighbouring scheme, where the neighbour has one less subset than the
    current scheme. Really this is just progressive clustering, but specified
    to work well with PartitionFinder
    """

    #1. First we get the closest subsets, based on some weights. This will almost always
    #   be two subsets, but it's generalised so that it could be all of them...
    #   cluster weights is a dictionary of weights, keyed by: rate, freqs, model
    #   for the overall subset rate, the base/aminoacid frequencies, and the model parameters
    closest_subsets = get_closest_subsets(start_scheme, cfg.cluster_weights)

    # Renamed from `scheme` to avoid shadowing the imported scheme module
    nn_scheme = make_clustered_scheme(
        start_scheme, scheme_name, closest_subsets, cfg)

    return nn_scheme


#Copyright (C) 2012 Robert Lanfear and Brett Calcott
#
#This program is free software: you can redistribute it and/or modify it
#under the terms of the GNU General Public License as published by the
#Free Software Foundation, either version 3 of the License, or (at your
#option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details. You should have received a copy
#of the GNU General Public License along with this program. If not, see
#. PartitionFinder also includes the PhyML
#program, the RAxML program, and the PyParsing library,
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.
import logging
log = logging.getLogger("parser")

from pyparsing import (
    Word, OneOrMore, alphas, nums, Suppress, Optional, Group, stringEnd,
    delimitedList, pythonStyleComment, line, lineno, col, Keyword, Or,
    NoMatch, CaselessKeyword, ParseException, SkipTo)

# debugging
# ParserElement.verbose_stacktrace = True

import partition
import scheme
import subset
import phyml_models
import raxml_models
import config
from util import PartitionFinderError

# Only used internally


class ParserError(Exception):
    """Raised for problems found while parsing the .cfg file.

    Captures the offending line text, line number and column so that
    format_message() can point the user at the exact location.
    """
    def __init__(self, text, loc, msg):
        self.line = line(loc, text)
        self.col = col(loc, text)
        self.lineno = lineno(loc, text)
        self.msg = msg

    def format_message(self):
        """Return a one-line, human-readable description of the error."""
        return "%s at line:%s, column:%s" % (self.msg, self.lineno, self.col)


class Parser(object):
    """Parse configuration files.

    Parse actions attached to the grammar push their results straight
    into the configuration object passed to __init__.
    """

    def __init__(self, cfg):
        # The configuration object that receives all parsed settings.
        self.cfg = cfg

        # Running state used while a file is being parsed.
        self.schemes = []
        self.subsets = []
        self.init_grammar()
        # When True, user scheme definitions are parsed but not recorded
        # (used when the search mode makes user schemes irrelevant).
        self.ignore_schemes = False

    def init_grammar(self):
        """Set up the parsing classes.

        Any changes to the grammar of the config file should be done here.
        """
        # Some syntax that we need, but don't bother looking at
        SEMICOLON = (Suppress(";"))
        EQUALS = Suppress("=")
        OPENB = Suppress("(")
        CLOSEB = Suppress(")")
        BACKSLASH = Suppress("\\")
        DASH = Suppress("-")

        # Top Section
        FILENAME = Word(alphas + nums + '-_.')
        alignmentdef = Keyword('alignment') + EQUALS + FILENAME + SEMICOLON
        alignmentdef.setParseAction(self.set_alignment)

        treedef = Keyword('user_tree_topology') + EQUALS + FILENAME + SEMICOLON
        treedef.setParseAction(self.set_user_tree)

        def simple_option(name):
            # A "name = word;" option whose value goes through set_option().
            opt = Keyword(name) + EQUALS + \
                Word(alphas + nums + '-_') + SEMICOLON
            opt.setParseAction(self.set_simple_option)
            return opt

        branchdef = simple_option('branchlengths')

        MODELNAME = Word(alphas + nums + '+')
        modellist = delimitedList(MODELNAME)
        modeldef = Keyword("models") + EQUALS + Group(
            (
                CaselessKeyword("all") | CaselessKeyword("mrbayes") |
                CaselessKeyword("raxml") | CaselessKeyword("beast") |
                CaselessKeyword("all_protein") |
                CaselessKeyword("all_protein_gamma") |
                CaselessKeyword("all_protein_gammaI")
            )("predefined") |
            Group(modellist)("userlist")) + SEMICOLON
        modeldef.setParseAction(self.set_models)

        modseldef = simple_option("model_selection")
        topsection = alignmentdef + Optional(treedef) + branchdef + \
            modeldef + modseldef

        # Partition Parsing
        column = Word(nums)
        partname = Word(alphas + '_-' + nums)
        partdef = column("start") +\
            Optional(DASH + column("end")) +\
            Optional(BACKSLASH + column("step"))

        partdef.setParseAction(self.define_range)
        partdeflist = Group(OneOrMore(Group(partdef)))
        partition = Optional("charset") + partname("name") + \
            EQUALS + partdeflist("parts") + SEMICOLON
        partition.setParseAction(self.define_partition)
        partlist = OneOrMore(Group(partition))
        partsection = Suppress("[data_blocks]") + partlist

        # Scheme Parsing
        schemename = Word(alphas + '_-' + nums)
        # Make a copy, cos we set a different action on it
        partnameref = partname.copy()
        partnameref.setParseAction(self.check_part_exists)

        subset = Group(OPENB + delimitedList(partnameref("name")) + CLOSEB)
        subset.setParseAction(self.define_subset)

        scheme = Group(OneOrMore(subset))
        schemedef = schemename("name") + \
            EQUALS + scheme("scheme") + SEMICOLON
        schemedef.setParseAction(self.define_schema)

        schemelist = OneOrMore(Group(schemedef))

        schemealgo = simple_option("search")
        schemesection = \
            Suppress("[schemes]") + schemealgo + Optional(schemelist)

        # We've defined the grammar for each section. Here we just put it
        # all together
        self.config_parser = (
            topsection + partsection + schemesection + stringEnd)

    def set_alignment(self, text, loc, tokens):
        """Parse action: record the alignment file name on the config."""
        value = tokens[1]
        self.cfg.set_alignment_file(value)
        # TODO Make sure it is readable!
        # raise ParserError(text, loc, "No '%s' defined in the configuration" % var)

    def set_user_tree(self, text, loc, tokens):
        """Parse action: record the user-supplied tree topology file."""
        self.cfg.user_tree = tokens[1]

    def set_simple_option(self, text, loc, tokens):
        """Parse action for one-word options (branchlengths, search, ...)."""
        try:
            self.cfg.set_option(tokens[0], tokens[1])
        except config.ConfigurationError:
            raise ParserError(text, loc, "Invalid option in .cfg file")

    def set_models(self, text, loc, tokens):
        """Parse action: validate the 'models' setting and store it on cfg.

        Accepts either a predefined keyword (all, mrbayes, beast, raxml,
        all_protein, all_protein_gamma, all_protein_gammaI) or an explicit
        comma-separated model list, then cross-checks the resulting models
        against cfg.datatype.
        """
        if self.cfg.phylogeny_program == "phyml":
            self.phylo_models = phyml_models
        elif self.cfg.phylogeny_program == "raxml":
            self.phylo_models = raxml_models

        all_dna_mods = set(self.phylo_models.get_all_dna_models())
        all_protein_mods = set(self.phylo_models.get_all_protein_models())
        total_mods = all_dna_mods.union(all_protein_mods)

        mods = tokens[1]
        DNA_mods = 0
        prot_mods = 0
        if mods.userlist:
            modlist = mods.userlist
            log.info("Setting 'models' to a user-specified list")
        else:
            modsgroup = mods.predefined
            keyword = modsgroup.lower()
            if keyword == "all":
                modlist = list(all_dna_mods)
                DNA_mods += 1
            elif keyword == "mrbayes":
                modlist = set(phyml_models.get_mrbayes_models())
                DNA_mods += 1
            elif keyword == "beast":
                modlist = set(phyml_models.get_beast_models())
                DNA_mods += 1
            elif keyword == "raxml":
                modlist = set(phyml_models.get_raxml_models())
                DNA_mods += 1
            elif keyword == "all_protein":
                modlist = set(self.phylo_models.get_all_protein_models())
                prot_mods += 1
            elif keyword == "all_protein_gamma":
                if self.cfg.phylogeny_program == "raxml":
                    modlist = set(raxml_models.get_protein_models_gamma())
                    prot_mods += 1
                else:
                    log.error("The models option 'all_protein_gamma' is only available with raxml"
                              ", (the --raxml commandline option). Please check and try again")
                    # BUG FIX: the original did 'raise ParserError' bare,
                    # which fails with TypeError because ParserError
                    # requires (text, loc, msg).
                    raise ParserError(
                        text, loc, "'all_protein_gamma' requires raxml")
            elif keyword == "all_protein_gammai":
                # BUG FIX: the original compared modsgroup.lower() against
                # the mixed-case literal "all_protein_gammaI", which can
                # never match, so this branch was unreachable and modlist
                # was left unbound (NameError further down).
                if self.cfg.phylogeny_program == "raxml":
                    modlist = set(raxml_models.get_protein_models_gammaI())
                    prot_mods += 1
                else:
                    log.error("The models option 'all_protein_gammaI' is only available with raxml"
                              ", (the --raxml commandline option). Please check and try again")
                    raise ParserError(
                        text, loc, "'all_protein_gammaI' requires raxml")
            else:
                # Defensive: the grammar should make this unreachable.
                raise ParserError(
                    text, loc, "Unknown predefined models setting '%s'"
                    % modsgroup)
            log.info("Setting 'models' to '%s'", modsgroup)

        self.cfg.models = set()
        for m in modlist:
            if m not in total_mods:
                raise ParserError(
                    text, loc, "'%s' is not a valid model for phylogeny "
                    "program %s. Please check the lists of valid models in the"
                    " manual and try again" % (m, self.cfg.phylogeny_program))

            if m in all_dna_mods:
                DNA_mods += 1
            if m in all_protein_mods:
                prot_mods += 1

            self.cfg.models.add(m)

        log.info("The models included in this analysis are: %s",
                 ", ".join(self.cfg.models))

        # Check the model list against the datatype so the user gets a
        # clear message when running the wrong front-end script.
        if DNA_mods > 0 and prot_mods == 0 and self.cfg.datatype == "DNA":
            log.info("Setting datatype to 'DNA'")
        elif DNA_mods == 0 and prot_mods > 0 and self.cfg.datatype == "protein":
            log.info("Setting datatype to 'protein'")
        elif DNA_mods == 0 and prot_mods > 0 and self.cfg.datatype == "DNA":
            raise ParserError(
                text, loc, "The models list contains only models of amino acid change."
                " PartitionFinder.py only works with nucleotide models (like the GTR model)."
                " If you're analysing an amino acid dataset, please use PartitionFinderProtein,"
                " which you can download here: www.robertlanfear.com/partitionfinder."
                " The models line in the .cfg file is")
        elif DNA_mods > 0 and prot_mods == 0 and self.cfg.datatype == "protein":
            raise ParserError(
                text, loc, "The models list contains only models of nucleotide change."
                " PartitionFinderProtein.py only works with amino acid models (like the WAG model)."
                " If you're analysing a nucleotide dataset, please use PartitionFinder.py,"
                " which you can download here: www.robertlanfear.com/partitionfinder"
                " The models line in the .cfg file is")
        else:  # we've got a mixture of models.
            raise ParserError(
                text, loc, "The models list contains a mixture of protein and nucleotide models."
                " If you're analysing a nucleotide dataset, please use PartitionFinder."
                " If you're analysing an amino acid dataset, please use PartitionFinderProtein."
                " You can download both of these programs from here: www.robertlanfear.com/partitionfinder"
                " The models line in the .cfg file is")

    def define_range(self, part):
        """Turn the 1, 2 or 3 tokens into ints, supplying defaults if needed.

        Returns [start, end, step]; a missing end defaults to start and a
        missing step defaults to 1.
        """
        fromc = int(part.start)
        toc = int(part.end) if part.end else fromc
        stepc = int(part.step) if part.step else 1
        return [fromc, toc, stepc]

    def define_partition(self, text, loc, part_def):
        """Parse action: create a Partition from a parsed data block.

        Construction registers the partition on cfg as a side effect.
        """
        try:
            partition.Partition(
                self.cfg, part_def.name, *tuple(part_def.parts))
        except partition.PartitionError:
            raise ParserError(
                text, loc, "Error in '%s' can be found" % part_def.name)

    def check_part_exists(self, text, loc, partref):
        """Parse action: ensure a scheme only references defined partitions."""
        if partref.name not in self.cfg.partitions:
            raise ParserError(text, loc, "Partition %s not defined" %
                              partref.name)

    def define_subset(self, text, loc, subset_def):
        """Parse action: build a Subset from a parenthesised name group."""
        try:
            # Look the partitions up by name
            parts = [self.cfg.partitions[nm] for nm in subset_def[0]]

            # Keep a running list of these till we define the schema below
            self.subsets.append(subset.Subset(*tuple(parts)))
        except subset.SubsetError:
            raise ParserError(text, loc, "Error creating subset...")

    def define_schema(self, text, loc, scheme_def):
        """Parse action: assemble the accumulated subsets into a Scheme."""
        try:
            # Take (and clear) the accumulated subsets for this scheme
            subs = tuple(self.subsets)
            self.subsets = []

            if not self.ignore_schemes:
                sch = scheme.Scheme(self.cfg, scheme_def.name, subs)
                self.cfg.user_schemes.add_scheme(sch)

        except (scheme.SchemeError, subset.SubsetError):
            raise ParserError(text, loc, "Error in '%s' can be found" %
                              scheme_def.name)

    def parse_file(self, fname):
        """Read the named .cfg file and parse its contents."""
        s = open(fname, 'rU').read()
        self.parse_configuration(s)

    def parse_configuration(self, s):
        """Parse a .cfg string, logging helpful hints on failure.

        Raises PartitionFinderError on any parse problem.
        """
        try:
            self.result = self.config_parser.ignore(
                pythonStyleComment).parseString(s)
        except ParserError as p:
            log.error(p.format_message())
            raise PartitionFinderError
        except ParseException as p:
            log.error("There was a problem loading your .cfg file, please check and try again")
            log.error(p)

            # See if one of the required options was missing or misplaced;
            # pyparsing's message names what it expected to find.
            expectations = ["models", "search", "[schemes]", "[data_blocks]",
                            "model_selection", "branchlengths", "alignment"]
            missing = None
            for e in expectations:
                if p.msg.count(e):
                    missing = e

            if missing:
                log.info("It looks like the '%s' option might be missing or in the wrong place" % (missing))
                log.info("Or perhaps something is wrong in the lines just before the '%s' option" % (missing))
                log.info("Please double check the .cfg file and try again")
            else:
                log.info("The line causing the problem is this: '%s'" % (p.line))
                log.info("Please check that line, and make sure it appears in the right place in the .cfg file.")
                log.info("If it looks OK, try double-checking the semi-colons on other lines in the .cfg file")
            raise PartitionFinderError
import logging
log = logging.getLogger("partition")

from util import PartitionFinderError


class PartitionError(PartitionFinderError):
    """Raised for any problem defining or registering a partition."""
    pass


def columnset_to_string(colset):
    """Render a set of zero-based column indices as a sorted, 1-based,
    comma-separated string for user-facing messages."""
    # Add one, cos we converted to zero base...
    return ', '.join([str(x + 1) for x in sorted(colset)])


class PartitionSet(object):
    """The set of all partitions loaded from a configuration file"""
    def __init__(self):
        """A set of Partitions"""
        # Monotonic counter used to number partitions as they are added.
        self.sequence = 0
        self.parts_by_name = {}
        self.parts_by_number = {}
        self.partitions = set()

        # All of the columns covered by every partition, kept both as a
        # sorted list and as a set for fast overlap checks.
        self.columns = []
        self.columnset = set()

        self.finalised = False

    def __str__(self):
        return "PartitionSet(%s)" % ", ".join([str(p) for p in self.partitions])

    def add_partition(self, p):
        """Register a Partition, rejecting duplicates and column overlap.

        Raises PartitionError if the set is finalised, the name is taken,
        or p shares columns with a previously added partition.
        """
        if self.finalised:
            log.error("Cannot add partitions after a Scheme has been created")
            raise PartitionError

        if p.name in self.parts_by_name:
            log.error("Attempt to add %s when that name already exists", p)
            raise PartitionError

        overlap = []
        for otherp in self.partitions:
            if p.columnset & otherp.columnset:
                overlap.append(str(otherp))
        if overlap:
            log.error("%s overlaps with previously defined "
                      "partitions: %s",
                      p, ", ".join(overlap))
            raise PartitionError

        # Assign the partition to this set
        p.partition_set = self

        # Make sure we can look up by name and by number
        self.parts_by_name[p.name] = p
        self.parts_by_number[self.sequence] = p
        p.sequence = self.sequence
        self.sequence += 1
        self.partitions.add(p)

        # Merge all the columns
        self.columns.extend(p.columns)
        self.columns.sort()
        self.columnset |= p.columnset

    def finalise(self):
        """Ensure that no more partitions can be added"""
        self.finalised = True

    def check_against_alignment(self, alignment):
        """Check the partition definitions against the alignment.

        Warns (does not raise) when alignment columns are not covered by
        any partition.
        """
        # TODO: pbly should check the converse too -- stuff defined that is
        # missing??
        self.fullset = set(range(0, alignment.sequence_len))
        leftout = self.fullset - self.columnset
        if leftout:
            # This does not raise an error, just a warning.
            # NOTE: log.warn is a deprecated alias; use log.warning.
            log.warning(
                "Columns defined in partitions range from %s to %s, "
                "but these columns in the alignment are missing: %s",
                self.columns[0] + 1, self.columns[-1] + 1,
                columnset_to_string(leftout))

    # We can treat this a bit like a dictionary
    def __iter__(self):
        return iter(self.partitions)

    def __len__(self):
        return len(self.partitions)

    def __getitem__(self, k):
        # Integer keys look up by insertion order, anything else by name.
        if type(k) is int:
            return self.parts_by_number[k]
        return self.parts_by_name[k]

    def __contains__(self, k):
        return k in self.parts_by_name

    def names(self):
        return self.parts_by_name.keys()


class Partition(object):
    """A set of columns from an alignment"""
    def __init__(self, cfg, name=None, *partlist):
        """A named partition.

        partlist contains (start, stop[, step]) tuples using the 1-based,
        inclusive convention from the config file; they are converted to
        zero-based column indices here. Raises PartitionError on malformed
        ranges or internal overlap. Construction registers the partition
        with cfg.partitions.
        """
        self.name = name
        description = []

        # This will get set later, when they are added to PartitionSet
        self.partition_set = None

        # We now need to convert to column definitions. Note that these are
        # zero based, which is not how they are specified in the config. So
        # we must do some fiddling to make sure they are right. In addition,
        # we use range(...) which excludes the final column, whereas the
        # definitions assume inclusive...
        columns = []
        for p in partlist:

            # Make sure it is sensible
            if len(p) < 2 or len(p) > 3:
                log.error("The Partition '%s' should contain\
                          a list of start, a stop, and an optional step",
                          self.name)
                raise PartitionError
            if len(p) == 2:
                start, stop = p
                step = 1
            else:
                start, stop, step = p
            if start > stop:
                log.error("Partition '%s' has beginning after end (%s > %s)",
                          name, start, stop)
                raise PartitionError

            # Actually, subtracting 1 deals with both issues...
            columns.extend(range(start - 1, stop, step))
            description.append((start, stop, step))

        self.description = tuple(description)

        # Normalise it all
        columns.sort()
        columnset = set(columns)

        # If there was any overlap then these will differ...
        if len(columns) != len(columnset):
            log.error("Partition '%s' has internal overlap", name)
            raise PartitionError

        # Both of these are useful?
        self.columns = columns
        self.columnset = columnset

        cfg.partitions.add_partition(self)
        log.debug("Created %s", self)

    def __repr__(self):
        outlist = ", ".join(["%s-%s\\%s" % tuple(p) for p in self.description])
        return "Partition<%s: %s>" % (self.name, outlist)

    def __str__(self):
        outlist = ", ".join(["%s-%s\\%s" % tuple(p) for p in self.description])
        return "Partition(%s, %s)" % (self.name, outlist)
You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, and the PyParsing library, 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import logging 19 | log = logging.getLogger("analysis") 20 | 21 | import config 22 | 23 | # TODO need some error checking! 24 | 25 | # number of free parameters in substitution model, listed as "model+base_frequencies" 26 | # and the model string for PhyML as the second of the tuple. 27 | _base_models = { 28 | "JC" : (0+0, "-m 000000 -f '0.25, 0.25, 0.25, 0.25'"), 29 | "K80" : (1+0, "-m 010010 -f '0.25, 0.25, 0.25, 0.25'"), 30 | "TrNef" : (2+0, "-m 010020 -f '0.25, 0.25, 0.25, 0.25'"), 31 | "K81" : (2+0, "-m 012210 -f '0.25, 0.25, 0.25, 0.25'"), 32 | "TVMef" : (4+0, "-m 012314 -f '0.25, 0.25, 0.25, 0.25'"), 33 | "TIMef" : (3+0, "-m 012230 -f '0.25, 0.25, 0.25, 0.25'"), 34 | "SYM" : (5+0, "-m 012345 -f '0.25, 0.25, 0.25, 0.25'"), 35 | "F81" : (0+3, "-m 000000 -f e"), 36 | "HKY" : (1+3, "-m 010010 -f e"), 37 | "TrN" : (2+3, "-m 010020 -f e"), 38 | "K81uf" : (2+3, "-m 012210 -f e"), 39 | "TVM" : (4+3, "-m 012314 -f e"), 40 | "TIM" : (3+3, "-m 012230 -f e"), 41 | "GTR" : (5+3, "-m 012345 -f e") 42 | } 43 | 44 | # number of free parameters in substitution model, listed as "aa_frequencies" 45 | # and the model string for PhyML as the second of the tuple 46 | _base_protein_models = { 47 | "LG" : (0, "-m LG -d aa"), 48 | "WAG" : (0, "-m WAG -d aa"), 49 | "mtREV" : (0, "-m mtREV -d aa"), 50 | "Dayhoff" : (0, "-m Dayhoff -d aa"), 51 | "DCMut" : (0, "-m DCMut -d aa"), 52 | "JTT" : (0, "-m JTT -d aa"), 53 | "VT" : (0, "-m VT -d aa"), 54 | "Blosum62" : (0, "-m Blosum62 -d aa"), 55 | "CpREV" : (0, "-m CpREV -d aa"), 56 | "RtREV" : (0, "-m RtREV -d aa"), 57 | "MtMam" : (0, "-m MtMam -d aa"), 
58 | "MtArt" : (0, "-m MtArt -d aa"), 59 | "HIVb" : (0, "-m HIVb -d aa"), 60 | "HIVw" : (0, "-m HIVw -d aa"), 61 | } 62 | 63 | # All the functions in here return the same thing with the same parameters, 64 | # this just caches the return ... 65 | def memoize(f): 66 | cache= {} 67 | def memf(*x): 68 | if x not in cache: 69 | cache[x] = f(*x) 70 | return cache[x] 71 | return memf 72 | 73 | @memoize 74 | def get_all_dna_models(): 75 | ''' 76 | Return a list of all implemented _base_models 77 | ''' 78 | model_list = [] 79 | for model in _base_models.keys(): 80 | model_list.append(model) 81 | model_list.append("%s+I" %(model)) 82 | model_list.append("%s+G" %(model)) 83 | model_list.append("%s+I+G" %(model)) 84 | return model_list 85 | 86 | @memoize 87 | def get_all_protein_models(): 88 | ''' 89 | Return a list of all implemented _base__protein_models 90 | ''' 91 | model_list = [] 92 | for model in _base_protein_models.keys(): 93 | model_list.append(model) 94 | model_list.append("%s+F" %(model)) 95 | model_list.append("%s+I" %(model)) 96 | model_list.append("%s+G" %(model)) 97 | model_list.append("%s+I+G" %(model)) 98 | model_list.append("%s+I+F" %(model)) 99 | model_list.append("%s+G+F" %(model)) 100 | model_list.append("%s+I+G+F" %(model)) 101 | return model_list 102 | 103 | @memoize 104 | def get_mrbayes_models(): 105 | ''' 106 | Return a list of all models implemented in MrBayes. Thanks to Ainsley Seago for this. 107 | ''' 108 | mrbayes_base_models = ["JC", "F81", "K80", "HKY", "SYM", "GTR"] 109 | model_list = [] 110 | for model in mrbayes_base_models: 111 | model_list.append(model) 112 | model_list.append("%s+I" %(model)) 113 | model_list.append("%s+G" %(model)) 114 | model_list.append("%s+I+G" %(model)) 115 | return model_list 116 | 117 | def get_beast_models(): 118 | ''' 119 | Return a list of all models implemented in BEAST v1.7.2. 
120 | ''' 121 | beast_base_models = ["K80", "TrNef", "SYM", "HKY", "TrN", "GTR"] 122 | model_list = [] 123 | for model in beast_base_models: 124 | model_list.append(model) 125 | model_list.append("%s+I" %(model)) 126 | model_list.append("%s+G" %(model)) 127 | model_list.append("%s+I+G" %(model)) 128 | return model_list 129 | 130 | 131 | @memoize 132 | def get_raxml_models(): 133 | ''' 134 | Return a list of all models implemented in RaxML. Thanks to Ainsley Seago for this. 135 | ''' 136 | model_list = ["GTR+G", "GTR+I+G"] 137 | return model_list 138 | 139 | @memoize 140 | def get_protein_models(): 141 | ''' 142 | Return a list of all protein models implemented in PhyML 143 | ''' 144 | model_list = [ 145 | "LG", 146 | "cheese" 147 | ] 148 | return model_list 149 | 150 | 151 | 152 | @memoize 153 | def get_num_params(modelstring): 154 | ''' 155 | Input a model string like HKY+I+G or LG+G+F, and get the number of parameters 156 | ''' 157 | elements = modelstring.split("+") 158 | model_name = elements[0] 159 | if model_name in _base_models.keys(): 160 | model_params = _base_models[model_name][0] 161 | else: 162 | model_params = _base_protein_models[model_name][0] 163 | if "F" in elements[1:]: 164 | model_params = model_params+19-1 #the -1 here is to account for the fact we add 1 for the + in '+F' below 165 | 166 | extras = modelstring.count("+") 167 | total = model_params+extras 168 | log.debug("Model: %s Params: %d" %(modelstring, total)) 169 | 170 | return total 171 | 172 | @memoize 173 | def get_model_difficulty(modelstring): 174 | ''' 175 | Input a model string like HKY+I+G or LG+G+F, and a guess about how long it takes to analyse 176 | Right now, this is done with a simple hack. 
I just return a number that is the number of params 177 | plus a modifier for extra stuff like +I and +G 178 | the hardest models are +I+G, then +G, then +I 179 | this is just used to rank models for ordering the analysis 180 | The return is a 'difficulty' score that can be used to rank models 181 | ''' 182 | elements = modelstring.split("+") 183 | 184 | model_params = get_num_params(modelstring) 185 | 186 | difficulty = 0 187 | if "G" in elements[1:]: 188 | difficulty = difficulty + 2000 189 | if "I" in elements[1:]: 190 | difficulty = difficulty + 1000 191 | 192 | extras = modelstring.count("+") 193 | total = model_params+extras+difficulty 194 | log.debug("Model: %s Difficulty: %d" %(modelstring, total)) 195 | 196 | return total 197 | 198 | 199 | 200 | @memoize 201 | def get_model_commandline(modelstring): 202 | ''' 203 | Input a model string, and get the PhyML command line 204 | ''' 205 | 206 | # This is always the same - optimise brlens and model, not tree 207 | commandline = ["-o lr "] 208 | 209 | elements = modelstring.split("+") 210 | model_name = elements[0] 211 | 212 | # Everything but the first element 213 | extras = elements[1:] 214 | 215 | if model_name in _base_models.keys(): #DNA models 216 | commandline.append(_base_models[model_name][1]) 217 | else: #protein models 218 | commandline.append(_base_protein_models[model_name][1]) 219 | if "F" in extras: 220 | commandline.append("-f e") #emprical AA frequencies (+19 params) 221 | else: 222 | commandline.append("-f m") #AA frequences from the model (+0 params) 223 | 224 | 225 | if "I" in extras: 226 | commandline.append("-v e") 227 | if "G" in extras: 228 | commandline.append("-a e") 229 | commandline.append("-c 4") 230 | else: 231 | commandline.append("-c 1") 232 | 233 | return " ".join(commandline) 234 | 235 | if __name__ == "__main__": 236 | print " ", 237 | print "Name".ljust(12), 238 | print "Params".ljust(10), 239 | print "CommandLine" 240 | for i, model in enumerate(get_all_models()): 241 | print 
str(i+1).rjust(2), 242 | print model.ljust(12), 243 | print str(get_num_params(model)).ljust(10), 244 | print get_model_commandline(model) 245 | for model in get_protein_models(): 246 | print model 247 | 248 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/phyml_models.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/phyml_models.pyc -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/progress.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | #This program is free software: you can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, and the PyParsing library, 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 
import logging
log = logging.getLogger("progress")


class Progress(object):
    """Base class defining the progress-reporting hooks used during analysis.

    Registers itself on the configuration (cfg.progress) so other parts of
    the program can reach the reporter without a direct reference.
    """

    def __init__(self, cfg):
        self.cfg = cfg
        self.cfg.progress = self

    def begin(self, scheme_count, subset_count):
        pass

    def next_scheme(self):
        pass

    def subset_begin(self, sub):
        pass

    def subset_done(self, sub):
        pass

    def end(self):
        pass


class NoProgress(Progress):
    """Silent reporter: inherits the no-op hooks unchanged."""
    pass


class TextProgress(Progress):
    """Reporter that logs progress as subsets are completed."""

    def begin(self, scheme_count, subset_count):
        # Remember the totals so later calls can compute percentages.
        self.scheme_count = scheme_count
        self.subset_count = subset_count
        self.schemes_analysed = 0
        self.subsets_analysed = set()

        log.info("PartitionFinder will have to analyse %d subsets to complete this analysis", subset_count)
        log.info("This will result in %s schemes being created", scheme_count)
        if subset_count > 10000:
            log.warning("%d is a lot of subsets, this might take a long time to analyse", subset_count)
            log.warning("Perhaps consider using a different search scheme instead (see Manual)")

    def next_scheme(self):
        self.schemes_analysed += 1

    def subset_begin(self, sub):
        pass

    def subset_done(self, sub):
        # Only report when this subset's name has not been counted before.
        if sub.name in self.subsets_analysed:
            return
        self.subsets_analysed.add(sub.name)
        done_so_far = len(self.subsets_analysed)
        percent_done = (float(done_so_far) * 100.0) / float(self.subset_count)
        log.info("Finished subset %d/%d, %.2f percent done", done_so_far, self.subset_count, percent_done)

    def end(self):
        pass
17 | 18 | """Run raxml and parse the output""" 19 | 20 | import logging 21 | log = logging.getLogger("raxml") 22 | 23 | import subprocess 24 | import shlex 25 | import os 26 | import shutil 27 | import sys 28 | import fnmatch 29 | import util 30 | 31 | from pyparsing import ( 32 | Word, Literal, nums, Suppress, ParseException, 33 | SkipTo, OneOrMore, Regex 34 | ) 35 | 36 | import raxml_models as models 37 | 38 | _binary_name = 'raxml' 39 | if sys.platform == 'win32': 40 | _binary_name += ".exe" 41 | 42 | from util import PhylogenyProgramError 43 | 44 | 45 | class RaxmlError(PhylogenyProgramError): 46 | pass 47 | 48 | 49 | def find_program(): 50 | """Locate the binary ...""" 51 | pth = os.path.abspath(__file__) 52 | 53 | # Split off the name and the directory... 54 | pth, notused = os.path.split(pth) 55 | pth, notused = os.path.split(pth) 56 | pth = os.path.join(pth, "programs", _binary_name) 57 | pth = os.path.normpath(pth) 58 | 59 | log.debug("Checking for program %s", _binary_name) 60 | if not os.path.exists(pth) or not os.path.isfile(pth): 61 | log.error("No such file: '%s'", pth) 62 | raise RaxmlError 63 | log.debug("Found program %s at '%s'", _binary_name, pth) 64 | return pth 65 | 66 | _raxml_binary = None 67 | 68 | 69 | def run_raxml(command): 70 | global _raxml_binary 71 | if _raxml_binary is None: 72 | _raxml_binary = find_program() 73 | 74 | # Add in the command file 75 | log.debug("Running 'raxml %s'", command) 76 | command = "\"%s\" %s" % (_raxml_binary, command) 77 | 78 | # Note: We use shlex.split as it does a proper job of handling command 79 | # lines that are complex 80 | p = subprocess.Popen( 81 | shlex.split(command), 82 | shell=False, 83 | stdout=subprocess.PIPE, 84 | stderr=subprocess.PIPE) 85 | 86 | # Capture the output, we might put it into the errors 87 | stdout, stderr = p.communicate() 88 | # p.terminate() 89 | 90 | if p.returncode != 0: 91 | log.error("RAxML did not execute successfully") 92 | log.error("RAxML output follows, in case it's 
helpful for finding the problem") 93 | log.error("%s", stdout) 94 | log.error("%s", stderr) 95 | raise RaxmlError 96 | 97 | 98 | def dupfile(src, dst): 99 | # Make a copy or a symlink so that we don't overwrite different model runs 100 | # of the same alignment 101 | 102 | # TODO maybe this should throw...? 103 | try: 104 | if os.path.exists(dst): 105 | os.remove(dst) 106 | shutil.copyfile(src, dst) 107 | except OSError: 108 | log.error("Cannot link/copy file %s to %s", src, dst) 109 | raise RaxmlError 110 | 111 | 112 | def make_topology(alignment_path, datatype, cmdline_extras): 113 | '''Make a MP tree to start the analysis''' 114 | log.info("Making MP tree for %s", alignment_path) 115 | 116 | cmdline_extras = check_defaults(cmdline_extras) 117 | 118 | # First get the MP topology like this (-p is a hard-coded random number seed): 119 | if datatype == "DNA": 120 | command = "-y -s '%s' -m GTRGAMMA -n MPTREE -p 123456789 %s" % ( 121 | alignment_path, cmdline_extras) 122 | elif datatype == "protein": 123 | command = "-y -s '%s' -m PROTGAMMALG -n MPTREE -p 123456789 %s" % ( 124 | alignment_path, cmdline_extras) 125 | else: 126 | log.error("Unrecognised datatype: '%s'" % (datatype)) 127 | raise(RaxmlError) 128 | 129 | #force raxml to write to the dir with the alignment in it 130 | aln_dir, fname = os.path.split(alignment_path) 131 | command = ''.join([command, " -w '%s'" % os.path.abspath(aln_dir)]) 132 | 133 | run_raxml(command) 134 | dir, aln = os.path.split(alignment_path) 135 | tree_path = os.path.join(dir, "RAxML_parsimonyTree.MPTREE") 136 | return tree_path 137 | 138 | 139 | def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras): 140 | #Now we re-estimate branchlengths using a GTR+G model on the (unpartitioned) dataset 141 | cmdline_extras = check_defaults(cmdline_extras) 142 | dir_path, fname = os.path.split(topology_path) 143 | tree_path = os.path.join(dir_path, 'topology_tree.phy') 144 | log.debug("Copying %s to %s", topology_path, 
tree_path) 145 | dupfile(topology_path, tree_path) 146 | os.remove(topology_path) # saves headaches later... 147 | 148 | if datatype == "DNA": 149 | log.info("Estimating GTR+G branch lengths on tree using RAxML") 150 | command = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s" % ( 151 | alignment_path, tree_path, os.path.abspath(dir_path), cmdline_extras) 152 | run_raxml(command) 153 | if datatype == "protein": 154 | log.info("Estimating LG+G branch lengths on tree using RAxML") 155 | command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s" % ( 156 | alignment_path, tree_path, os.path.abspath(dir_path), cmdline_extras) 157 | run_raxml(command) 158 | 159 | dir, aln = os.path.split(alignment_path) 160 | tree_path = os.path.join(dir, "RAxML_result.BLTREE") 161 | log.info("Branchlength estimation finished") 162 | 163 | # Now return the path of the final tree with branch lengths 164 | return tree_path 165 | 166 | 167 | def check_defaults(cmdline_extras): 168 | """We use some sensible defaults, but allow users to override them with extra cmdline options""" 169 | if cmdline_extras.count("-e") > 0: 170 | #then the user has specified a particular accuracy: 171 | accuracy = "" 172 | else: 173 | #we specify a default accuracy of 1 lnL unit 174 | accuracy = " -e 1.0 " 175 | 176 | #we set this in case people are using the PThreads version of RAxML 177 | #note that this is intentionally set to give an error if people use Pthreads, because 178 | #they will need to consider by hand what the optimal setting is. And, if we set it >1 179 | #then we risk massively slowing things down because PF's default is to use all possible 180 | #processors. 181 | if cmdline_extras.count("-T") > 0: 182 | num_threads = "" 183 | 184 | else: 185 | num_threads = " -T 1 " 186 | 187 | #and we'll specify the -O option, so that the program doesn't exit if there are undetermined seqs. 188 | #we'll put spaces at the start and end too, just in case... 
189 | cmdline_extras = ''.join( 190 | [" ", cmdline_extras, accuracy, num_threads, "-O "]) 191 | 192 | return cmdline_extras 193 | 194 | 195 | def analyse(model, alignment_path, tree_path, branchlengths, cmdline_extras): 196 | """Do the analysis -- this will overwrite stuff!""" 197 | 198 | # Move it to a new name to stop raxml stomping on different model analyses 199 | # dupfile(alignment_path, analysis_path) 200 | model_params = models.get_model_commandline(model) 201 | 202 | if branchlengths == 'linked': 203 | #constrain all branchlengths to be equal 204 | bl = ' -f B ' 205 | elif branchlengths == 'unlinked': 206 | #let branchlenghts vary among subsets 207 | bl = ' -f e ' 208 | else: 209 | # WTF? 210 | log.error("Unknown option for branchlengths: %s", branchlengths) 211 | raise RaxmlError 212 | 213 | cmdline_extras = check_defaults(cmdline_extras) 214 | 215 | #raxml doesn't append alignment names automatically, like PhyML, let's do that here 216 | analysis_ID = raxml_analysis_ID(alignment_path, model) 217 | 218 | #force raxml to write to the dir with the alignment in it 219 | #-e 1.0 sets the precision to 1 lnL unit. This is all that's required here, and helps with speed. 
220 | aln_dir, fname = os.path.split(alignment_path) 221 | command = " %s -s '%s' -t '%s' %s -n %s -w '%s' %s" % ( 222 | bl, alignment_path, tree_path, model_params, analysis_ID, os.path.abspath(aln_dir), cmdline_extras) 223 | run_raxml(command) 224 | 225 | 226 | def raxml_analysis_ID(alignment_path, model): 227 | dir, file = os.path.split(alignment_path) 228 | aln_name = os.path.splitext(file)[0] 229 | analysis_ID = '%s_%s.txt' % (aln_name, model) 230 | return analysis_ID 231 | 232 | 233 | def make_tree_path(alignment_path): 234 | dir, aln = os.path.split(alignment_path) 235 | tree_path = os.path.join(dir, "RAxML_result.BLTREE") 236 | return tree_path 237 | 238 | 239 | def make_output_path(alignment_path, model): 240 | analysis_ID = raxml_analysis_ID(alignment_path, model) 241 | dir, aln_file = os.path.split(alignment_path) 242 | stats_fname = "RAxML_info.%s" % (analysis_ID) 243 | stats_path = os.path.join(dir, stats_fname) 244 | tree_fname = "RAxML_result.%s" % (analysis_ID) 245 | tree_path = os.path.join(dir, tree_fname) 246 | return stats_path, tree_path 247 | 248 | 249 | def remove_files(aln_path, model): 250 | '''remove all files from the alignment directory that are produced by raxml''' 251 | dir, file = os.path.split(aln_path) 252 | analysis_ID = raxml_analysis_ID(aln_path, model) 253 | dir = os.path.abspath(dir) 254 | fs = os.listdir(dir) 255 | fnames = fnmatch.filter(fs, '*%s*' % analysis_ID) 256 | util.delete_files(fnames) 257 | 258 | 259 | class RaxmlResult(object): 260 | 261 | def __init__(self): 262 | self.rates = {} 263 | self.freqs = {} 264 | 265 | def __str__(self): 266 | return "RaxmlResult(lnl:%s, tree_size:%s, secs:%s, alphs:%s)" % ( 267 | self.lnl, self.tree_size, self.seconds, self.alpha) 268 | 269 | 270 | class Parser(object): 271 | def __init__(self, datatype): 272 | 273 | if datatype == "protein": 274 | letters = "ARNDCQEGHILKMFPSTWYV" 275 | elif datatype == "DNA": 276 | letters = "ATCG" 277 | else: 278 | log.error("Unknown datatype '%s', 
please check" % datatype) 279 | raise RaxmlError 280 | 281 | FLOAT = Word(nums + '.-').setParseAction(lambda x: float(x[0])) 282 | 283 | L = Word(letters, exact=1) 284 | COLON = Suppress(":") 285 | 286 | LNL_LABEL = Regex("Final GAMMA.+:") | Literal("Likelihood:") 287 | TIME_LABEL = Regex("Overall Time.+:") | Regex("Overall Time.+tion ") 288 | ALPHA_LABEL = Literal("alpha:") 289 | TREE_SIZE_LABEL = Literal("Tree-Length:") 290 | 291 | def labeled_float(label): 292 | return Suppress(SkipTo(label)) + Suppress(label) + FLOAT 293 | 294 | lnl = labeled_float(LNL_LABEL) 295 | lnl.setParseAction(self.set_lnl) 296 | 297 | seconds = labeled_float(TIME_LABEL) 298 | seconds.setParseAction(self.set_seconds) 299 | 300 | alpha = labeled_float(ALPHA_LABEL) 301 | alpha.setParseAction(self.set_alpha) 302 | 303 | tree_size = labeled_float(TREE_SIZE_LABEL) 304 | tree_size.setParseAction(self.set_tree_size) 305 | 306 | rate = Suppress("rate") + L + Suppress("<->") + L + COLON + FLOAT 307 | rate.setParseAction(self.set_rate) 308 | rates = OneOrMore(rate) 309 | 310 | freq = Suppress("freq pi(") + L + Suppress("):") + FLOAT 311 | freq.setParseAction(self.set_freq) 312 | freqs = OneOrMore(freq) 313 | 314 | # Just look for these things 315 | self.root_parser = seconds + lnl + alpha + tree_size + rates + freqs 316 | 317 | def set_seconds(self, tokens): 318 | self.result.seconds = tokens[0] 319 | 320 | def set_lnl(self, tokens): 321 | self.result.lnl = tokens[0] 322 | 323 | def set_tree_size(self, tokens): 324 | self.result.tree_size = tokens[0] 325 | 326 | def set_alpha(self, tokens): 327 | self.result.alpha = tokens[0] 328 | 329 | def set_rate(self, tokens): 330 | basefrom, baseto, rate = tokens 331 | self.result.rates[(basefrom, baseto)] = rate 332 | 333 | def set_freq(self, tokens): 334 | base, rate = tokens 335 | self.result.freqs[base] = rate 336 | 337 | def parse(self, text): 338 | log.debug("Parsing raxml output...") 339 | self.result = RaxmlResult() 340 | try: 341 | 
self.root_parser.parseString(text) 342 | except ParseException, p: 343 | log.error(str(p)) 344 | raise RaxmlError 345 | 346 | log.debug("Result is %s", self.result) 347 | return self.result 348 | 349 | 350 | def parse(text, datatype): 351 | the_parser = Parser(datatype) 352 | return the_parser.parse(text) 353 | 354 | if __name__ == '__main__': 355 | logging.basicConfig(level=logging.DEBUG) 356 | pth = "./tests/misc/raxml_nucleotide.output" 357 | p = Parser('DNA') 358 | result = p.parse(open(pth).read()) 359 | 360 | pth = "./tests/misc/raxml_aminoacid.output" 361 | p = Parser('protein') 362 | result = p.parse(open(pth).read()) 363 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/raxml.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/raxml.pyc -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/raxml_models.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | #This program is free software: you can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. 
PartitionFinder also includes the PhyML 14 | #program, the RAxML program, and the PyParsing library, 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import logging 19 | log = logging.getLogger("analysis") 20 | 21 | import config 22 | 23 | # TODO need some error checking! 24 | 25 | # number of free parameters in substitution model, listed as "model+base_frequencies" 26 | _base_models = { 27 | "GTR" : (5+3, "") 28 | } 29 | 30 | # number of free parameters in substitution model, listed as "aa_frequencies" 31 | _base_protein_models = { 32 | "DAYHOFF" : (0, ""), 33 | "DCMUT" : (0, ""), 34 | "JTT" : (0, ""), 35 | "MTREV" : (0, ""), 36 | "WAG" : (0, ""), 37 | "RTREV" : (0, ""), 38 | "CPREV" : (0, ""), 39 | "VT" : (0, ""), 40 | "BLOSUM62" : (0, ""), 41 | "MTMAM" : (0, ""), 42 | "LG" : (0, ""), 43 | } 44 | 45 | # All the functions in here return the same thing with the same parameters, 46 | # this just caches the return ... 
47 | def memoize(f): 48 | cache= {} 49 | def memf(*x): 50 | if x not in cache: 51 | cache[x] = f(*x) 52 | return cache[x] 53 | return memf 54 | 55 | @memoize 56 | def get_protein_models_gamma(): 57 | ''' 58 | Return a list of all implemented _base__protein_models in RAxML 59 | NB there are NO models in RAxML without Gamma 60 | ''' 61 | model_list = [] 62 | for model in _base_protein_models.keys(): 63 | model_list.append("%s+G" %(model)) 64 | model_list.append("%s+G+F" %(model)) 65 | return model_list 66 | 67 | @memoize 68 | def get_protein_models_gammaI(): 69 | ''' 70 | Return a list of all implemented _base__protein_models in RAxML with invariant sites 71 | ''' 72 | model_list = [] 73 | for model in _base_protein_models.keys(): 74 | model_list.append("%s+I+G" %(model)) 75 | model_list.append("%s+I+G+F" %(model)) 76 | return model_list 77 | 78 | def get_all_protein_models(): 79 | model_list = get_protein_models_gamma() + get_protein_models_gammaI() 80 | return model_list 81 | 82 | @memoize 83 | def get_dna_models_gamma(): 84 | ''' 85 | Just one model in RAxML with +G. 86 | ''' 87 | model_list = ["GTR+G"] 88 | return model_list 89 | 90 | @memoize 91 | def get_dna_models_gammaI(): 92 | ''' 93 | Just one model in RAxML with I+G. 
94 | ''' 95 | model_list = ["GTR+I+G"] 96 | return model_list 97 | 98 | @memoize 99 | def get_all_dna_models(): 100 | model_list = get_dna_models_gamma() + get_dna_models_gammaI() 101 | return model_list 102 | 103 | @memoize 104 | def get_all_models(): 105 | model_list = get_all_DNA_models() + get_all_protein_models() 106 | return model_list 107 | 108 | @memoize 109 | def get_model_commandline(modelstring): 110 | ''' 111 | Input a model string, and get the piece of the raxml command line that defines that model 112 | ''' 113 | commandline = '-m ' 114 | elements = modelstring.split("+") 115 | model_name = elements[0] 116 | 117 | # Everything but the first element 118 | extras = elements[1:] 119 | 120 | if model_name in _base_models.keys(): #DNA models 121 | commandline = ''.join([commandline, "GTRGAMMA"]) 122 | if "I" in extras: 123 | commandline = ''.join([commandline, "I"]) 124 | else: #protein models, look like this 'PROTGAMMAILGF 125 | commandline = ''.join([commandline, "PROTGAMMA"]) 126 | if "I" in extras: 127 | commandline = ''.join([commandline, "I"]) 128 | commandline = ''.join([commandline, model_name]) 129 | if "F" in extras: 130 | commandline = ''.join([commandline, "F"]) 131 | 132 | return commandline 133 | 134 | 135 | @memoize 136 | def get_num_params(modelstring): 137 | ''' 138 | Input a model string like HKY+I+G or LG+G+F, and get the number of parameters 139 | ''' 140 | elements = modelstring.split("+") 141 | model_name = elements[0] 142 | if model_name in _base_models.keys(): 143 | model_params = _base_models[model_name][0] 144 | else: 145 | model_params = _base_protein_models[model_name][0] 146 | if "F" in elements[1:]: 147 | model_params = model_params+19-1 #the -1 here is to account for the fact we add 1 for the + in '+F' below 148 | 149 | extras = modelstring.count("+") 150 | total = model_params+extras 151 | log.debug("Model: %s Params: %d" %(modelstring, total)) 152 | 153 | return total 154 | 155 | @memoize 156 | def 
get_model_difficulty(modelstring): 157 | ''' 158 | Input a model string like HKY+I+G or LG+G+F, and a guess about how long it takes to analyse 159 | Right now, this is done with a simple hack. I just return a number that is the number of params 160 | plus a modifier for extra stuff like +I and +G 161 | the hardest models are +I+G, then +G, then +I 162 | this is just used to rank models for ordering the analysis 163 | The return is a 'difficulty' score that can be used to rank models 164 | ''' 165 | elements = modelstring.split("+") 166 | 167 | model_params = get_num_params(modelstring) 168 | 169 | difficulty = 0 170 | if "G" in elements[1:]: 171 | difficulty = difficulty + 2000 172 | if "I" in elements[1:]: 173 | difficulty = difficulty + 1000 174 | 175 | extras = modelstring.count("+") 176 | total = model_params+extras+difficulty 177 | log.debug("Model: %s Difficulty: %d" %(modelstring, total)) 178 | 179 | return total 180 | 181 | def get_raxml_protein_modelstring(modelstring): 182 | """Start with a model like this: LG+I+G+F, return a model in raxml format like this: 183 | ILGF. 
This is only used for printing out RAxML partition files""" 184 | elements = modelstring.split("+") 185 | model_name = elements[0] 186 | extras = elements[1:] 187 | 188 | raxmlstring = model_name 189 | if "F" in extras: 190 | raxmlstring = ''.join([raxmlstring, "F"]) 191 | 192 | return raxmlstring 193 | 194 | if __name__ == "__main__": 195 | print " ", 196 | print "Name".ljust(15), 197 | print "Params".ljust(10), 198 | print "Diff".ljust(10), 199 | print "CommandLine" 200 | for i, model in enumerate(get_all_DNA_models()): 201 | print str(i+1).rjust(2), 202 | print model.ljust(15), 203 | print str(get_num_params(model)).ljust(10), 204 | print str(get_model_difficulty(model)).ljust(10), 205 | print get_model_commandline(model) 206 | for i, model in enumerate(get_all_protein_models()): 207 | print str(i+1).rjust(2), 208 | print model.ljust(15), 209 | print str(get_num_params(model)).ljust(10), 210 | print str(get_model_difficulty(model)).ljust(10), 211 | print get_model_commandline(model) 212 | 213 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/raxml_models.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/raxml_models.pyc -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/reporter.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | # This program is free software: you can redistribute it and/or modify it under 4 | # the terms of the GNU General Public License as published by the Free Software 5 | # Foundation, either version 3 of the License, or (at your option) any later 6 | # version. 
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details. You should have received a copy of the GNU General Public License
# along with this program. If not, see .
# PartitionFinder also includes the PhyML program, the RAxML program, and the
# PyParsing library, all of which are protected by their own licenses and
# conditions, using PartitionFinder implies that you agree with those licences
# and conditions as well.

import logging
log = logging.getLogger("reporter")

import os

# Fixed-width line templates shared by all the report writers.
scheme_header_template = "%-18s: %s\n"
scheme_subset_template = "%-6s | %-10s | %-30s | %-30s | %-40s\n"
subset_template = "%-15s | %-15s | %-15s | %-15s | %-15s\n"


class TextReporter(object):
    """Writes plain-text summaries of subsets, schemes, and the best scheme."""

    def __init__(self, config):
        self.cfg = config
        # Register ourselves so the analysis code can reach the reporter.
        self.cfg.reporter = self

    def write_subset_summary(self, sub):
        """Write <subsets_path>/<sub.name>.txt with models sorted by BIC."""
        pth = os.path.join(self.cfg.subsets_path, sub.name + '.txt')
        # FIX: sort on the BIC value only. The old (bic, result) tuple sort
        # compared result objects whenever BICs tied, which is fragile (and
        # a TypeError on Python 3).
        model_results = sorted(sub.results.values(), key=lambda r: r.bic)
        # FIX: close the file when done (it was previously left open).
        with open(pth, 'w') as output:
            # TODO change back to full name...
            # output.write("Model selection results for subset: %s\n" % sub.full_name)
            output.write("Model selection results for subset: %s\n" % sub.name)
            output.write("Subset alignment stored here: %s\n" % sub.alignment_path)
            output.write("This subset contains the following data_blocks: %s\n" % sub)
            output.write("Models are organised according to their BIC scores\n\n")
            output.write(subset_template % ("Model", "lNL", "AIC", "AICc", "BIC"))
            for r in model_results:
                output.write(subset_template % (r.model, r.lnl, r.aic, r.aicc, r.bic))

    def write_scheme_summary(self, sch, result):
        """Write <schemes_path>/<sch.name>.txt describing scheme `sch`."""
        pth = os.path.join(self.cfg.schemes_path, sch.name + '.txt')
        # FIX: close the file when done (it was previously left open).
        with open(pth, 'w') as output:
            self.output_scheme(sch, result, output)

    def output_scheme(self, sch, result, output):
        """Write the full scheme report (header, subsets, RAxML block)."""
        self.write_scheme_header(sch, result, output)
        sorted_subsets = [sub for sub in sch]
        # Report subsets in order of their first alignment column.
        sorted_subsets.sort(key=lambda sub: min(sub.columns), reverse=False)
        self.write_subsets(sch, result, output, sorted_subsets)
        self.write_raxml(sch, result, output, sorted_subsets)

    def write_scheme_header(self, sch, result, output):
        """Write the scheme-level statistics block."""
        output.write(scheme_header_template % ("Scheme Name", sch.name))
        output.write(scheme_header_template % ("Scheme lnL", result.lnl))
        # Only show the criterion actually used for model selection.
        if self.cfg.model_selection == "aic":
            output.write(scheme_header_template % ("Scheme AIC", result.aic))
        if self.cfg.model_selection == "aicc":
            output.write(scheme_header_template % ("Scheme AICc", result.aicc))
        if self.cfg.model_selection == "bic":
            output.write(scheme_header_template % ("Scheme BIC", result.bic))
        output.write(scheme_header_template % ("Number of params", result.sum_k))
        output.write(scheme_header_template % ("Number of sites", result.nsites))
        output.write(scheme_header_template % ("Number of subsets", result.nsubs))
        output.write("\n")

    def _subset_description(self, sub):
        """Return (partition_names, sites_text) for `sub`.

        partition_names is the (unsorted) list of partition names;
        sites_text pretty-prints the site ranges, e.g. '1-100, 2-300\\3'.
        Shared by write_subsets and write_raxml, which previously
        duplicated this code.
        """
        desc = {}
        names = []
        for part in sub:
            names.append(part.name)
            # Each partition may consist of several (start, end, step) parts.
            for subpart in part.description:
                desc[subpart[0]] = subpart

        parts = []
        for key in sorted(desc.keys()):
            part = desc[key]
            if part[2] == 1:
                text = "%s-%s" % (part[0], part[1])
            else:
                text = "%s-%s\\%s" % tuple(part)
            parts.append(text)
        return names, ', '.join(parts)

    def write_subsets(self, sch, result, output, sorted_subsets):
        """Write the per-subset table plus the PF-format scheme line."""
        output.write(scheme_subset_template % (
            "Subset", "Best Model", "Subset Partitions", "Subset Sites", "Alignment"))
        number = 1

        # A way to print out the scheme in PF format.
        pf_scheme_description = []

        for sub in sorted_subsets:
            names, parts = self._subset_description(sub)

            names.sort()
            names = ', '.join(names)

            pf_scheme_description.append("(%s)" % names)

            output.write(scheme_subset_template % (
                number, sub.best_model, names, parts, sub.alignment_path))
            number += 1

        pf_scheme_description = " ".join(pf_scheme_description)
        output.write("\n\nScheme Description in PartitionFinder format\n")
        output.write("Scheme_%s = %s;" % (sch.name, pf_scheme_description))

    def write_raxml(self, sch, result, output, sorted_subsets):
        """Print out partition definitions in RaxML-like format, might be
        useful to some people
        """
        from raxml_models import get_raxml_protein_modelstring
        output.write("\n\nRaxML-style partition definitions\n")
        number = 1
        for sub in sorted_subsets:
            names, parts = self._subset_description(sub)

            if self.cfg.datatype == "DNA":
                model = "DNA"
            elif self.cfg.datatype == "protein":
                model = get_raxml_protein_modelstring(sub.best_model)
            else:
                raise RuntimeError

            line = "%s, p%s = %s\n" % (model, number, parts)
            output.write(line)

            number += 1

    def write_best_scheme(self, result):
        """Write best_scheme.txt: the settings used plus the winning scheme."""
        pth = os.path.join(self.cfg.output_path, 'best_scheme.txt')
        # FIX: open in text mode ('wb' plus str writes breaks on Python 3;
        # the content is text) and make sure the file gets closed.
        with open(pth, 'w') as output:
            output.write('Settings used\n\n')
            output.write(scheme_header_template % ("alignment", self.cfg.alignment_path))
            output.write(scheme_header_template % ("branchlengths", self.cfg.branchlengths))
            output.write(scheme_header_template % ("models", ', '.join(self.cfg.models)))
            output.write(scheme_header_template % ("model_selection",
                                                   self.cfg.model_selection))
            output.write(scheme_header_template % ("search", self.cfg.search))
            if self.cfg.search in ["rcluster", "hcluster"]:
                # NOTE: the "base" weight is stored under the "freqs" key.
                pretty_weights = "rate = %s, base = %s, model = %s, alpha = %s" % (
                    str(self.cfg.cluster_weights["rate"]),
                    str(self.cfg.cluster_weights["freqs"]),
                    str(self.cfg.cluster_weights["model"]),
                    str(self.cfg.cluster_weights["alpha"]))
                output.write(scheme_header_template % ("weights", pretty_weights))
            if self.cfg.search == "rcluster":
                output.write(scheme_header_template % ("rcluster-percent",
                                                       self.cfg.cluster_percent))
            output.write('\n\nBest partitioning scheme\n\n')
            self.output_scheme(result.best_scheme, result.best_result, output)
            log.info("Information on best scheme is here: %s", pth)
#Copyright (C) 2012 Robert Lanfear and Brett Calcott
#
#This program is free software: you can redistribute it and/or modify it
#under the terms of the GNU General Public License as published by the
#Free Software Foundation, either version 3 of the License, or (at your
#option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details. You should have received a copy
#of the GNU General Public License along with this program. If not, see
#. PartitionFinder also includes the PhyML
#program, the RAxML program, and the PyParsing library,
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.

import logging
log = logging.getLogger("results")

import os
# FIX: fall back to the pure-Python pickle so this module also imports on
# Python 3, where cPickle no longer exists. Python 2 behaviour is unchanged.
try:
    import cPickle as pickle
except ImportError:
    import pickle

from util import PartitionFinderError

# The per-result fields that get dumped and later compared.
_check_fields = "lnl aic aicc bic".split()


class ComparisonError(PartitionFinderError):
    """Raised when a rerun's results disagree with previously dumped ones."""
    pass


class AnalysisResults(object):
    """
    This stores the results, keeping only the winning scheme.
    """

    # Maximum tolerated absolute difference when comparing against a dump.
    MAX_ERROR = .1

    def __init__(self, model_selection):
        self.model_selection = model_selection
        self.best_score = None
        self.best_result = None
        self.best_scheme = None

    def add_scheme_result(self, sch, result):
        """Keep (sch, result) only if its score beats the current best.

        Lower scores (AIC/AICc/BIC) are better.
        """
        score = result.score
        if self.best_score is None or score < self.best_score:
            self.best_score = score
            self.best_result = result
            self.best_scheme = sch

    def get_dump_path(self, cfg):
        """Path of the results dump file inside the analysis base folder."""
        return os.path.join(cfg.base_path, 'results.bin')

    def get_result_fields(self):
        """Return [lnl, aic, aicc, bic] of the best result."""
        flds = []
        for k in _check_fields:
            flds.append(getattr(self.best_result, k))
        return flds

    def dump(self, cfg):
        """Pickle the best result's fields to results.bin."""
        pth = self.get_dump_path(cfg)
        log.info("Dumping all results to '%s'", pth)
        # BUG FIX: the file was previously never closed, so the pickled
        # data might not be flushed to disk; `with` guarantees both.
        with open(pth, 'wb') as f:
            pickle.dump(self.get_result_fields(), f, -1)

    def compare(self, cfg):
        """We only compare the best result!"""
        pth = self.get_dump_path(cfg)
        if not os.path.exists(pth):
            log.error("Previous results file not found at '%s'. "
                      "Did you run --dump-results previously?", pth)
            raise ComparisonError

        log.info("Loading old results from '%s'", pth)
        with open(pth, 'rb') as f:
            old_fields = pickle.load(f)

        cur_fields = self.get_result_fields()

        log.info("Comparing results...")
        # Now do the comparison, field by field.

        errors = 0
        for nm, oldv, curv in zip(_check_fields, old_fields, cur_fields):
            if abs(oldv - curv) > self.MAX_ERROR:
                log.error("Differences were more than acceptable value of %s", AnalysisResults.MAX_ERROR)
                log.error("Old %s value: %s, new %s value %s", nm, oldv, nm, curv)
                errors += 1

        if errors > 0:
            raise ComparisonError
        else:
            log.info(
                "All results were within an acceptable %s of the dumped results",
                AnalysisResults.MAX_ERROR)
You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, and the PyParsing library, 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import logging 19 | log = logging.getLogger("scheme") 20 | import subset 21 | import submodels 22 | 23 | from math import log as logarithm 24 | 25 | from util import PartitionFinderError 26 | 27 | 28 | class SchemeError(PartitionFinderError): 29 | pass 30 | 31 | 32 | class SchemeResult(object): 33 | def __init__(self, sch, nseq, branchlengths, model_selection): 34 | self.scheme_name = sch.name 35 | self.scheme = sch 36 | self.model_selection = model_selection 37 | 38 | # Calculate AIC, BIC, AICc for each scheme. 39 | # How you do this depends on whether brlens are linked or not. 40 | self.nsubs = len(sch.subsets) # number of subsets 41 | sum_subset_k = sum([s.best_params for s in sch]) # sum of number of parameters in the best model of each subset 42 | 43 | log.debug("Calculating number of parameters in scheme:") 44 | log.debug("Total parameters from subset models: %d" % (sum_subset_k)) 45 | 46 | if branchlengths == 'linked': # linked brlens - only one extra parameter per subset 47 | self.sum_k = sum_subset_k + (self.nsubs - 1) + ( 48 | (2 * nseq) - 3) # number of parameters in a scheme 49 | log.debug("Total parameters from brlens: %d" % ((2 * nseq) - 3)) 50 | log.debug( 51 | "Parameters from subset multipliers: %d" % (self.nsubs - 1)) 52 | 53 | elif branchlengths == 'unlinked': # unlinked brlens - every subset has its own set of brlens 54 | self.sum_k = sum_subset_k + (self.nsubs * ( 55 | (2 * nseq) - 3)) # number of parameters in a scheme 56 | log.debug("Total parameters from brlens: %d" % (( 57 | 2 * nseq) - 3) * self.nsubs) 58 | 59 | else: 60 | # WTF? 
import logging
log = logging.getLogger("scheme")


class Scheme(object):
    """A scheme: a partitioning of all user-defined partitions into subsets."""

    def __init__(self, cfg, name, subsets, description=None):
        """Build a scheme named *name* from an iterable of Subsets.

        Validates that every partition defined in *cfg* appears in exactly
        one subset; raises SchemeError on duplicates or omissions.
        """
        self.name = name
        self.subsets = set()
        self.description = description

        # A set of frozensets of partitions -- this is the scheme's
        # structural identity, used by SchemeSet to spot duplicates.
        part_subsets = set()

        # Deliberately long-winded: we visit every partition so that ALL
        # duplicates can be collected into a single error report.
        seen_partitions = set()
        duplicates = []
        for sub in subsets:
            for part in sub:
                if part in seen_partitions:
                    duplicates.append(str(part))
                else:
                    seen_partitions.add(part)
            self.subsets.add(sub)
            part_subsets.add(sub.partitions)

        self.part_subsets = frozenset(part_subsets)

        # Report the errors
        if duplicates:
            log.error("Scheme '%s' contains duplicate partitions: %s",
                      name, ', '.join(duplicates))
            raise SchemeError

        # Set-difference against the full partition set to find omissions.
        missing = cfg.partitions.partitions - seen_partitions
        if missing:
            log.error("Scheme '%s' is missing partitions: %s",
                      name, ', '.join([str(p) for p in missing]))
            raise SchemeError

        # Creating the first scheme locks the partition set so no new
        # partitions can be defined afterwards.
        if not cfg.partitions.finalised:
            cfg.partitions.finalise()

        log.debug("Created %s", self)

    def __iter__(self):
        return iter(self.subsets)

    def __str__(self):
        ss = ', '.join([str(s) for s in self.subsets])
        return "Scheme(%s, %s)" % (self.name, ss)


class SchemeSet(object):
    """All the schemes added, keyed both by name and by subset structure."""

    def __init__(self):
        """A collection of schemes."""
        self.clear_schemes()

    def clear_schemes(self):
        self.schemes_by_name = {}
        self.schemes_by_subsets = {}

    def add_scheme(self, scheme):
        """Add *scheme*. Duplicate names are an error; a duplicate subset
        structure under a new name only triggers a warning."""
        if scheme.name in self.schemes_by_name:
            log.error("Cannot add two schemes with same name: '%s'" %
                      scheme.name)
            raise SchemeError

        if scheme.part_subsets in self.schemes_by_subsets:
            existing_scheme = \
                self.schemes_by_subsets[scheme.part_subsets]
            log.warning(
                "Scheme named %s being added is identical to existing %s",
                scheme.name, existing_scheme)

        self.schemes_by_name[scheme.name] = scheme
        self.schemes_by_subsets[scheme.part_subsets] = scheme

    def __len__(self):
        return len(self.schemes_by_name)

    def __iter__(self):
        # .values() (not the Python-2-only .itervalues()) so this module
        # keeps working on both Python 2 and Python 3.
        return iter(self.schemes_by_name.values())
def _subsets_from_grouping(cfg, grouping):
    """Turn a grouping list like [0, 0, 1, 2] into a list of Subsets.

    Element i of *grouping* gives the group that partition i belongs to;
    partitions sharing a group value end up in the same Subset. This was
    previously copy-pasted in create_scheme, model_to_scheme and
    generate_all_schemes.
    """
    groups = {}
    for part_index, group_id in enumerate(grouping):
        groups.setdefault(group_id, []).append(part_index)

    created_subsets = []
    for part_indexes in groups.values():
        sub = subset.Subset(*tuple([cfg.partitions[i] for i in part_indexes]))
        created_subsets.append(sub)
    return created_subsets


def create_scheme(cfg, scheme_name, scheme_description):
    """
    Generate a single scheme given a list of numbers that represent the
    indexes of the partitions e.g. [0,1,2,3,4,5,6,7].

    Raises SchemeError if the description length does not match the
    number of partitions defined by the user.
    """
    partition_count = len(cfg.partitions)

    # Check that the correct number of items are in the list
    if len(scheme_description) != partition_count:
        log.error("There's a problem with the description of scheme %s" %
                  scheme_name)
        raise SchemeError

    return Scheme(cfg, str(scheme_name),
                  _subsets_from_grouping(cfg, scheme_description),
                  description=scheme_description)


def model_to_scheme(model, scheme_name, cfg):
    """Turn a model definition e.g. [0, 1, 2, 3, 4] into a scheme"""
    return Scheme(cfg, str(scheme_name), _subsets_from_grouping(cfg, model))


def generate_all_schemes(cfg):
    """
    Convert the abstract schema given by the algorithm into subsets:
    one Scheme per possible partitioning (Bell-number many of them).
    """
    log.info("Generating all possible schemes for the partitions...")

    partition_count = len(cfg.partitions)
    all_schemes = submodels.get_submodels(partition_count)

    scheme_list = []
    # Schemes are simply named 1, 2, 3, ... in generation order.
    for scheme_number, grouping in enumerate(all_schemes, start=1):
        scheme_list.append(
            Scheme(cfg, str(scheme_number),
                   _subsets_from_grouping(cfg, grouping)))
        log.debug("Created scheme %d of %d" % (scheme_number,
                                               len(all_schemes)))

    return scheme_list
import math
import logging
log = logging.getLogger("submodels")

# NOTE: the original module had an unused 'import algorithm' here; the
# name is never referenced anywhere in this module, so it was dropped.


def submodel_generator(result, pat, current, maxn):
    """Append to *result* every set-partition pattern of length *maxn*.

    result  -- list to append completed patterns to
    pat     -- the pattern built so far (starts as an empty list)
    current -- 1-based position currently being filled
    maxn    -- total number of items in the pattern

    Patterns are restricted-growth strings, e.g. for maxn=3:
    [0,0,0], [0,0,1], [0,1,0], [0,1,1], [0,1,2].
    """
    if pat:
        curmax = max(pat)
    else:
        curmax = 0
    for i in range(current):
        # Restricted growth: a new group label can exceed the current
        # maximum label by at most one.
        if i - 1 <= curmax:
            newpat = pat + [i]
            if current == maxn:
                result.append(newpat)
            else:
                submodel_generator(result, newpat, current + 1, maxn)


def submodel_iterator(pat, current, maxn):
    """Same as submodel_generator but yields patterns lazily."""
    if pat:
        curmax = max(pat)
    else:
        curmax = 0
    for i in range(current):
        if i - 1 <= curmax:
            newpat = pat + [i]
            if current == maxn:
                yield newpat
            else:
                # No 'yield from': keep Python 2 compatibility.
                for b in submodel_iterator(newpat, current + 1, maxn):
                    yield b


def a_choose_b(n, k):
    """Binomial coefficient C(n, k) in exact integer arithmetic.

    The running product is C(n, 1), C(n, 2), ... so the floor division
    is exact at every step (replaces the old reduce/lambda one-liner,
    whose '/' relied on Python 2 integer division).
    """
    result = 1
    for b in range(k):
        result = result * (n - b) // (b + 1)
    return result


def count_relaxed_clustering_subsets(N, cluster_percent, output=False):
    """Count subsets analysed by relaxed clustering from N partitions.

    cluster_percent -- percentage of candidate pairs examined per step.
    """
    # The start scheme contributes one subset per partition.
    start_scheme = N
    # First batch is cluster_percent of (N choose 2) pairwise merges.
    step_1 = int(math.ceil(a_choose_b(N, 2) * cluster_percent * 0.01))
    previous = step_1
    cumsum = start_scheme + step_1
    if output: print(start_scheme)
    if output: print(cumsum)
    # Now for the rest, one step per merge down to a single subset.
    for i in reversed(range(N)):
        # Once we get to the all-combined scheme we can stop.
        if i == 1:
            break
        num_new_schemes = int(math.ceil(
            a_choose_b(i, 2) * cluster_percent * 0.01))
        # Those new schemes include many we already analysed; we could
        # have seen up to (i-1 choose 2) of them. The worst case is that
        # the chosen merge knocked out the maximum number of previously
        # analysed schemes, which is 2i - 1, so:
        worst_case = 2 * i - 1
        num_already_analysed = previous - worst_case
        if num_already_analysed < 0:
            num_already_analysed = 0
        # Carry 'previous' over for the next round of the loop.
        previous = num_new_schemes
        # Final number of genuinely new schemes this step.
        num_new_schemes = num_new_schemes - num_already_analysed
        cumsum += num_new_schemes
        if output: print(cumsum)
    return cumsum


def count_relaxed_clustering_schemes(N, cluster_percent, output=False):
    """Count schemes analysed by relaxed clustering from N partitions."""
    start_scheme = 1
    # First batch is cluster_percent of (N choose 2).
    step_1 = int(math.ceil(a_choose_b(N, 2) * cluster_percent * 0.01))
    previous = step_1
    cumsum = start_scheme + step_1
    if output: print(start_scheme)
    if output: print(cumsum)
    for i in reversed(range(N)):
        # Each subsequent step is cluster_percent of (i choose 2).
        if i == 1:
            break
        num_new_schemes = int(math.ceil(
            a_choose_b(i, 2) * cluster_percent * 0.01))
        cumsum += num_new_schemes
        if output: print(cumsum)
    return cumsum


def count_greedy_schemes(N):
    """oeis.org reveals this is 1+(N*(N+1)*(N-1))/6 (always an integer)."""
    # '//' keeps the exact integer result on both Python 2 and 3.
    return 1 + (N * (N + 1) * (N - 1)) // 6


def count_greedy_subsets(N):
    """oeis.org says these are Central polygonal numbers: n^2 - n + 1."""
    return (N * N) - N + 1


def bell_numbers(N):
    """Return the Bell number for N subsets.

    Script modified from Wikipedia:
    http://en.wikipedia.org/wiki/Bell_number
    """
    N = N + 1  # Bell numbers are indexed from zero
    t = [[1]]  # Initialize the triangle as a two-dimensional array
    c = 1      # Bell numbers count
    while c <= N:
        if c >= N:
            return t[-1][0]  # Yield the Bell number of the previous row
        row = [t[-1][-1]]    # Initialize a new row
        for b in t[-1]:
            row.append(row[-1] + b)  # Populate the new row
        c += 1               # We have found another Bell number
        t.append(row)        # Append the row to the triangle


def get_submodels(N):
    """Return all submodels (set partitions) for N partitions."""
    log.debug("Generating submodels for %s partitions", N)
    result = []
    submodel_generator(result, [], 1, N)
    log.debug("Resulting number of partitions is %s", len(result))
    return result


def count_all_schemes(N):
    """Count the number of submodels we've got (the Bell number)."""
    return bell_numbers(N)


def count_all_subsets(N):
    """Count the subsets we'd have to look at for N starting partitions."""
    return (2 ** N) - 1
can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, and the PyParsing library, 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import logging 19 | log = logging.getLogger("subset") 20 | import os 21 | import weakref 22 | 23 | from hashlib import md5 24 | 25 | # import base64 26 | # from zlib import compress 27 | 28 | import cPickle as pickle 29 | from math import log as logarithm 30 | from alignment import Alignment, SubsetAlignment 31 | from util import PartitionFinderError, remove_runID_files 32 | 33 | FRESH, PREPARED, DONE = range(3) 34 | 35 | 36 | class SubsetError(PartitionFinderError): 37 | pass 38 | 39 | 40 | def count_subsets(): 41 | return 1 42 | # return len(Subset._cache) 43 | # 44 | 45 | 46 | def clear_subsets(): 47 | pass 48 | # Subset._cache.clear() 49 | 50 | 51 | class Subset(object): 52 | """A Subset of Partitions 53 | """ 54 | # TODO: Move this to the config -- once we have a global one 55 | _cache = weakref.WeakValueDictionary() 56 | 57 | def __new__(cls, *parts): 58 | """Return the SAME subset if the partitions are identical. This is 59 | basically a pythonized factory. 
See here: 60 | http://codesnipers.com/?q=python-flyweights 61 | """ 62 | 63 | cacheid = frozenset(parts) 64 | obj = Subset._cache.get(cacheid, None) 65 | # TODO Flush cache? USE MRU? functools.lrucache 66 | if not obj: 67 | obj = object.__new__(cls) 68 | Subset._cache[cacheid] = obj 69 | obj.init(cacheid, *parts) 70 | 71 | # obj = object.__new__(cls) 72 | # cacheid = frozenset(parts) 73 | # obj.init(cacheid, *parts) 74 | return obj 75 | 76 | def init(self, cacheid, *parts): 77 | # Error checking.... 78 | self.status = FRESH 79 | 80 | tempparts = set() 81 | for p in parts: 82 | if p.partition_set is None: 83 | log.error("You cannot add a Partition to a Subset until " 84 | "the Partition belongs to a PartitionSet") 85 | raise SubsetError 86 | 87 | if p in tempparts: 88 | log.error("%s is duplicated in a Subset", p) 89 | raise SubsetError 90 | 91 | tempparts.add(p) 92 | 93 | self.partitions = cacheid 94 | 95 | # a list of columns in the subset 96 | self.columns = [] 97 | self.columnset = set() 98 | for p in parts: 99 | self.columns += p.columns 100 | self.columnset |= p.columnset 101 | self.columns.sort() 102 | 103 | self.results = {} 104 | self.best_info_score = None # e.g. AIC, BIC, AICc 105 | self.best_model = None 106 | self.best_params = None 107 | self.best_lnl = None 108 | self.alignment_path = None 109 | log.debug("Created %s", self) 110 | 111 | def __str__(self): 112 | return "(%s)" % ", ".join([str(p.name) for p in self.partitions]) 113 | 114 | @property 115 | def full_name(self): 116 | if hasattr(self, '_full_name'): 117 | nm = self._full_name 118 | else: 119 | s = sorted([p.name for p in self.partitions]) 120 | nm = '-'.join(s) 121 | self._full_name = nm 122 | return nm 123 | 124 | @property 125 | def name(self): 126 | # Cache this 127 | if hasattr(self, '_name'): 128 | nm = self._name 129 | else: 130 | nm = self.full_name 131 | # This gets super long -- we can shorten it like this... This is 132 | # a slightly lazy solution. 
import logging
log = logging.getLogger("threadpool")
import threading
from time import sleep
import multiprocessing

# Cached CPU count so the system is only probed once per process.
_cpus = None


def get_cpu_count():
    """Return the number of CPUs, probing once and caching the answer."""
    global _cpus
    if _cpus is not None:
        return _cpus

    try:
        _cpus = multiprocessing.cpu_count()
    except Exception:
        # cpu_count() raises NotImplementedError on some platforms; fall
        # back to one worker. (Was a bare 'except:', which would also
        # have swallowed KeyboardInterrupt/SystemExit.)
        _cpus = 1
        log.info("I cannot detect the number of processors...")

    log.info("Found %s cpus", _cpus)
    return _cpus


class Pool(object):
    """A fixed pool of worker threads draining a list of (cmd, args) tasks."""

    def __init__(self, tasks, numthreads=-1):
        """Initialize the thread pool with numthreads workers and all tasks.

        numthreads <= 1 means "one worker per CPU"; the worker count is
        also capped at the number of tasks.
        """
        self.more_tasks = True
        self.tasks = tasks
        self.task_lock = threading.Condition(threading.Lock())
        self.threads = []
        self.failed = False

        numtasks = len(tasks)
        if numtasks == 0:
            log.warning("You did not give any tasks to do...")
            self.more_tasks = False
            return

        if numthreads <= 1:
            numthreads = get_cpu_count()
        if numtasks < numthreads:
            numthreads = numtasks

        log.debug("Creating %s threads for %s tasks", numthreads, numtasks)
        for i in range(numthreads):
            t = Thread(self)
            self.threads.append(t)
            t.start()

    def next_task(self):
        """Pop the next (cmd, args) pair, or (None, None) when drained."""
        self.task_lock.acquire()
        try:
            if self.tasks == []:
                self.more_tasks = False
                return None, None
            else:
                return self.tasks.pop(0)
        finally:
            self.task_lock.release()

    def kill(self, e):
        """Abort: drop all remaining tasks, remember *e* for join()."""
        self.task_lock.acquire()
        self.tasks = []
        self.more_tasks = False
        self.failed = True
        self.exception = e
        self.task_lock.release()

    def join(self):
        """Block until all workers finish; re-raise any worker exception."""
        # Wait till all tasks have been taken...
        while self.more_tasks:
            sleep(.1)
        # ...now wait for the workers themselves to finish.
        for t in self.threads:
            t.join()

        if self.failed:
            raise self.exception


class Thread(threading.Thread):
    """Worker that pulls tasks from its Pool until the pool is drained."""

    def __init__(self, pool):
        threading.Thread.__init__(self)
        self.pool = pool

    def run(self):
        while 1:
            cmd, args = self.pool.next_task()
            # If there's nothing to do, return
            if cmd is None:
                break
            try:
                cmd(*args)
            except Exception as e:
                # The error should already have been reported. Stop
                # operation, kill the entire pool, then let join()
                # re-raise the error.
                self.pool.kill(e)
                break
import logging
log = logging.getLogger("util")
import os
import sys
import fnmatch


# Base error class for the whole package.
class PartitionFinderError(Exception):
    pass


class PhylogenyProgramError(PartitionFinderError):
    pass


def check_file_exists(pth):
    """Raise PartitionFinderError (with a helpful message) unless *pth* is a file."""
    if not os.path.exists(pth) or not os.path.isfile(pth):
        # Give a more specific hint when the missing file is the config.
        if pth.count("partition_finder.cfg") > 0:
            log.error("Failed to find configuration file: '%s'. "
                      "For PartitionFinder to run, there must be a file called 'partition_finder.cfg' "
                      "located in the same folder as your alignment. Please check and try again.", pth)
            raise PartitionFinderError
        else:
            log.error(
                "Failed to find file: '%s'. Please check and try again.", pth)
            raise PartitionFinderError


def delete_files(pths):
    """Delete files, but watch out for a WindowsError that crops up sometimes
    with threading. Oddly, this error occurs but the files get deleted anyway,
    so we ignore it for now.
    """
    for f in pths:
        try:
            os.remove(f)
        except OSError:
            # WindowsError is an OSError subclass; best-effort delete.
            log.debug("Found and ignored Error when deleting file %s" % f)
    log.debug("deleted %d files" % len(pths))


def check_folder_exists(pth):
    """Raise PartitionFinderError unless *pth* is an existing directory."""
    if not os.path.exists(pth) or not os.path.isdir(pth):
        log.error("No such folder: '%s'", pth)
        raise PartitionFinderError


def clean_out_folder(folder, keep=None):
    """Delete every regular file in *folder* except names listed in *keep*.

    Hat Tip: http://stackoverflow.com/questions/185936/delete-folder-contents-in-python
    (keep=None replaces the old mutable default argument keep=[].)
    """
    if keep is None:
        keep = []
    for the_file in os.listdir(folder):
        if the_file not in keep:
            file_path = os.path.join(folder, the_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception as e:
                # 'except Exception, e' was Python-2-only syntax; the
                # 'as' form is valid from Python 2.6 onwards.
                log.error(e)
                raise PartitionFinderError


def make_dir(pth):
    """Create directory *pth*; a no-op if it already exists as a directory."""
    if os.path.exists(pth):
        if not os.path.isdir(pth):
            log.error("Cannot create folder '%s'", pth)
            raise PartitionFinderError
    else:
        os.mkdir(pth)


def remove_runID_files(aln_pth):
    """Remove all files that match a particular run_ID. Useful for cleaning
    out directories, but ONLY after a whole analysis of a subset is
    completely finished -- be careful!"""
    head, tail = os.path.split(aln_pth)
    run_ID = os.path.splitext(tail)[0]
    head = os.path.abspath(head)
    fnames = os.listdir(head)
    fs = fnmatch.filter(fnames, '*%s*' % run_ID)
    for f in fs:
        try:
            os.remove(os.path.join(head, f))
        except OSError:
            # Don't complain if you can't delete them
            # (we sometimes try to delete things twice in the threading).
            pass
# -*- coding: utf-8 -*-
# Author: Douglas Creager
# This file is placed into the public domain.
#
# Minor Modifications have been made by Brett Calcott
# * Write the VERSION into the current folder
#
# Calculates the current version number from "git describe", falling back
# on the RELEASE-VERSION file when not in a git working copy. See the
# original header for usage with setup.py; RELEASE-VERSION should not be
# checked into git but should be shipped in sdist tarballs.
from subprocess import Popen, PIPE
import os


def get_version_path():
    """Return the RELEASE-VERSION path in the parent of this package dir."""
    # Get current dir, then parent dir
    pth = os.path.dirname(os.path.abspath(__file__))
    pth, here = os.path.split(pth)
    return os.path.join(pth, "RELEASE-VERSION")


def call_git_describe(abbrev=4):
    """Return the first line of 'git describe', or None on any failure."""
    try:
        p = Popen(['git', 'describe', '--abbrev=%d' % abbrev],
                  stdout=PIPE, stderr=PIPE)
        p.stderr.close()
        line = p.stdout.readlines()[0]
        return line.strip()
    except Exception:
        # git missing, not a git checkout, or no output at all.
        # (Was a bare 'except:', which also swallowed SystemExit etc.)
        return None


def read_release_version():
    """Return the version stored in RELEASE-VERSION, or None if unreadable."""
    try:
        with open(get_version_path(), "r") as f:
            return f.readlines()[0].strip()
    except Exception:
        return None


def write_release_version(version):
    """(Over)write RELEASE-VERSION with *version* plus a trailing newline."""
    with open(get_version_path(), "w") as f:
        f.write("%s\n" % version)


def get_git_version(abbrev=4):
    """Best-effort current version, keeping RELEASE-VERSION up to date.

    Tries "git describe" first, then falls back on RELEASE-VERSION.
    Raises ValueError when neither source yields a version.
    """
    # Read in the version that's currently in RELEASE-VERSION.
    release_version = read_release_version()

    # First try to get the current version using "git describe".
    version = call_git_describe(abbrev)

    # If that doesn't work, fall back on the value in RELEASE-VERSION.
    if version is None:
        version = release_version

    # If we still don't have anything, that's an error.
    if version is None:
        raise ValueError("Cannot find the version number!")

    # If the current version differs from what's in the RELEASE-VERSION
    # file, update the file to be current.
    if version != release_version:
        write_release_version(version)

    # Finally, return the current version.
    return version


def get_version():
    """Return the version from RELEASE-VERSION; raise ValueError if absent."""
    version = read_release_version()
    if version is None:
        raise ValueError("Cannot find the version number!")
    return version


if __name__ == "__main__":
    # print() with a single argument behaves identically on Python 2 and 3.
    print(get_version())
mitochondrial genomes 2 | -------------------------------------------------------------------------------- /bin/BMGE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/BMGE.jar -------------------------------------------------------------------------------- /bin/Gblocks: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/Gblocks -------------------------------------------------------------------------------- /bin/blastall: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/blastall -------------------------------------------------------------------------------- /bin/formatdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/formatdb -------------------------------------------------------------------------------- /bin/noisy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/noisy -------------------------------------------------------------------------------- /bin/progressiveMauve: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/progressiveMauve -------------------------------------------------------------------------------- /bin/readal: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/readal -------------------------------------------------------------------------------- /bin/trimal: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/trimal -------------------------------------------------------------------------------- /plant/fasta/readme: -------------------------------------------------------------------------------- 1 | This is the test data of 52 higher plant chloroplast genomes 2 | --------------------------------------------------------------------------------