├── HomBlocks.pl ├── PartitionFinderV1.1.1 ├── .DS_Store ├── PartitionFinder.py ├── PartitionFinderProtein.py ├── README.md ├── RELEASE-VERSION ├── ROGP_v2.0-backup.pl ├── circoletto.pl ├── docs │ ├── .DS_Store │ └── Manual_v1.1.1.pdf ├── examples │ ├── .DS_Store │ ├── README.txt │ ├── aminoacid │ │ ├── Als_etal_2004.phy │ │ └── partition_finder.cfg │ └── nucleotide │ │ ├── .DS_Store │ │ ├── partition_finder.cfg │ │ └── test.phy ├── partfinder │ ├── .DS_Store │ ├── __init__.py │ ├── __init__.pyc │ ├── algorithm.py │ ├── algorithm.pyc │ ├── alignment.py │ ├── alignment.pyc │ ├── analysis.py │ ├── analysis.pyc │ ├── analysis_method.py │ ├── analysis_method.pyc │ ├── config.py │ ├── config.pyc │ ├── main.py │ ├── main.pyc │ ├── neighbour.py │ ├── neighbour.pyc │ ├── parser.py │ ├── parser.pyc │ ├── partition.py │ ├── partition.pyc │ ├── phyml.py │ ├── phyml.pyc │ ├── phyml_models.py │ ├── phyml_models.pyc │ ├── progress.py │ ├── progress.pyc │ ├── pyparsing.py │ ├── pyparsing.pyc │ ├── raxml.py │ ├── raxml.pyc │ ├── raxml_models.py │ ├── raxml_models.pyc │ ├── reporter.py │ ├── reporter.pyc │ ├── results.py │ ├── results.pyc │ ├── scheme.py │ ├── scheme.pyc │ ├── submodels.py │ ├── submodels.pyc │ ├── subset.py │ ├── subset.pyc │ ├── threadpool.py │ ├── threadpool.pyc │ ├── util.py │ ├── util.pyc │ ├── version.py │ └── version.pyc └── programs │ ├── .DS_Store │ ├── phyml │ └── raxml ├── README.md ├── Xenarthrans └── fasta │ ├── Bradypus_pygmaeus.fasta │ ├── Bradypus_torquatus.fasta │ ├── Bradypus_tridactylus.fasta │ ├── Bradypus_variegatus.fasta │ ├── Bradypus_variegatus_old.fasta │ ├── Cabassous_centralis.fasta │ ├── Cabassous_chacoensis.fasta │ ├── Cabassous_tatouay.fasta │ ├── Cabassous_unicinctus_ISEM_T-2291.fasta │ ├── Cabassous_unicinctus_MNHN_1999-1068.fasta │ ├── Calyptophractus_retusus.fasta │ ├── Chaetophractus_vellerosus.fasta │ ├── Chaetophractus_villosus.fasta │ ├── Chlamyphorus_truncatus.fasta │ ├── Choloepus_didactylus.fasta │ ├── 
Choloepus_didactylus_old.fasta │ ├── Choloepus_hoffmanni.fasta │ ├── Cyclopes_didactylus.fasta │ ├── Dasypus_hybridus.fasta │ ├── Dasypus_kappleri.fasta │ ├── Dasypus_novemcinctus.fasta │ ├── Dasypus_novemcinctus_old.fasta │ ├── Dasypus_pilosus_LSUMZ_21888.fasta │ ├── Dasypus_pilosus_MSB_49990.fasta │ ├── Dasypus_sabanicola.fasta │ ├── Dasypus_septemcinctus.fasta │ ├── Dasypus_yepesi.fasta │ ├── Euphractus_sexcinctus.fasta │ ├── Myrmecophaga_tridactyla.fasta │ ├── Priodontes_maximus.fasta │ ├── Tamandua_mexicana.fasta │ ├── Tamandua_tetradactyla.fasta │ ├── Tamandua_tetradactyla_old.fasta │ ├── Tolypeutes_matacus.fasta │ ├── Tolypeutes_tricinctus.fasta │ ├── Zaedyus_pichiy.fasta │ └── readme ├── bin ├── BMGE.jar ├── Gblocks ├── blastall ├── formatdb ├── noisy ├── progressiveMauve ├── readal └── trimal └── plant └── fasta ├── Acidosasa_purpurea.fasta ├── Aegilops_cylindrica.fasta ├── Aegilops_geniculata.fasta ├── Aegilops_speltoides_SPE0661.fasta ├── Aegilops_tauschii.fasta ├── Agrostis_stolonifera.fasta ├── Anomochloa_marantoidea.fasta ├── Arundinaria_appalachiana.fasta ├── Arundinaria_gigantea.fasta ├── Arundinaria_tecta.fasta ├── Bambusa_emeiensis.fasta ├── Bambusa_multiplex.fasta ├── Bambusa_oldhamii.fasta ├── Brachypodium_distachyon.fasta ├── Coix_lacryma-jobi.fasta ├── Dendrocalamus_latiflorus.fasta ├── Deschampsia_antarctica.fasta ├── Ferrocalamus_rimosivaginus.fasta ├── Festuca_altissima.fasta ├── Festuca_arundinacea.fasta ├── Festuca_ovina.fasta ├── Festuca_pratensis.fasta ├── Hordeum_vulgare_sub_vulgare.fasta ├── Indocalamus_longiauritus.fasta ├── Leersia_tisserantii.fasta ├── Lolium_multiflorum.fasta ├── Lolium_perenne.fasta ├── Oryza_meridionalis.fasta ├── Oryza_nivara.fasta ├── Oryza_rufipogon.fasta ├── Oryza_sativa_93-11.fasta ├── Oryza_sativa_Nipponbare.fasta ├── Panicum_virgatum.fasta ├── Pharus_lappulaceus.fasta ├── Pharus_latifolius.fasta ├── Phragmites_australis.fasta ├── Phyllostachys_edulis.fasta ├── Phyllostachys_nigra_var_henonis.fasta ├── 
Phyllostachys_propinqua.fasta ├── Puelia_olyriformis.fasta ├── Rhynchoryza_subulata.fasta ├── Saccharum_officinarum.fasta ├── Secale_cereale.fasta ├── Setaria_italica.fasta ├── Sorghum_bicolor.fasta ├── Sorghum_timorense.fasta ├── Triticum_aestivum.fasta ├── Triticum_monococcum.fasta ├── Triticum_urartu.fasta ├── Typha_latifolia.fasta ├── Zea_mays.fasta ├── Zizania_latifolia.fasta └── readme /PartitionFinderV1.1.1/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/.DS_Store -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/PartitionFinder.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | #This program is free software: you can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, the PyParsing library, and the python-cluster library 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import sys 19 | from partfinder import main 20 | 21 | if __name__ == "__main__": 22 | # Well behaved unix programs exits with 0 on success... 
23 | sys.exit(main.main("PartitionFinder", "DNA")) 24 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/PartitionFinderProtein.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | #This program is free software: you can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, the PyParsing library, and the python-cluster library 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import sys 19 | from partfinder import main 20 | 21 | if __name__ == "__main__": 22 | # Well behaved unix programs exits with 0 on success... 23 | sys.exit(main.main("PartitionFinderProtein", "protein")) 24 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/README.md: -------------------------------------------------------------------------------- 1 | # PartitionFinder 2 | 3 | PartitionFinder and PartitionFinderProtein are Python programs for simultaneously 4 | choosing partitioning schemes and models of molecular evolution for sequence data. 
5 | You can use them before running a phylogenetic analysis, in order 6 | to decide how to divide up your sequence data into separate blocks before 7 | analysis, and to simultaneously perform model selection on each of those 8 | blocks. 9 | 10 | # Operating System 11 | 12 | Mac and Windows are supported. 13 | All of the code was written with Linux in mind too, so if you are interested 14 | in porting it to Linux, please get in touch (or just try it out!). 15 | 16 | # Manual 17 | 18 | is in the /docs folder. 19 | 20 | # Quick Start 21 | 22 | * Make sure you have Python 2.7 installed first, if not, go to www.python.org/getit/ 23 | 24 | * For PartitionFinderProtein just substitute 'PartitionFinderProtein' for 'PartitionFinder' below 25 | 26 | 1. Open Terminal (on a Mac) or Command Prompt (on Windows) and cd to the directory with PartitionFinder in it 27 | 2. Run PartitionFinder by typing at the command prompt: 28 | 29 | python PartitionFinder.py example 30 | 31 | This will run the included example analysis for PartitionFinder. More generally, the command line for PartitionFinder looks like this: 32 | 33 | python 34 | 35 | where is the full file-path to the PartitionFinder.py file 36 | and is the full filepath to a folder with a phylip alignemnt and associated .cfg file. 37 | 38 | For more details, read the manual. 39 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/RELEASE-VERSION: -------------------------------------------------------------------------------- 1 | 1.1.1 2 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/ROGP_v2.0-backup.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # 3 | #AUTHOR 4 | #Guiqi Bi :fenghen360@126.com 5 | #VERSION 6 | #ROGP v0.1 7 | #COPYRIGHT & LICENCE 8 | #This script is free software; you can redistribute it and/or modify it. 
9 | #This script is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of merchantability or fitness for a particular purpose. 10 | 11 | my $USAGE = "\nusage: ./ROGP.pl 12 | \nparameters: 13 | -in= Genome alignment outputfile derived from Muave. If you set --align, ignore this input parameter. 14 | -number= Number of taxa used in aliggment (should be precious). If you set --align, ignore this input parameter. 15 | --align If you want to align sequences by mauve, add this parameter (Default: progressiveMauve). 16 | Then you should split every sequence into a single fasta file. Suffix must be .fasta 17 | --path= Absolute path to directory where you put in fasta sequences. 18 | --mauve-out= The output file produced by mauve (Absolute path). If you set --align parameter. 19 | -help Print the usage.\n"; 20 | 21 | 22 | #------------------------------------------------------------------------------------------- 23 | #提取参数 24 | foreach my $paras (@ARGV){ 25 | if ($paras=~/in/){ 26 | $in=(split "=", $paras)[1]; 27 | } 28 | if ($paras=~/number/){ 29 | $number=(split "=", $paras)[1]; 30 | } 31 | if ($paras=~/help/){ 32 | print $USAGE; 33 | } 34 | if ($paras=~/align/){ 35 | $align=1; 36 | } 37 | if ($paras=~/path/){ 38 | $path=(split "=", $paras)[1]; 39 | } 40 | if ($paras=~/mauve-out/){ 41 | $mauveout=(split "=", $paras)[1]; 42 | } 43 | } 44 | #------------------------------------------------------------------------------------------- 45 | #参数检验 46 | if($align){ 47 | undef $in; #如果设置了align,就清空$in和$number 48 | undef $number; 49 | if(!$mauveout){ 50 | print "Please set the --mauve-out= parameter!\n";#检查是否设置了--mauve-out参数 51 | exit; 52 | } 53 | elsif(!$path){ 54 | print "Please set the --path= parameter!\n";#检查是否设置了path参数 55 | exit; 56 | } 57 | else{ 58 | $in=$mauveout; #把mauveout回传给$in 59 | my @files=glob "$path*.fasta"; #匹配fasta文件并保存到数组里 60 | my $filecom; 61 | $number=$#files+1; #回传文件数量给number变量 62 | print "Totla $number 
files detected!\nThe list of sequences will be aligned:\n"; 63 | foreach(@files){ 64 | $filecom.=" $_"; 65 | print "$_\n"; #打印文件进行检验 66 | } 67 | print "<===========Please re-check.============>\n\nKeep going?\n\[Enter press/Ctrl+C\]\n"; 68 | my $go=; #标准输入来决定程序是否进行下去 69 | if(!$go){exit;} 70 | else{print "Aligning, please wait.\n"; 71 | `./progressiveMauve --output=$mauveout $filecom|tee mauve-screenout.log`; 72 | open MAUVE,"<","mauve-screenout.log"; 73 | while(){print;} 74 | } 75 | 76 | } 77 | 78 | } 79 | 80 | print "\n\nROGP started!\n\nAligned fasta file is $in\nNumber of the taxon species used in alignment is $number\n"; 81 | 82 | my $taxon=1; #记录fasta标头识别数字 83 | my $file=1; #用于输出模块标头的head 84 | my $module; 85 | 86 | #------------------------------------------------------------------------------------------- 87 | #首先提取fasta标头 88 | open(HEAD, ">>head.tmp"); 89 | open(IN, "<$in")||die "Can't open $in:$!\n"; 90 | while(){ 91 | if($_=~m/^>/){ 92 | print HEAD "$_"; 93 | } 94 | } 95 | close(HEAD); 96 | 97 | print "Identify the colinear blocks. 
Be patient\n\n\n"; 98 | open(TMP, "){ 100 | 101 | if($_=~m/^>\s$taxon:/){ 102 | $module.="$_"; 103 | $taxon++; 104 | if($taxon==$number+1){ 105 | open(OUT, ">module_$file.head") ; 106 | print OUT "$module"; 107 | close(OUT); 108 | undef $module; 109 | $taxon=1; 110 | $file++; 111 | } 112 | } 113 | else{ 114 | undef $module; 115 | $taxon=1; 116 | } 117 | } 118 | 119 | `rm head.tmp`; 120 | print "Finished!\n\n\n"; 121 | 122 | 123 | #------------------------------------------------------------------------------------------- 124 | #抽取序列并修改文件名 125 | my @head=glob("*.head"); 126 | my $temp_num=@head; #temp_num临时记录 127 | print "$temp_num colinear blocks were identified totally!\nNow extracting these sequences!\n\n\n"; 128 | foreach my $head(@head){ 129 | 130 | &extract($head,$in); 131 | `rm $head`; 132 | } 133 | 134 | `rename .head.fasta .fasta *.fasta`; 135 | 136 | #写了个子程序用来提取序列,直接拿的g.pl进行的修改 137 | sub extract{ 138 | my $biaotou=shift @_; 139 | my $seq=shift @_; 140 | my @liuyuan; 141 | open IN,"<",$biaotou; 142 | while(){ 143 | push @liuyuan,"$_"; 144 | } 145 | 146 | foreach(0...$#liuyuan){ 147 | 148 | open OUT,"<",$seq; 149 | open FASTA,">>","$biaotou.fasta"; 150 | my $turnoff=0; 151 | while($line=){ 152 | 153 | if($line eq $liuyuan[$_]){$turnoff=1; 154 | print FASTA "$line"; 155 | next; 156 | } 157 | elsif($line ne $liuyuan[$_]&&$line=~m/>.*\n/){$turnoff=0;} 158 | if($turnoff){print FASTA "$line";} 159 | 160 | 161 | } 162 | close(OUT); 163 | close(FASTA); 164 | } 165 | } 166 | #------------------------------------------------------------------------------------------- 167 | #改每个模块序列的标头,否则太长,Gblock无法处理,并处理每个模块序列尾部的=号 168 | 169 | my @seq=glob("*.fasta"); 170 | foreach my $seq(@seq){ 171 | open(TMP2IN, "<$seq")||die "Can't open $in:$!\n"; 172 | open(TMP2OUT, ">>$seq.rename")||die "Can't open $in:$!\n"; 173 | while(){ 174 | if($_=~m/=/){next;} 175 | if($_=~m/^>/){ 176 | my @array=split(/ [+|-] /,$_); 177 | my @array2=split(/\\/,@array[$#array]); 178 | my 
@array3=split(/\//,@array2[$#array2]); 179 | @array3[$#array3]=~s/\.fasta//g; 180 | print TMP2OUT ">@array3[$#array3]"; 181 | undef @array; 182 | undef @array2; 183 | undef @array3; 184 | } 185 | else {print TMP2OUT "$_";} 186 | 187 | } 188 | close(TMP2IN); 189 | close(TMP2OU); 190 | `rm $seq`; 191 | } 192 | `rename .fasta.rename .fasta *.rename`; 193 | 194 | 195 | 196 | 197 | 198 | #------------------------------------------------------------------------------------------- 199 | #使用Gblocks进行序列处理,需要将Gblocks添加至环境变量,也可以再设置参数,填写Gblock的位置,等去看看Gblock是否有没有交互的,好直接加参数进去 200 | print "Now work with Gblock!\n\n\n" ; 201 | 202 | my @trimed=glob("*.fasta"); 203 | foreach my $trimed(@trimed){ 204 | `./Gblocks $trimed out`; 205 | `rm $trimed`; 206 | } 207 | 208 | 209 | #------------------------------------------------------------------------------------------- 210 | #处理gb后缀的结果文件 211 | #perl -e '$gb=shift;open GB,$gb;while(){if(/^>/){print "$_";}if(/^[A|T|C|G|N]{10}\s/i){$_=~s/\s//g;print "$_";}}' block20.fasta-gb > block20-gb 212 | 213 | my @gb=glob("*.fasta-gb"); 214 | foreach my $gb(@gb){ 215 | my $delete=0; #设置个阈值,如果没有匹配到任何ATCG的话,就直接略过,并删掉产生文件 216 | open(GB, "<$gb")||die "Can't open $in:$!\n"; 217 | open(GBOUT, ">>$gb.out")||die "Can't open $in:$!\n"; 218 | while(){ 219 | 220 | if(/^>/){print GBOUT "$_";} 221 | elsif(/^[A|T|C|G|N]{10}\s/i){ 222 | $delete++; 223 | $_=~s/\s//g; 224 | print GBOUT "$_\n"; 225 | } 226 | 227 | } 228 | close(GB); 229 | close(GBOUT); 230 | if ($delete==0){`rm $gb.out`;} 231 | `rm $gb`; 232 | } 233 | 234 | `rename fasta-gb.out fasta *.fasta-gb.out`; 235 | 236 | 237 | 238 | #------------------------------------------------------------------------------------------- 239 | #最后合并文件,报告提取出多少模块,和总序列长度 240 | my @final=glob("*.fasta"); 241 | my $f_length=@final; 242 | if ($f_length==$temp_num){ 243 | print "All blocks extracted by Mauve have conserved sequences.\n\n\n"; 244 | } 245 | else {print "Only $f_length blocks have conserved sequences.\n\n\n";} 246 | 
#先建一个数组 247 | my @fasta; 248 | my $hehe=1; 249 | foreach my $final(@final){ 250 | 251 | if ($hehe>1){last;} 252 | else { 253 | open(HEHE, "<$final")||die "Can't open $in:$!\n"; 254 | while(){ 255 | if($_=~m/^>/){push @fasta, $_;} 256 | } 257 | close(HEHE); 258 | } 259 | $hehe++; 260 | } 261 | 262 | open(CAN,">>all-sequence.fasta")||die"Can'not open file"; 263 | my $character_length; 264 | 265 | foreach $fasta(@fasta){ 266 | my $can_all; #用来连接一个物种的序列 267 | print CAN "$fasta"; 268 | foreach $final(@final){ 269 | open(FILE,"<$final")||die"Can'not open file"; 270 | my $turnoff=0; #用来判断是否连接 271 | my $can; #用来连接序列 272 | while(my $line=){ 273 | if ($line eq $fasta){$turnoff=1;next;} 274 | elsif($line ne $fasta&&$line=~m/>.*\n/){$turnoff=0;} 275 | if($turnoff){chomp $line;$can.=$line;} 276 | } 277 | $can_all.=$can; 278 | 279 | } 280 | print CAN "$can_all\n"; 281 | $character_length=length($can_all); 282 | } 283 | close(CAN); 284 | 285 | 286 | print "The final concatenated sequences was writen in all-sequence.fasta\n\n"; 287 | print "The concatenated length is $character_length bp\n\n"; 288 | print "ROGP DATA PREPRATION COMPLETED! 
ENJOY IT!!\n\n\n"; 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/circoletto.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/circoletto.pl -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/docs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/docs/.DS_Store -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/docs/Manual_v1.1.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/docs/Manual_v1.1.1.pdf -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/examples/.DS_Store -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/README.txt: -------------------------------------------------------------------------------- 1 | README for Examples 2 | ___________________ 3 | 4 | 5 | These folders contain two simple examples. 
6 | 7 | The /nucleotide folder demonstrates PartitionFinder 8 | the /aminoacid folder demonstrates PartitionFinderProtein 9 | 10 | Instructions on how to run these examples are provided in the manual: 11 | 12 | For Mac Users: Page 7 of the manual 13 | For Windows Users: Page 9 of the manual 14 | 15 | Please email me if you have any questions. 16 | 17 | Rob Lanfear 18 | May 2012 -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/aminoacid/Als_etal_2004.phy: -------------------------------------------------------------------------------- 1 | 4 949 2 | AD00P055 SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNLFFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPXXXXXXXXXXGIISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYFTSATMIIAVPTGIKIFSWLATIYGTQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLKIQFILMFIGVNXXXXXXXXXXXXXXXXXXXXXPDMXLSWNIISSLGSYMSFISMMMMMMIIWESMIKQRLILFSLNMSSSIEWLQNTPPNEHSYNELPILSNFMATWSNLNFQNSVSPLMEQIIFFHDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMIELIXXXXXXXXXXXXXXXXXRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINEYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTVPSIGVKVDANPGRLNQTSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFXDAPGHSDFIKNMITGTSQAXCAVLIVAAGTGEXEAGISKNGQTREHALXAFTLGVKQLIVGVNKMXSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEXLQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDXXSGKSXEVDPKSIKSGDDAXVNMVXSKPLXXES 3 | RV03N585 
SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNLFFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGIISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYITSATMIIAVPTGIKIFSWLATIYGTQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLKIQFILMFIGVNXXXXXXXXXXXXXXXXXXXXXPDMFLSWNIISSLGSYMSFISMMMMMMIIWESMIKQRLILFSLNMSSSIEWLQNTPPNEHSYNELPILSNFMATWSNLNFQNSVSPLMEQIIFFHDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMIEXXXXXXXXIILIFIALPSLRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINKYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTVPSIGVKVDANPGRLNQTSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFIDAPGHSDFIKNMITGTSQADCAVLIVAAGTGEFEAGISKNGQTREHALLAFTLGVKQLIVGVNKMDSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEALQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDRRSGKSTEVDPKSIKSGDAAIVNLVPSKPLCVES 4 | TDA99Q996 SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNMSFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQXXXXXXXXXXXXXXXXXXXXIISXIISQESXKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYFTSATMIIAVPTGIKIFSWLATIYGSQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLXIXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXLSWNIVSSLGSYMSFISMLLMMMIIWESMIKKRLILFSLNMSSSIEWLQNTPPNEHSYNELPILNNFMATWSNLNFQNSVSPLMEQIIFFNDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMXXLIXXXXXXXXXXXXXXXSLRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINEYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTIPAIGVKVDANPGRLNQSSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFIDAPGHSDFIKNMITGTSQADCAVLIVAAGTGEFEAGISKNGQTREHALLAFTLGVKQLIVGVNKMDSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEALQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDRRSGKSTEVDPKSIKSGDAAIVNLVPSKPLCVES 5 | ZD99S305 
SLMLLISSSIVENGAGTGWTVYPPLSSNIAHSGSSVDLAIFSLHLAGISSILGAINFITTIINMKVNNLFFDQMSLFIWAVGITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPXXXXXXXXXXXXXXXXXXXESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDIDTRAYFTSATMIIAVPTGIKIFSWLATIYGTQINYSPSMLWSLGFIFLFAVGGLTGVILANSSIDITLHDTYYVVAHFHYVLSMGAIFAIFGGFIHWYPLFTGLMMNSYLLKIQFILMXXXXXXXXXXXXXXXXXXXXXXXXXXPDMXLSWNIISSLGSYMSFISMMMMMMIIWESMIKQRLILFSLNMSSSIEWLQNTPPNEHSYNELPILSNFMATWSNLNFQNSVSPLMEQIIFFHDHSLIILIMITMLLSYMMLSMFWNKFINRFLLEGQMIELIXXXXXXIILIFIALPSLRLLYLLDELNNPLITIKSIGHQWYWSYEYSDFKNIEFDSYMINEYNLNNFRLLDVDNRIIIPMNNNIRMLITATDVIHSWTVPSIGVKVDANPGRLNQTSFFINRPGIFFGQCSEICGANHSFMPIVIESISIKNFIDAPGHSDFIKNMITGTSQADCAVLIVAAGTGEFEAGISKNGQTREHALLAFTLGVKQLIVGVNKMDSTEPPYSESRFEEIKKEVSSYIKKIGYNPAAVAFVPISGWHGDNMLEASTKMPWFKGWQVERKEGKAEGKCLIEALDAILPPARPTDKALRLPLQDVYKIGGIGTVPVGRVETGVLKPGTIVVFAPANITTEVKSVEMHHEALQEAVPGDNVGFNVKNVSVKELRRGYVAGDTKNNPPKGAADFTAQVIVLNHPGQISNGYTPVLDCHTAHIACKFAEIKEKVDRRSGKSTEVDPKSIKSGDAAIVNLVPSKPLCVES 6 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/aminoacid/partition_finder.cfg: -------------------------------------------------------------------------------- 1 | ## ALIGNMENT FILE ## 2 | alignment = Als_etal_2004.phy; 3 | 4 | ## BRANCHLENGTHS: linked | unlinked ## 5 | branchlengths = linked; 6 | 7 | ## MODELS OF EVOLUTION for PartitionFinder: all | raxml | mrbayes | ## 8 | ## for PartitionFinderProtein: all_protein | ## 9 | models = all_protein; 10 | 11 | # MODEL SELECCTION: AIC | AICc | BIC # 12 | model_selection = BIC; 13 | 14 | ## DATA BLOCKS: see manual for how to define ## 15 | [data_blocks] 16 | COI = 1-407; 17 | COII = 408-624; 18 | EF1a = 625-949; 19 | 20 | ## SCHEMES, search: all | user | greedy ## 21 | [schemes] 22 | search = greedy; 23 | 24 | #user schemes go here if search=user. 
See manual for how to define.# 25 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/nucleotide/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/examples/nucleotide/.DS_Store -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/nucleotide/partition_finder.cfg: -------------------------------------------------------------------------------- 1 | ## ALIGNMENT FILE ## 2 | alignment = test.phy; 3 | 4 | ## BRANCHLENGTHS: linked | unlinked ## 5 | branchlengths = linked; 6 | 7 | ## MODELS OF EVOLUTION for PartitionFinder: all | raxml | mrbayes | beast | ## 8 | ## for PartitionFinderProtein: all_protein | ## 9 | models = all; 10 | 11 | # MODEL SELECCTION: AIC | AICc | BIC # 12 | model_selection = BIC; 13 | 14 | ## DATA BLOCKS: see manual for how to define ## 15 | [data_blocks] 16 | Gene1_pos1 = 1-789\3; 17 | Gene1_pos2 = 2-789\3; 18 | Gene1_pos3 = 3-789\3; 19 | Gene2_pos1 = 790-1449\3; 20 | Gene2_pos2 = 791-1449\3; 21 | Gene2_pos3 = 792-1449\3; 22 | Gene3_pos1 = 1450-2208\3; 23 | Gene3_pos2 = 1451-2208\3; 24 | Gene3_pos3 = 1452-2208\3; 25 | 26 | ## SCHEMES, search: all | greedy | rcluster | hcluster | user ## 27 | [schemes] 28 | search = greedy; 29 | 30 | #user schemes go here if search=user. 
See manual for how to define.# 31 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/examples/nucleotide/test.phy: -------------------------------------------------------------------------------- 1 | 4 2208 2 | spp1 CTTGAGGTTCAGAATGGTAATGAA------GTGCTGGTGCTGGAAGTTCAGCAGCAGCTCGGCGGCGGTATCGTACGTACCATCGCCATGGGTTCTTCCGACGGTCTGCGTCGCGGTCTGGATGTAAAAGACCTCGAGCACCCGATCGAAGTCCCAGTTGGTAAAGCAACACTGGGTCGTATCATGAACGTACTGGGTCAGCCAGTAGACATGAAGGGCGACATCGGTGAAGAAGAGCGTTGGGCT---------------ATCCACCGTGAAGCACCATCCTATGAAGAGCTGTCAAGCTCTCAGGAACTGCTGGAAACCGGCATCAAAGTTATCGACCTGATGTGTCCGTTTGCGAAGGGCGGTAAAGTTGGTCTGTTCGGTGGTGCGGGTGTAGGTAAAACCGTAAACATGATGGAGCTTATTCGTAACATCGCGATCGAGCACTCCGGTTATTCTGTGTTTGCGGGCGTAGGTGAACGTACTCGTGAGGGTAACGACTTCTACCACGAAATGACCGACTCCAACGTTATCGAT---------------------AAAGTTTCTCTGGTTTATGGCCAGATGAACGAGCCACCAGGTAACCGTCTGCGCGTTGCGCTGACCGGTCTGACCATGGCTGAGAAGTTCCGTGACGAAGGTCGCGACGTACTGCTGTTCGTCGATAACATCTATCGTTACACCCTGGCAGGTACTGAAGTTTCAGCACTGCTGGGTCGTATGCCTTCAGCGGTAGGTTACCAGCCGACTCTGGCGGAAGAAATGGGCGTTCGCATTCCAACGCTGGAAGAGTGTGATATCTGCCACGGCAGCGGCGCTAAAGCCGGTTCGAAGCCGCAGACCTGTCCTACCTGTCACGGTGCAGGCCAGGTACAGATGCGCCAGGGCTTCTTCGCTGTACAGCAGACCTGTCCACACTGCCAGGGCCGCGGTACGCTGATCAAAGATCCGTGCAACAAATGTCACGGTCATGGTCGCGTAGAGAAAACCAAAACCCTGTCCGTAAAAATTCCGGCAGGCGTTGATACCGGCGATCGTATTCGTCTGACTGGCGAAGGTGAAGCTGGTGAGCACGGCGCACCGGCAGGCGATCTGTACGTTCAGGTGCAGGTGAAGCAGCACGCTATTTTCGAGCGTGAAGGCAACAACCTGTACTGTGAAGTGCCGATCAACTTCTCAATGGCGGCTCTTGGCGGCGAGATTGAAGTGCCGACGCTTGATGGTCGCGTGAAGCTGAAAGTTCCGGGCGAAACGCAAACTGGCAAGCTGTTCCGTATGCGTGGCAAGGGCGTGAAGTCCGTGCGCGGCGGTGCACAGGGCGACCTTCTGTGCCGCGTGGTGGTCGAGACACCGGTAGGTCTTAACGAGAAGCAGAAACAGCTGCTCAAAGATCTGCAGGAAAGTTTTGGCGGCCCAACGGGTGAAAACAACGTTGTTAACGCCCTGTCGCAGAAACTGGAATTGCTGATCCGCCGCGAAGGCAAAGTACATCAGCAAACTTATGTCCATGGTGTGCCACAGGCTCCGCTGGCGGTAACCGGTGAAACGGAAGTGACCGGTACACAGGTGCGTTTCTGGCCAAGCCACGAAACCTTCACCAACGTAATCGAATTCGAATATGAGATTCTGGCAAAACGTCTGCGCGAGCTGTCATTCCTGAACTCCGGCGTTTCCATCCGTCTGCGCGATAAGCGTGAC---GGCAAAGAAG
ACCATTTCCACTATGAAGGTGGTATCAAGGCGTTTATTGAGTATCTCAATAAAAATAAAACGCCTATCCACCCGAATATCTTCTACTTCTCCACCGAA---AAAGACGGTATTGGCGTAGAAGTGGCGTTGCAGTGGAACGATGGTTTCCAGGAAAACATCTACTGCTTCACCAACAACATTCCACAGCGTGATGGCGGTACTCACCTTGCAGGCTTCCGTGCGGCGATGACCCGTACGCTGAACGCTTACATGGACAAAGAAGGCTACAGCAAAAAAGCCAAA------GTCAGCGCCACCGGTGATGATGCCCGTGAAGGCCTGATTGCCGTCGTTTCCGTGAAAGTACCGGATCCGAAATTCTCCTCTCAGACTAAAGACAAACTGGTCTCTTCTGAGGTGAAAACGGCGGTAGAACAGCAGATGAATGAACTGCTGAGCGAATACCTGCTGGAAAACCCGTCTGACGCCAAAATC 3 | spp2 CTTGAGGTACAAAATGGTAATGAG------AGCCTGGTGCTGGAAGTTCAGCAGCAGCTCGGTGGTGGTATCGTACGTGCTATCGCCATGGGTTCTTCCGACGGTCTGCGTCGTGGTCTGGAAGTTAAAGACCTTGAGCACCCGATCGAAGTCCCGGTTGGTAAAGCAACGCTGGGTCGTATCATGAACGTGCTGGGTCAGCCGATCGATATGAAAGGCGACATCGGCGAAGAAGAACGTTGGGCG---------------ATTCACCGTGCAGCACCTTCCTATGAAGAGCTCTCCAGCTCTCAGGAACTGCTGGAAACCGGCATCAAAGTTATCGACCTGATGTGTCCGTTCGCGAAGGGCGGTAAAGTCGGTCTGTTCGGTGGTGCGGGTGTTGGTAAAACCGTAAACATGATGGAGCTGATCCGTAACATCGCGATCGAACACTCCGGTTACTCCGTGTTTGCTGGTGTTGGTGAGCGTACTCGTGAGGGTAACGACTTCTACCACGAAATGACCGACTCCAACGTTCTGGAT---------------------AAAGTATCCCTGGTTTACGGCCAGATGAACGAGCCGCCGGGAAACCGTCTGCGCGTTGCACTGACCGGCCTGACCATGGCTGAGAAATTCCGTGACGAAGGTCGTGACGTTCTGCTGTTCGTCGATAACATCTATCGTTATACCCTGGCCGGTACAGAAGTATCTGCACTGCTGGGTCGTATGCCTTCTGCGGTAGGTTATCAGCCGACGCTGGCGGAAGAGATGGGCGTTCGTATCCCGACGCTGGAAGAGTGCGACGTCTGCCACGGCAGCGGCGCGAAATCTGGCAGCAAACCGCAGACCTGTCCGACCTGTCATGGTCAGGGCCAGGTGCAGATGCGTCAGGGCTTCTTCGCCGTTCAGCAGACCTGTCCGCATTGTCAGGGGCGCGGTACGCTGATTAAAGATCCGTGCAACAAATGTCACGGTCACGGTCGCGTTGAGAAAACCAAAACCCTGTCGGTCAAAATCCCGGCGGGCGTGGATACCGGCGATCGTATTCGTCTGTCAGGAGAAGGCGAAGCGGGCGAACACGGTGCACCAGCAGGCGATCTGTACGTTCAGGTCCAGGTTAAGCAGCACGCCATCTTTGAGCGTGAAGGCAATAACCTGTACTGCGAAGTGCCTATTAACTTCACCATGGCAGCCCTCGGCGGCGAGATTGAAGTCCCGACGCTGGATGGCCGGGTGAATCTCAAAGTGCCTGGCGAAACGCAAACCGGCAAACTGTTCCGCATGCGCGGTAAAGGTGTGAAATCCGTGCGCGGTGGTGCTCAGGGCGACCTGCTGTGCCGCGTGGTGGTTGAAACACCAGTCGGGCTGAACGATAAGCAGAAACAGCTGCTGAAGGACCTGCAGGAAAGTTTTGGCGGACCAACGGGCGAGAAAAACGTGGTTAACGCCCTGTCGCAGAAGCTGGAGCTGGTTATTCAGCGCGACAATAAAGTTCACCG
TCAGATCTATGCGCACGGTGTGCCGCAGGCTCCGCTGGCAGTGACCGGTGAGACCGAAAAAACCGGCACCATGGTACGTTTCTGGCCAAGCTATGAAACCTTCACCAACGTTGTCGAGTTCGAATACGAGATCCTGGCAAAACGTCTGCGTGAGCTGTCGTTCCTGAACTCCGGGGTTTCTATCCGTCTGCGTGACAAGCGTGAC---GGTAAAGAAGACCATTTCCACTACGAAGGCGGCATCAAGGCGTTCGTTGAGTATCTCAATAAGAACAAAACGCCGATCCACCCGAATATCTTCTACTTCTCCACCGAA---AAAGACGGTATTGGCGTCGAAGTAGCGCTGCAGTGGAACGACGGCTTCCAGGAAAACATCTACTGCTTCACCAACAACATCCCGCAGCGCGATGGCGGTACTCACCTTGCGGGCTTCCGCGCGGCGATGACCCGTACCCTGAACGCCTATATGGACAAAGAAGGCTACAGCAAAAAAGCCAAA------GTCAGCGCTACCGGCGACGATGCGCGTGAAGGCCTGATTGCCGTTGTCTCCGTGAAGGTTCCGGATCCGAAATTCTCCTCGCAGACCAAAGACAAACTGGTCTCCTCCGAGGTGAAAACCGCGGTTGAACAGCAGATGAATGAACTGCTGAACGAATACCTGCTGGAAAATCCGTCTGACGCGAAAATC 4 | spp3 CTTGAGGTACAGAATAACAGCGAG------AAGCTGGTGCTGGAAGTTCAGCAGCAGCTCGGCGGCGGTATCGTACGTACCATCGCAATGGGTTCTTCCGACGGTCTGCGTCGTGGTCTGGAAGTGAAAGACCTCGAGCACCCGATCGAAGTCCCGGTAGGTAAAGCGACCCTGGGTCGTATCATGAACGTGCTGGGTCAGCCAATCGATATGAAAGGCGACATCGGCGAAGAAGATCGTTGGGCG---------------ATTCACCGCGCAGCACCTTCCTATGAAGAGCTGTCCAGCTCTCAGGAACTGCTGGAAACCGGCATCAAAGTTATCGACCTGATTTGTCCGTTCGCTAAGGGCGGTAAAGTTGGTCTGTTCGGTGGTGCGGGCGTAGGTAAAACCGTAAACATGATGGAGCTGATCCGTAACATCGCGATCGAGCACTCCGGTTACTCCGTGTTTGCAGGCGTGGGTGAGCGTACTCGTGAGGGTAACGACTTCTACCACGAGATGACCGACTCCAACGTTCTGGAC---------------------AAAGTTGCACTGGTTTACGGCCAGATGAACGAGCCGCCAGGTAACCGTCTGCGCGTAGCGCTGACCGGTCTGACCATCGCGGAGAAATTCCGTGACGAAGGCCGTGACGTTCTGCTGTTCGTCGATAACATCTATCGTTATACCCTGGCCGGTACAGAAGTTTCTGCACTGCTGGGTCGTATGCCATCTGCGGTAGGTTATCAGCCTACTCTGGCAGAAGAGATGGGTGTTCGTATCCCGACGCTGGAAGAGTGTGAAGTTTGCCACGGCAGCGGCGCGAAAAAAGGTTCTTCTCCGCAGACCTGTCCAACCTGTCATGGACAGGGCCAGGTGCAGATGCGTCAGGGCTTCTTCACCGTGCAGCAAAGCTGCCCGCACTGCCAGGGCCGCGGTACCATCATTAAAGATCCGTGCACCAACTGTCACGGCCATGGCCGCGTAGAGAAAACCAAAACGCTGTCGGTAAAAATTCCGGCAGGCGTGGATACCGGCGATCGTATCCGCCTTTCTGGTGAAGGCGAAGCGGGCGAGCACGGCGCACCTTCAGGCGATCTGTACGTTCAGGTTCAGGTGAAACAGCACCCAATCTTCGAGCGTGAAGGCAATAACCTGTACTGCGAAGTGCCGATCAACTTTGCGATGGCTGCGCTGGGCGGGGAAATTGAAGTGCCGACCCTTGACGGCCGCGTTAAGCTGAAGGTACCGAGCGAAACGCAAACCGGCAAGCTGTTCCGC
ATGCGCGGTAAAGGCGTGAAATCCGTACGCGGTGGCGCGCAGGGCGATCTGCTGTGCCGCGTCGTCGTTGAAACTCCGGTTAGCCTGAACGAAAAGCAGAAGAAACTGCTGCGTGATTTGGAAGAGAGCTTTGGCGGCCCAACGGGGGCGAACAATGTTGTGAACGCCCTGTCCCAGAAGCTGGAGCTGCTGATTCGCCGCGAAGGCAAAACCCATCAGCAAACCTACGTGCACGGTGTGCCGCAGGCTCCGCTGGCGGTCACCGGTGAAACCGAACTGACCGGTACCCAGGTGCGTTTCTGGCCGAGCCATGAAACCTTCACCAACGTCACCGAATTCGAATATGACATCCTGGCTAAGCGCCTGCGTGAGCTGTCGTTCCTGAACTCCGGCGTCTCTATTCGCCTGAACGATAAGCGCGAC---GGCAAGCAGGATCACTTCCACTACGAAGGCGGCATCAAGGCGTTTGTTGAGTACCTCAACAAGAACAAAACCCCGATTCACCCGAACGTCTTCTATTTCAGCACTGAA---AAAGACGGCATCGGCGTGGAAGTGGCGCTGCAGTGGAACGACGGCTTCCAGGAAAATATCTACTGCTTTACCAACAACATTCCTCAGCGCGACGGCGGTACTCACCTTGCGGGCTTCCGCGCGGCGATGACCCGTACCCTGAACGCCTATATGGACAAAGAAGGCTACAGCAAAAAAGCCAAA------GTGAGCGCCACCGGTGACGATGCGCGTGAAGGCCTGATTGCCGTAGTGTCCGTGAAGGTGCCGGATCCGAAGTTCTCTTCCCAGACCAAAGACAAACTGGTTTCTTCGGAAGTGAAATCCGCGGTTGAACAGCAGATGAACGAACTGCTGGCTGAATACCTGCTGGAAAATCCGGGCGACGCAAAAATT 5 | spp4 CTCGAGGTGAAAAATGGTGATGCT------CGTCTGGTGCTGGAAGTTCAGCAGCAGCTGGGTGGTGGCGTGGTTCGTACCATCGCCATGGGTACTTCTGACGGCCTGAAGCGCGGTCTGGAAGTTACCGACCTGAAAAAACCTATCCAGGTTCCGGTTGGTAAAGCAACCCTCGGCCGTATCATGAACGTATTGGGTGAGCCAATCGACATGAAAGGCGACCTGCAGAATGACGACGGCACTGTAGTAGAGGTTTCCTCTATTCACCGTGCAGCACCTTCGTATGAAGATCAGTCTAACTCGCAGGAACTGCTGGAAACCGGCATCAAGGTTATCGACCTGATGTGTCCGTTCGCTAAGGGCGGTAAAGTCGGTCTGTTCGGTGGTGCGGGTGTAGGTAAAACCGTAAACATGATGGAGCTGATCCGTAACATCGCGGCTGAGCACTCAGGTTATTCGGTATTTGCTGGTGTGGGTGAGCGTACTCGTGAGGGTAACGACTTCTACCACGAAATGACTGACTCCAACGTTATCGAT---------------------AAAGTAGCGCTGGTGTATGGCCAGATGAACGAGCCGCCGGGTAACCGTCTGCGCGTAGCACTGACCGGTTTGACCATGGCGGAAAAATTCCGTGATGAAGGCCGTGACGTTCTGCTGTTCATCGACAACATCTATCGTTACACCCTGGCCGGTACTGAAGTATCAGCACTGCTGGGTCGTATGCCATCTGCGGTAGGCTATCAGCCAACGCTGGCAGAAGAGATGGGTGTGCGCATTCCAACACTGGAAGAGTGCGATGTCTGCCACGGTAGCGGCGCGAAAGCGGGGACCAAACCGCAGACCTGTCATACCTGTCATGGCGCAGGCCAGGTGCAGATGCGTCAGGGCTTCTTCACTGTGCAGCAGGCGTGTCCGACCTGTCACGGTCGCGGTTCAGTGATCAAAGATCCGTGCAATGCTTGTCATGGTCACGGTCGCGTTGAGCGCAGTAAAACCCTGTCGGTGAAAATTCCAGCAGGCGTGGATACCGGCGATCGCATTCGTCTGACCGGCGAAG
GTGAAGCGGGCGAACAGGGCGCACCAGCGGGCGATCTGTACGTTCAGGTTTCGGTGAAAAAGCACCCGATCTTTGAGCGTGAAGATAACAACCTATATTGCGAAGTGCCGATTAACTTTGCGATGGCAGCATTGGGTGGCGAGATTGAAGTGCCGACGCTTGATGGGCGTGTGAACCTGAAAGTGCCTTCTGAAACGCAAACTGGCAAGCTGTTCCGCATGCGCGGTAAAGGCGTGAAATCGGTGCGTGGTGGTGCGGTAGGCGATTTGCTGTGTCGTGTGGTGGTGGAAACGCCAGTTAGCCTCAATGACAAACAGAAAGCGTTACTGCGTGAACTGGAAGAGAGTTTTGGCGGCCCGAGCGGTGAGAAAAACGTCGTAAACGCCCTGTCACAGAAGCTGGAGCTGACCATTCGCCGTGAAGGCAAAGTGCATCAGCAGGTTTATCAGCACGGCGTGCCGCAGGCACCGCTGGCGGTGTCCGGTGATACCGATGCAACCGGTACTCGCGTGCGTTTCTGGCCGAGCTACGAAACCTTCACCAATGTGATTGAGTTTGAGTACGAAATCCTGGCGAAACGCCTGCGTGAACTGTCGTTCCTGAACTCTGGCGTTTCGATTCGTCTGGAAGACAAACGCGAC---GGCAAGAACGATCACTTCCACTACGAAGGCGGCATCAAGGCGTTCGTTGAGTATCTCAACAAGAACAAAACCCCGATTCACCCAACGGTGTTCTACTTCTCGACGGAG---AAAGATGGCATTGGCGTGGAAGTGGCGCTGCAGTGGAACGATGGTTTCCAGGAAAACATCTACTGCTTCACCAACAACATTCCACAGCGCGACGGCGGTACGCACCTGGCGGGCTTCCGTGCGGCAATGACGCGTACGCTGAATGCCTACATGGATAAAGAAGGCTACAGCAAAAAAGCCAAA------GTCAGTGCGACCGGTGACGATGCGCGTGAAGGCCTGATTGCAGTGGTTTCCGTGAAAGTGCCGGATCCGAAATTCTCTTCTCAGACCAAAGATAAGCTGGTCTCTTCTGAAGTGAAATCGGCGGTTGAGCAGCAGATGAACGAACTGCTGGCGGAATACCTGCTGGAAAATCCGTCTGACGCGAAAATC 6 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/.DS_Store -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | #This program is free software: you can redistribute it and/or modify it 3 | #under the terms of the GNU General Public License as published by the 4 | #Free Software Foundation, either version 3 of the License, or (at your 5 | #option) any later version. 
import logging
log = logging.getLogger("config")
import config

# TODO: Not currently used
# Activation should chdir, and maybe do some other stuff
# So maybe need an 'activate' function on the config?
# Should also clear out subsets, in the cache?

class Current(object):
    """Keep a bunch of stuff current, that can be reinitialised"""

    def __init__(self):
        # The one piece of "current" state: the active Configuration,
        # or None when nothing has been activated yet.
        self._config = None

    def activate_config(self, c):
        """Make `c` the active configuration, resetting any previous one."""
        assert isinstance(c, config.Configuration)

        previous = self._config
        if previous is not None:
            log.debug("Resetting old configuration...")
            previous.reset()

        log.debug("Assigning a new configuration...")
        self._config = c

    @property
    def active_config(self):
        """The currently active Configuration (None if none is active)."""
        if self._config is None:
            log.error("No configuration is currently active...")

        return self._config


current = Current()
#Copyright (C) 2012 Robert Lanfear and Brett Calcott
#
#This program is free software: you can redistribute it and/or modify it
#under the terms of the GNU General Public License as published by the
#Free Software Foundation, either version 3 of the License, or (at your
#option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details. You should have received a copy
#of the GNU General Public License along with this program. If not, see
#<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
#program, the RAxML program, and the PyParsing library,
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.

from math import sqrt


def k_subsets_i(n, k):
    '''
    from http://code.activestate.com/recipes/500268-all-k-subsets-from-an-n-set/
    Yield each subset of size k from the set of integers 0 .. n - 1
    n -- an integer >= 0
    k -- an integer >= 0
    '''
    # Validate args.  Zero IS accepted (k == 0 yields the empty subset and
    # n < k yields nothing useful but is well-defined), so the guard and the
    # message both say ">= 0" -- the original message claimed "> 0" while
    # the code only rejected negatives.
    if n < 0:
        raise ValueError('n must be >= 0, got n=%d' % n)
    if k < 0:
        raise ValueError('k must be >= 0, got k=%d' % k)
    # check base cases
    if k == 0 or n < k:
        yield set()
    elif n == k:
        yield set(range(n))
    else:
        # Use recursive formula based on binomial coefficients:
        # choose(n, k) = choose(n - 1, k - 1) + choose(n - 1, k)
        for s in k_subsets_i(n - 1, k - 1):
            s.add(n - 1)
            yield s
        for s in k_subsets_i(n - 1, k):
            yield s


def k_subsets(s, k):
    '''
    from http://code.activestate.com/recipes/500268-all-k-subsets-from-an-n-set/
    Yield all subsets of size k from set (or list) s
    s -- a set or list (any iterable will suffice)
    k -- an integer > 0
    '''
    s = list(s)
    n = len(s)
    for k_set in k_subsets_i(n, k):
        yield set([s[i] for i in k_set])


def lumpings(scheme):
    """
    generate all possible lumpings of a given scheme, where a lumping involves
    joining two partitions together. scheme has to be a list of digits

    Returns a list of schemes (lists), one per unordered pair of distinct
    partition numbers, with the larger number of each pair merged into the
    smaller one.
    """
    # Get the numbers involved in the scheme
    nums = set(scheme)
    # Renamed from 'lumpings' -- the original local shadowed this function.
    lumps = []
    for sub in k_subsets(nums, 2):
        lump = list(scheme)
        sub = list(sub)
        sub.sort()
        # now replace all the instances of one number in lump with the other
        # in sub (every occurrence of the larger becomes the smaller)
        while lump.count(sub[1]) > 0:
            lump[lump.index(sub[1])] = sub[0]
        lumps.append(lump)

    return lumps


def euclidean_distance(x, y):
    """Return the Euclidean distance between two equal-length numeric
    sequences (pairs beyond the shorter sequence are ignored, as before)."""
    # The builtin zip produces the same pairs as the Py2-only itertools.izip
    # did, and also works on Python 3; 'total' avoids shadowing builtin sum.
    total = 0
    for xval, yval in zip(x, y):
        total += (xval - yval) ** 2
    return sqrt(total)


# def getLevels(cluster, levs):
#     """
#     Returns the levels of the cluster as list.
#     """
#     levs.append(cluster.level())

#     left = cluster.items()[0]
#     right = cluster.items()[1]
#     if isinstance(left, Cluster):
#         first = getLevels(left, levs)
#     else:
#         first = left
#     if isinstance(right, Cluster):
#         second = getLevels(right, levs)
#     else:
#         second = right
#     return levs


def levels_to_scheme(levels, namedict):
    """
    take the return from Cluster.getlevel
    and return it as a list of partition names description

    levels -- a (possibly nested) list structure of partition numbers
    namedict -- maps partition name -> partition number
    """

    levels = str(levels)

    # BUG FIX: substitute longer numbers first.  The original iterated the
    # dict in arbitrary order, so with >= 11 partitions the value 1 could be
    # replaced inside the text of the value 11, corrupting the result.
    keys = sorted(namedict.keys(),
                  key=lambda k: len(str(namedict[k])), reverse=True)
    for key in keys:
        old = str(namedict[key])
        new = '"%s"' % key
        levels = levels.replace(old, new)

    # NOTE(review): eval of a string we built ourselves from repr() output;
    # inputs come from this program, not untrusted users, but
    # ast.literal_eval would be safer if the structure is always
    # lists/strings -- confirm before changing.
    levels = eval(levels)
    return levels
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# PartitionFinder also includes the PhyML program, the RAxML program, and the
# PyParsing library, all of which are protected by their own licenses and
# conditions, using PartitionFinder implies that you agree with those licences
# and conditions as well.

"""Loading, Saving, Parsing Alignment Files

See the phyml details here:
http://www.atgc-montpellier.fr/phyml/usersguide.php?type=command

"""
import logging
log = logging.getLogger("alignment")

import os

from pyparsing import (
    Word, OneOrMore, alphas, nums, Suppress, Optional, Group, stringEnd,
    delimitedList, ParseException, line, lineno, col, LineStart, restOfLine,
    LineEnd, White, Literal, Combine, Or, MatchFirst, ZeroOrMore)

from util import PartitionFinderError


class AlignmentError(PartitionFinderError):
    pass


class AlignmentParser(object):
    """Parses an alignment and returns species sequence tuples"""

    # I think this covers it...
    BASES = Word(alphas + "?.-")

    def __init__(self):
        # Filled in from the phylip header (if present) by set_header.
        self.sequence_length = None
        self.species_count = None
        # List of [name, sequence] pairs, in file order.
        self.sequences = []
        # Index of the sequence the next continuation line belongs to.
        self.current_sequence = 0

        self.root_parser = self.phylip_parser() + stringEnd

    def phylip_parser(self):
        """Build the pyparsing grammar for (possibly interleaved) phylip."""

        INTEGER = Word(nums)
        INTEGER.setParseAction(lambda x: int(x[0]))

        header = INTEGER("species_count") + INTEGER("sequence_length") +\
            Suppress(restOfLine)
        header.setParseAction(self.set_header)

        sequence_name = Word(
            alphas + nums + "!#$%&\'*+-./;<=>?@[\\]^_`{|}~",
            max=100)

        # Take a copy and disallow line breaks in the bases
        bases = self.BASES.copy()
        bases.setWhitespaceChars(" \t")
        seq_start = sequence_name("species") + bases("sequence") + Suppress(LineEnd())
        seq_start.setParseAction(self.set_seq_start)
        seq_start_block = OneOrMore(seq_start)
        seq_start_block.setParseAction(self.set_start_block)

        seq_continue = bases("sequence") + Suppress(LineEnd())
        seq_continue.setParseAction(self.set_seq_continue)

        seq_continue_block = Suppress(LineEnd()) + OneOrMore(seq_continue)
        seq_continue_block.setParseAction(self.set_continue_block)

        return header + seq_start_block + ZeroOrMore(seq_continue_block)

    def set_header(self, text, loc, tokens):
        self.sequence_length = tokens.sequence_length
        self.species_count = tokens.species_count

    def set_seq_start(self, text, loc, tokens):
        # First block: each line introduces a new species.
        self.sequences.append([tokens.species, tokens.sequence])
        self.current_sequence += 1

    def set_start_block(self, tokens):
        # End of block
        # Reset the counter
        self.current_sequence = 0

    def set_seq_continue(self, text, loc, tokens):
        # Later (interleaved) blocks: append bases to the species at the
        # same position in the block.
        append_to = self.sequences[self.current_sequence]
        append_to[1] += tokens.sequence
        self.current_sequence += 1

    def set_continue_block(self, tokens):
        self.current_sequence = 0

    def parse(self, s):
        """Parse alignment text `s`, returning [name, sequence] pairs.

        Raises AlignmentError on a parse failure or any inconsistency
        between the header counts and the actual sequences.
        """
        try:
            defs = self.root_parser.parseString(s)
        # 'as' form works on the Python 2.7 this code targets and is also
        # valid Python 3 syntax (the original 'except E, p' is not).
        except ParseException as p:
            log.error("Error in Alignment Parsing:" + str(p))
            log.error("A common cause of this error is having whitespace"
                      ", i.e. spaces or tabs, in the species names. Please check this and remove"
                      " all whitespace from species names, or replace them with e.g. underscores")

            raise AlignmentError

        # Check that all the sequences are equal length
        slen = None
        for nm, seq in self.sequences:
            if slen is None:
                # Use the first as the test case
                slen = len(seq)
            else:
                if len(seq) != slen:
                    # (typo fix: "sequences length" -> "sequence length")
                    log.error("Bad alignment file: Not all species have the same sequence length")
                    raise AlignmentError

        # Not all formats have a heading, but if we have one do some checking
        if self.sequence_length is not None:
            if self.sequence_length != slen:
                log.error("Bad Alignment file: sequence length count in header does not match"
                          " sequence length in file, please check")
                raise AlignmentError

        if self.species_count is not None:
            if len(self.sequences) != self.species_count:
                log.error("Bad Alignment file: species count in header does not match"
                          " number of sequences in file, please check")
                raise AlignmentError

        return self.sequences


def parse(s):
    """Module-level convenience wrapper around AlignmentParser.parse."""
    return AlignmentParser().parse(s)


class Alignment(object):
    def __init__(self):
        # Maps species name -> sequence string.
        self.species = {}
        self.sequence_len = 0

    def __str__(self):
        # BUG FIX: the original read  "..." % self.species, self.sequence_len
        # -- without parentheses the % bound only to self.species, so the
        # expression was a tuple and raised a TypeError (two %s against one
        # mapping operand) whenever str() was called on an Alignment.
        return "Alignment(%s species, %s codons)" % (
            len(self.species), self.sequence_len)

    def same_as(self, other):
        """Return True if `other` has identical length and species data."""
        if self.sequence_len != other.sequence_len:
            log.warning("Alignments not the same, length differs %s: %s", self.sequence_len, other.sequence_len)
            return False

        if self.species != other.species:
            log.warning("Alignments not the same. "
                        "This alignment has %s species, the alignment from the previous "
                        "analysis had %s.", len(self.species), len(other.species))
            return False

        return True

    def from_parser_output(self, defs):
        """A series of species / sequences tuples
        e.g def = ("dog", "GATC"), ("cat", "GATT")
        """
        species = {}
        sequence_len = None
        for spec, seq in defs:
            # log.debug("Found Sequence for %s: %s...", spec, seq[:20])
            if spec in species:
                # (message fix: the original said "Repeated ... is repeated")
                log.error("Species name '%s' is repeated "
                          "in alignment", spec)
                raise AlignmentError

            # Assign it
            species[spec] = seq

            if sequence_len is None:
                sequence_len = len(seq)
            else:
                if len(seq) != sequence_len:
                    log.error("Sequence length of %s "
                              "differs from previous sequences", spec)
                    raise AlignmentError
        log.debug("Found %d species with sequence length %d",
                  len(species), sequence_len)

        # Overwrite these
        self.species = species
        self.sequence_len = sequence_len

    def read(self, pth):
        """Read and parse the alignment file at `pth`."""
        if not os.path.exists(pth):
            log.error("Cannot find sequence file '%s'", pth)
            raise AlignmentError

        log.info("Reading alignment file '%s'", pth)
        text = open(pth, 'rU').read()
        self.from_parser_output(parse(text))

    def write(self, pth):
        self.write_phylip(pth)

    def write_phylip(self, pth):
        """Write the alignment to `pth` in phylip format."""
        fd = open(pth, 'w')
        log.debug("Writing phylip file '%s'", pth)

        species_count = len(self.species)
        sequence_len = len(iter(self.species.itervalues()).next())

        fd.write("%d %d\n" % (species_count, sequence_len))
        for species, sequence in self.species.iteritems():
            # we use a version of phylip which can have longer species names, up to 100
            shortened = "%s    " % (species[:99])
            fd.write(shortened)
            fd.write(sequence)
            fd.write("\n")
        fd.close()
class SubsetAlignment(Alignment):
    """Create an alignment based on some others and a subset definition"""
    def __init__(self, source, subset):
        """create an alignment for this subset

        source -- an Alignment to take columns from
        subset -- provides .columns, the site indices to extract
                  (presumably 0-based -- the +1 below converts to a
                  1-based site number for the error message; verify
                  against the subset module)
        """
        Alignment.__init__(self)

        #let's do a basic check to make sure that the specified sites aren't > alignment length
        site_max = max(subset.columns)+1
        log.debug("Max site in data_blocks: %d; max site in alignment: %d" %(site_max, source.sequence_len))
        if site_max>source.sequence_len:
            log.error("Site %d is specified in [data_blocks], but the alignment only has %d sites. Please check." %(site_max, source.sequence_len))
            raise AlignmentError

        # Pull out the columns we need
        for species_name, old_sequence in source.species.iteritems():
            new_sequence = ''.join([old_sequence[i] for i in subset.columns])
            self.species[species_name] = new_sequence

        # An empty source alignment would leave us with nothing -- fail loudly
        if not self.species:
            log.error("No species found in %s", self)
            raise AlignmentError

        # All sequences are the same length, so measure any one of them
        self.sequence_len = len(self.species.itervalues().next())

class TestAlignment(Alignment):
    """Good for testing stuff"""
    def __init__(self, text):
        # Build an alignment directly from an in-memory string instead of a file
        Alignment.__init__(self)
        self.from_parser_output(parse(text))
#under the terms of the GNU General Public License as published by the
#Free Software Foundation, either version 3 of the License, or (at your
#option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details. You should have received a copy
#of the GNU General Public License along with this program. If not, see
#<http://www.gnu.org/licenses/>. PartitionFinder also includes the PhyML
#program, the RAxML program, and the PyParsing library,
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.

import logging
log = logging.getLogger("analysis")

import os
import shutil

from alignment import Alignment, SubsetAlignment
import threadpool
import scheme
import subset
import results
import threading
from util import PartitionFinderError
import util

class AnalysisError(PartitionFinderError):
    pass


class Analysis(object):
    """Performs the analysis and collects the results"""
    def __init__(self, cfg, force_restart=False, threads=-1):
        # cfg is a partfinder Configuration; validate() raises on bad config.
        cfg.validate()
        self.cfg = cfg
        # threads == 1 means run serially; anything else uses the thread pool
        # (see analyse_scheme).  -1 presumably means "auto" -- the pool
        # decides; confirm in threadpool.py.
        self.threads = threads

        self.results = results.AnalysisResults(self.cfg.model_selection)

        log.info("Beginning Analysis")
        self.process_restart(force_restart)

        # Check for old analyses to see if we can use the old data
        self.cfg.check_for_old_config()

        # Make some folders for the analysis
        self.cfg.make_output_folders()
        self.make_alignment(cfg.alignment_path)
        self.make_tree(cfg.user_tree_topology_path)

        # We need this to block the threads for critical stuff
        self.lock = threading.Condition(threading.Lock())

    def process_restart(self, force_restart):
        """Clear previous output: everything if force_restart, otherwise only
        the schemes folder (subset results are kept and reused)."""
        if force_restart:
            # Remove everything
            if os.path.exists(self.cfg.output_path):
                log.warning("Deleting all previous workings in '%s'", self.cfg.output_path)
                shutil.rmtree(self.cfg.output_path)
        else:
            # Just remove the schemes folder
            if os.path.exists(self.cfg.schemes_path):
                log.info("Removing Schemes in '%s' (they will be recalculated from existing subset data)", self.cfg.schemes_path)
                shutil.rmtree(self.cfg.schemes_path)

    def analyse(self):
        """Run the subclass-provided do_analysis() and return the results."""
        self.do_analysis()
        return self.results

    def make_alignment(self, source_alignment_path):
        """Load the source alignment and copy it into the start-tree folder,
        refusing to continue if a different copy is already there."""
        # Make the alignment
        self.alignment = Alignment()
        self.alignment.read(source_alignment_path)

        # We start by copying the alignment
        self.alignment_path = os.path.join(self.cfg.start_tree_path, 'source.phy')
        if os.path.exists(self.alignment_path):
            # Make sure it is the same
            old_align = Alignment()
            old_align.read(self.alignment_path)
            if not old_align.same_as(self.alignment):
                log.error("Alignment file has changed since previous run. You need to use the force-restart option.")
                raise AnalysisError

        else:
            self.alignment.write(self.alignment_path)

    def need_new_tree(self, tree_path):
        """Return True if no usable starting tree exists at tree_path.

        A file counts as usable if it contains a ';' -- a complete newick
        tree; a file without one is assumed to be a truncated write.
        """
        if os.path.exists(tree_path):
            if ';' in open(tree_path).read():
                log.info("Starting tree file found.")
                redo_tree = False
            else:
                log.info("Starting tree file found but incomplete. Re-estimating")
                redo_tree = True
        else:
            log.info("No starting tree file found.")
            redo_tree = True

        return redo_tree

    def make_tree(self, user_path):
        """Create (or reuse) the starting tree with branch lengths.

        user_path -- optional path to a user-supplied topology; when empty
        or None a topology is estimated by the configured processor
        (phyml or raxml).
        """
        # Begin by making a filtered alignment, containing ONLY those columns
        # that are defined in the subsets
        subset_with_everything = subset.Subset(*list(self.cfg.partitions))
        self.filtered_alignment = SubsetAlignment(self.alignment, subset_with_everything)
        self.filtered_alignment_path = os.path.join(self.cfg.start_tree_path, 'filtered_source.phy')
        self.filtered_alignment.write(self.filtered_alignment_path)

        # Now we've written this alignment, we need to lock everything in
        # place, no more adding partitions, or changing them from now on.
        self.cfg.partitions.check_against_alignment(self.alignment)
        self.cfg.partitions.finalise()

        # We start by copying the alignment
        self.alignment_path = os.path.join(self.cfg.start_tree_path, 'source.phy')

        # Now check for the tree
        tree_path = self.cfg.processor.make_tree_path(self.filtered_alignment_path)

        if self.need_new_tree(tree_path) == True:
            log.debug("Estimating new starting tree, no old tree found")

            # If we have a user tree, then use that, otherwise, create a topology
            util.clean_out_folder(self.cfg.start_tree_path, keep = ["filtered_source.phy", "source.phy"])

            if user_path is not None and user_path != "":
                # Copy it into the start tree folder
                log.info("Using user supplied topology at %s", user_path)
                topology_path = os.path.join(self.cfg.start_tree_path, 'user_topology.phy')
                self.cfg.processor.dupfile(user_path, topology_path)
            else:
                log.debug(
                    "didn't find tree at %s, making a new one" % tree_path)
                topology_path = self.cfg.processor.make_topology(
                    self.filtered_alignment_path, self.cfg.datatype, self.cfg.cmdline_extras)

            # Now estimate branch lengths
            tree_path = self.cfg.processor.make_branch_lengths(
                self.filtered_alignment_path,
                topology_path,
                self.cfg.datatype,
                self.cfg.cmdline_extras)

        self.tree_path = tree_path
        log.info("Starting tree with branch lengths is here: %s", self.tree_path)

    def run_task(self, m, sub):
        """Analyse model `m` on subset `sub`; may run on a worker thread."""
        # This bit should run in parallel (forking the processor)
        self.cfg.processor.analyse(
            m,
            sub.alignment_path,
            self.tree_path,
            self.cfg.branchlengths,
            self.cfg.cmdline_extras
        )

        # Not entirely sure that WE NEED to block here, but it is safer to do
        # It shouldn't hold things up toooo long...
        self.lock.acquire()
        try:
            sub.parse_model_result(self.cfg, m)
            # Try finalising, then the result will get written out earlier...
            sub.finalise(self.cfg)
        finally:
            self.lock.release()

    def add_tasks_for_sub(self, tasks, sub):
        # One (function, args) task per model still to be processed.
        for m in sub.models_to_process:
            tasks.append((self.run_task, (m, sub)))

    def run_concurrent(self, tasks):
        # Despite the name, this is the SERIAL path (threads == 1):
        # run each task in order on this thread.
        for func, args in tasks:
            func(*args)

    def run_threaded(self, tasks):
        # Fan the tasks out over the thread pool and wait for completion.
        if not tasks:
            return
        pool = threadpool.Pool(tasks, self.threads)
        pool.join()

    def analyse_scheme(self, sch):
        """Analyse every subset in scheme `sch` and record a SchemeResult."""
        # Progress
        self.cfg.progress.next_scheme()

        # Prepare by reading everything in first
        tasks = []
        for sub in sch:
            sub.prepare(self.cfg, self.alignment)
            self.add_tasks_for_sub(tasks, sub)

        # Now do the analysis
        if self.threads == 1:
            self.run_concurrent(tasks)
        else:
            self.run_threaded(tasks)

        # Now see if we're done
        for sub in sch:
            # ALL subsets should already be finalised in the task. We just
            # check again here
            if not sub.finalise(self.cfg):
                log.error("Failed to run models %s; not sure why", ", ".join(list(sub.models_to_do)))
                raise AnalysisError

        # AIC needs the number of sequences
        number_of_seq = len(self.alignment.species)
        result = scheme.SchemeResult(sch, number_of_seq, self.cfg.branchlengths, self.cfg.model_selection)
        self.results.add_scheme_result(sch, result)

        return result
#PartitionFinder also includes the PhyML
#program, the RAxML program, the PyParsing library, and the python-cluster library
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.


import logging
log = logging.getLogger("method")

import os
import math
import scheme
import algorithm
import submodels
import subset
from analysis import Analysis, AnalysisError
import neighbour

class UserAnalysis(Analysis):
    """Analyse exactly the schemes the user listed in the .cfg file."""

    def do_analysis(self):
        log.info("Performing User analysis")
        current_schemes = [s for s in self.cfg.user_schemes]
        scheme_count = len(current_schemes)
        subset_count = subset.count_subsets()

        self.cfg.progress.begin(scheme_count, subset_count)
        if scheme_count > 0:
            for s in current_schemes:
                res = self.analyse_scheme(s)

                # Write out the scheme
                self.cfg.reporter.write_scheme_summary(s, res)
        else:
            log.error("Search set to 'user', but no user schemes detected in .cfg file. Please check.")
            raise AnalysisError

        self.cfg.progress.end()

        self.cfg.reporter.write_best_scheme(self.results)


class StrictClusteringAnalysis(Analysis):
    """
    This analysis uses model parameters to guess at similar partitions, then
    just joins them together this is much less accurate than other methods, but
    a LOT quicker - it runs in order N time (where N is the number of initial
    datablocks), whereas the greedy algorithm is still N squared.
    """

    def do_analysis(self):
        log.info("Performing strict clustering analysis")

        partnum = len(self.cfg.partitions)
        # N starting subsets merge pairwise down to 1: 2N-1 subsets and N
        # schemes are analysed in total.
        subset_count = 2 * partnum - 1
        scheme_count = partnum
        self.cfg.progress.begin(scheme_count, subset_count)

        # Start with the most partitioned scheme
        start_description = range(len(self.cfg.partitions))
        start_scheme = scheme.create_scheme(
            self.cfg, "start_scheme", start_description)

        # Analyse our first scheme
        log.info("Analysing starting scheme (scheme %s)" % start_scheme.name)
        self.analyse_scheme(start_scheme)

        # Current scheme number
        cur_s = 2

        # Now we try out all clusterings of the first scheme, to see if we can
        # find a better one
        while True:
            log.info("***Strict clustering algorithm step %d of %d***" %
                     (cur_s - 1, partnum - 1))

            # Calculate the subsets which are most similar
            # e.g. combined rank ordering of euclidean distances
            # Could combine average site-rates, q matrices, and frequencies
            scheme_name = "step_%d" % (cur_s - 1)
            clustered_scheme = neighbour.get_nearest_neighbour_scheme(
                start_scheme, scheme_name, self.cfg)

            # Now analyse that new scheme
            cur_s += 1
            self.analyse_scheme(clustered_scheme)

            # Stop when we've analysed the scheme with all subsets combined
            if len(set(clustered_scheme.subsets)) == 1:  # then it's the scheme with everything together
                break
            else:
                start_scheme = clustered_scheme

        self.cfg.progress.end()

        self.cfg.reporter.write_best_scheme(self.results)


class AllAnalysis(Analysis):
    """Exhaustively analyse every possible partitioning scheme."""

    def do_analysis(self):
        log.info("Performing complete analysis")
        partnum = len(self.cfg.partitions)

        scheme_count = submodels.count_all_schemes(partnum)
        subset_count = submodels.count_all_subsets(partnum)
        self.cfg.progress.begin(scheme_count, subset_count)

        # Iterate over submodels, which we can turn into schemes afterwards in the loop
        model_iterator = submodels.submodel_iterator([], 1, partnum)

        # Schemes are simply numbered 1, 2, 3, ...
        scheme_name = 1
        for m in model_iterator:
            s = scheme.model_to_scheme(m, scheme_name, self.cfg)
            scheme_name = scheme_name + 1
            res = self.analyse_scheme(s)

            # Write out the scheme
            self.cfg.reporter.write_scheme_summary(s, res)

        self.cfg.reporter.write_best_scheme(self.results)


class GreedyAnalysis(Analysis):
    def do_analysis(self):
        '''A greedy algorithm for heuristic partitioning searches'''

        log.info("Performing greedy analysis")

        partnum = len(self.cfg.partitions)
        scheme_count = submodels.count_greedy_schemes(partnum)
        subset_count = submodels.count_greedy_subsets(partnum)

        self.cfg.progress.begin(scheme_count, subset_count)

        # Start with the most partitioned scheme
        start_description = range(len(self.cfg.partitions))
        start_scheme = scheme.create_scheme(
            self.cfg, "start_scheme", start_description)

        log.info("Analysing starting scheme (scheme %s)" % start_scheme.name)
        self.analyse_scheme(start_scheme)

        step = 1
        cur_s = 2

        # Now we try out all lumpings of the current scheme, to see if we can
        # find a better one and if we do, we just keep going
        while True:
            log.info("***Greedy algorithm step %d***" % step)

            # Get a list of all possible lumpings of the best_scheme
            lumpings = algorithm.lumpings(start_description)

            # Save the current best score we have in results
            old_best_score = self.results.best_score
            for lumped_description in lumpings:
                lumped_scheme = scheme.create_scheme(self.cfg, cur_s, lumped_description)
                cur_s += 1
                # This is just checking to see if a scheme is any good, if it
                # is, we remember and write it later
                self.analyse_scheme(lumped_scheme)

            # Did our best score change (It ONLY gets better -- see in
            # results.py)
            if self.results.best_score == old_best_score:
                # It didn't, so we're done
                break

            # Let's look further. We use the description from our best scheme
            # (which will be the one that just changed in the last lumpings
            # iteration)
            start_description = self.results.best_result.scheme.description

            # Rename and record the best scheme for this step
            self.results.best_scheme.name = "step_%d" % step
            self.cfg.reporter.write_scheme_summary(
                self.results.best_scheme, self.results.best_result)

            # If it's the scheme with everything equal, quit
            if len(set(start_description)) == 1:
                break

            # Go do the next round...
            step += 1

        log.info("Greedy algorithm finished after %d steps" % step)
        log.info("Highest scoring scheme is scheme %s, with %s score of %.3f" %
                 (self.results.best_scheme.name, self.cfg.model_selection,
                  self.results.best_score))

        self.cfg.reporter.write_best_scheme(self.results)


class RelaxedClusteringAnalysis(Analysis):
    '''
    A relaxed clustering algorithm for heuristic partitioning searches

    1. Rank subsets by their similarity (defined by clustering-weights)
    2. Analyse cluster-percent of the most similar schemes
    3. Take the scheme that improves the AIC/BIC score the most
    4. Quit if no improvements.
215 | ''' 216 | 217 | def do_analysis(self): 218 | log.info("Performing relaxed clustering analysis") 219 | 220 | stop_at = self.cfg.cluster_percent * 0.01 221 | 222 | model_selection = self.cfg.model_selection 223 | partnum = len(self.cfg.partitions) 224 | 225 | scheme_count = submodels.count_relaxed_clustering_schemes(partnum, self.cfg.cluster_percent) 226 | subset_count = submodels.count_relaxed_clustering_subsets(partnum, self.cfg.cluster_percent) 227 | 228 | self.cfg.progress.begin(scheme_count, subset_count) 229 | 230 | # Start with the most partitioned scheme, and record it. 231 | start_description = range(len(self.cfg.partitions)) 232 | start_scheme = scheme.create_scheme( 233 | self.cfg, "start_scheme", start_description) 234 | log.info("Analysing starting scheme (scheme %s)" % start_scheme.name) 235 | self.analyse_scheme(start_scheme) 236 | self.cfg.reporter.write_scheme_summary( 237 | self.results.best_scheme, self.results.best_result) 238 | 239 | 240 | # Start by remembering that we analysed the starting scheme 241 | subset_counter = 1 242 | step = 1 243 | while True: 244 | 245 | log.info("***Relaxed clustering algorithm step %d of %d***" % (step, partnum - 1)) 246 | name_prefix = "step_%d" % (step) 247 | 248 | # Get a list of all possible lumpings of the best_scheme, ordered 249 | # according to the clustering weights 250 | lumped_subsets = neighbour.get_ranked_clustered_subsets( 251 | start_scheme, self.cfg) 252 | 253 | # reduce the size of the lumped subsets to cluster_percent long 254 | cutoff = int(math.ceil(len(lumped_subsets)*stop_at)) #round up to stop zeros 255 | lumped_subsets = lumped_subsets[:cutoff] 256 | 257 | # Now analyse the lumped schemes 258 | lumpings_done = 0 259 | old_best_score = self.results.best_score 260 | 261 | for subset_grouping in lumped_subsets: 262 | scheme_name = "%s_%d" % (name_prefix, lumpings_done + 1) 263 | lumped_scheme = neighbour.make_clustered_scheme( 264 | start_scheme, scheme_name, subset_grouping, self.cfg) 
265 | 266 | new_result = self.analyse_scheme(lumped_scheme) 267 | 268 | log.debug("Difference in %s: %.1f", self.cfg.model_selection, (new_result.score-old_best_score)) 269 | 270 | lumpings_done += 1 271 | 272 | 273 | if self.results.best_score != old_best_score: 274 | log.info("Analysed %.1f percent of the schemes for this step. The best " 275 | "scheme changed the %s score by %.1f units.", 276 | self.cfg.cluster_percent, self.cfg.model_selection, 277 | (self.results.best_score - old_best_score)) 278 | 279 | #write out the best scheme 280 | self.results.best_scheme.name = "step_%d" % step 281 | self.cfg.reporter.write_scheme_summary( 282 | self.results.best_scheme, self.results.best_result) 283 | 284 | 285 | # Now we find out which is the best lumping we know of for this step 286 | start_scheme = self.results.best_scheme 287 | else: 288 | log.info("Analysed %.1f percent of the schemes for this step and found no schemes " 289 | "that improve the score, stopping" , self.cfg.cluster_percent) 290 | break 291 | 292 | # We're done if it's the scheme with everything together 293 | if len(set(lumped_scheme.subsets)) == 1: 294 | break 295 | 296 | step += 1 297 | 298 | 299 | 300 | log.info("Relaxed clustering algorithm finished after %d steps" % step) 301 | log.info("Best scoring scheme is scheme %s, with %s score of %.3f" 302 | % (self.results.best_scheme.name, model_selection, self.results.best_score)) 303 | 304 | self.cfg.reporter.write_best_scheme(self.results) 305 | 306 | 307 | def choose_method(search): 308 | if search == 'all': 309 | method = AllAnalysis 310 | elif search == 'user': 311 | method = UserAnalysis 312 | elif search == 'greedy': 313 | method = GreedyAnalysis 314 | elif search == 'hcluster': 315 | method = StrictClusteringAnalysis 316 | elif search == 'rcluster': 317 | method = RelaxedClusteringAnalysis 318 | else: 319 | log.error("Search algorithm '%s' is not yet implemented", search) 320 | raise AnalysisError 321 | return method 322 | 
-------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/analysis_method.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/analysis_method.pyc -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/config.pyc -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/main.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | #This program is free software: you can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, the PyParsing library, and the python-cluster library 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 
import logging
import sys
import shlex
import os

logging.basicConfig(
    format="%(levelname)-8s | %(asctime)s | %(message)s",
    level=logging.INFO
)

# curdir = os.path.dirname(os.path.abspath(__file__))
# rootdir, here = os.path.split(curdir)
# config_path = os.path.join(rootdir, 'logging.cfg')
# from logging import config as _logconfig
# _logconfig.fileConfig(config_path)

log = logging.getLogger("main")
from optparse import OptionParser

# We import everything here as it forces all of debug regions to be loaded
import version
import config
import analysis_method
import util
import reporter
import progress
import datetime
import parser
import raxml
import phyml
from partfinder import current


def debug_arg_callback(option, opt, value, parser):
    """optparse callback: split a comma-separated region list into a list."""
    setattr(parser.values, option.dest, value.split(','))


def get_debug_regions():
    """Return the names of every logger currently registered -- these are
    the valid targets for --debug-output."""
    mlogger = logging.Logger.manager
    return mlogger.loggerDict.keys()


def set_debug_regions(regions):
    """Switch the given logger regions to DEBUG level.

    Returns None on success, or the set of invalid region names if any
    were unknown (in which case no levels are changed).
    """
    if regions is None:
        return
    valid_regions = set(get_debug_regions())
    if 'all' in regions:
        regions = valid_regions
    else:
        regions = set(regions)
        errors = set()
        for r in regions:
            if r not in valid_regions:
                log.error("'%s' is not a valid debug region", r)
                errors.add(r)
        if errors:
            return errors

    for r in regions:
        logging.getLogger(r).setLevel(logging.DEBUG)

    # Enhance the format so debug output shows the region name too
    fmt = logging.Formatter("%(levelname)-8s | %(asctime)s | %(name)-10s | %(message)s")
    logging.getLogger("").handlers[0].setFormatter(fmt)

    return None


def clean_folder(folder):
    """Delete all the files in a folder
    Thanks to StackOverflow for this:
    http://stackoverflow.com/questions/185936/delete-folder-contents-in-python
    """
    for the_file in os.listdir(folder):
        file_path = os.path.join(folder, the_file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            log.error("Couldn't delete file from phylofiles folder: %s" % e)
            # BUGFIX: the exception class lives in the util module; the bare
            # name raised a NameError here instead of the intended error
            raise util.PartitionFinderError


def parse_args(datatype, cmdargs=None):
    """Build the option parser and parse the command line.

    Returns (options, args); if no folder argument was given, help is
    printed and args is empty.
    """
    usage = """usage: python %prog [options]

    PartitionFinder and PartitionFinderProtein are designed to discover optimal
    partitioning schemes for nucleotide and amino acid sequence alignments.
    They are also useful for finding the best model of sequence evolution for datasets.

    The Input: : the full path to a folder containing:
        - A configuration file (partition_finder.cfg)
        - A nucleotide/aa alignment in Phylip format
    Take a look at the included 'example' folder for more details.

    The Output: A file in the same directory as the .cfg file, named
    'analysis' This file contains information on the best
    partitioning scheme, and the best model for each partition

    Usage Examples:
        >python %prog example
        Analyse what is in the 'example' sub-folder in the current folder.

        >python %prog -v example
        Analyse what is in the 'example' sub-folder in the current folder, but
        show all the debug output

        >python %prog -c ~/data/frogs
        Check the configuration files in the folder data/frogs in the current
        user's home folder.

        >python %prog --force-restart ~/data/frogs
        Deletes any data produced by the previous runs (which is in
        ~/data/frogs/output) and starts afresh
    """
    op = OptionParser(usage)
    op.add_option(
        "-v", "--verbose",
        action="store_true", dest="verbose",
        help="show debug logging information (equivalent to --debug-out=all)")
    op.add_option(
        "-c", "--check-only",
        action="store_true", dest="check_only",
        help="just check the configuration files, don't do any processing")
    op.add_option(
        "--force-restart",
        action="store_true", dest="force_restart",
        help="delete all previous output and start afresh (!)")
    op.add_option(
        "-p", "--processes",
        type="int", dest="processes", default=-1, metavar="N",
        help="Number of concurrent processes to use."
        " Use -1 to match the number of cpus on the machine."
        " The default is to use -1.")
    op.add_option(
        "--show-python-exceptions",
        action="store_true", dest="show_python_exceptions",
        help="If errors occur, print the python exceptions")
    op.add_option(
        "--save-phylofiles",
        action="store_true", dest="save_phylofiles",
        help="save all of the phyml or raxml output. This can take a lot of space(!)")
    op.add_option(
        "--dump-results",
        action="store_true", dest="dump_results",
        help="Dump all results to a binary file. "
        "This is only of use for testing purposes.")
    op.add_option(
        "--compare-results",
        action="store_true", dest="compare_results",
        help="Compare the results to previously dumped binary results. "
        "This is only of use for testing purposes.")
    op.add_option(
        "--raxml",
        action="store_true", dest="raxml",
        help="Use RAxML (rather than PhyML) to do the analysis. See the manual"
    )
    op.add_option(
        "--cmdline-extras",
        type="str", dest="cmdline_extras", default="", metavar="N",
        help="Add additional commands to the phyml or raxml commandlines that PF uses."
        "This can be useful e.g. if you want to change the accuracy of lnL calculations"
        " ('-e' option in raxml), or use multi-threaded versions of raxml that require"
        " you to specify the number of threads you will let raxml use ('-T' option in "
        "raxml. E.g. you might specify this: --cmdline-extras ' -e 2.0 -T 10 '"
        " N.B. MAKE SURE YOU PUT YOUR EXTRAS IN QUOTES, and only use this command if you"
        " really know what you're doing and are very familiar with raxml and"
        " PartitionFinder"
    )
    op.add_option(
        "--weights",
        type="str", dest="cluster_weights", default=None, metavar="N",
        help="Mainly for algorithm development. Only use it if you know what you're doing."
        "A list of weights to use in the clustering algorithms. This list allows you "
        "to assign different weights to: the overall rate for a subset, the base/amino acid "
        "frequencies, model parameters, and alpha value. This will affect how subsets are "
        "clustered together. For instance: --cluster_weights '1, 2, 5, 1', would weight "
        "the base frequencies 2x more than the overall rate, the model parameters 5x "
        "more, and the alpha parameter the same as the model rate"
    )
    op.add_option(
        "--rcluster-percent",
        type="float", dest="cluster_percent", default=10.0, metavar="N",
        help="This defines the proportion of possible schemes that the relaxed clustering"
        " algorithm will consider before it stops looking. The default is 10%."
        "e.g. --cluster-percent 10.0"
    )
    op.add_option(
        '--debug-output',
        type='string',
        action='callback',
        dest='debug_output',
        metavar="REGION,REGION,...",
        callback=debug_arg_callback,
        help="(advanced option) Provide a list of debug regions to output extra "
        "information about what the program is doing."
        " Possible regions are 'all' or any of {%s}."
        % ",".join(get_debug_regions())
    )

    if cmdargs is None:
        options, args = op.parse_args()
    else:
        options, args = op.parse_args(cmdargs)

    options.datatype = datatype
    # We should have one argument: the folder to read the configuration from
    if not args:
        op.print_help()
    else:
        check_options(op, options)

    return options, args


def check_options(op, options):
    """Validate parsed options, configure debug regions, and derive the
    phylogeny program from the --raxml flag."""
    # Error checking
    if options.dump_results and options.compare_results:
        op.error("options --dump_results and --compare_results are mutually exclusive!")

    if options.verbose:
        set_debug_regions(['all'])
    else:
        errors = set_debug_regions(options.debug_output)
        if errors is not None:
            bad = ",".join(list(errors))
            op.error("Invalid debug regions: %s" % bad)

    # Default to phyml (store_true gives True/None, so test truthiness
    # rather than comparing to 1)
    if options.raxml:
        options.phylogeny_program = 'raxml'
    else:
        options.phylogeny_program = 'phyml'

    #A warning for people using the Pthreads version of RAxML
    # if options.cmdline_extras.count("-T") > 0:
    #     log.warning("It looks like you're using a Pthreads version of RAxML. Be aware "
    #         "that the default behaviour of PartitionFinder is to run one version of RAxML per "
    #         "available processor. This might not be what you want with Pthreads - since the "
    #         "minimum number of threads per RAxML run is 2 (i.e. -T 2). Make sure to limit the "
    #         "total number of RAxML runs you start using the -p option in PartitionFinder. "
    #         "Specifically, the total number of processors you will use with the Pthreads "
    #         "version is the number you set via the -T option in --cmdline-extras, multiplied "
    #         "by the number of processors you set via the -p option in PartitionFinder. "
    #         "You should also be aware that the Pthreads version of RAxML has a rare but "
    #         "known bug on some platforms. This bug results in infinite liklelihood values "
    #         "if it happens on your dataset, PartitionFinder will give an error. In that case "
    #         "you should switch back to using a single-threaded version of RAxML, e.g. the "
    #         "SSE3 or AVX version."
    #         "See the manual for more info.")


def check_python_version():
    """Check the python version is above 2.7 but lower than 3.0

    Returns 0 when the version is too old (the caller does not currently
    act on this), logs a warning for Python 3+.
    """
    python_version = float(
        "%d.%d" % (sys.version_info[0], sys.version_info[1]))

    log.info("You have Python version %.1f" % python_version)

    # BUGFIX: compare version tuples, not floats -- float("2.10") == 2.1
    # would wrongly fail, and 3.0 exactly escaped the old `> 3.0` test
    if sys.version_info[:2] < (2, 7):
        log.error("Your Python version is %.1f, but this program requires Python 2.7. "
                  "Please upgrade to version 2.7 by visiting www.python.org/getit, or by following"
                  " the instructions in the PartitionFinder manual." % python_version)
        return 0

    if sys.version_info[0] >= 3:
        log.warning("Your Python version is %.1f. This program was not built to run with "
                    "version 3 or higher. To guarantee success, please use Python 2.7.x" % python_version)


def main(name, datatype, passed_args=None):
    """Top-level driver: parse arguments, load configuration, run the
    chosen analysis method, and report timings.

    Returns 0 on success, 1 on failure, 2 if help was printed.
    """
    v = version.get_version()

    # If passed_args is None, this will use sys.argv
    options, args = parse_args(datatype, passed_args)
    if not args:
        # Help has already been printed
        return 2

    log.info("------------- %s %s -----------------", name, v)
    start_time = datetime.datetime.now().replace(microsecond=0)  # start the clock ticking

    check_python_version()

    # BUGFIX: join with spaces, otherwise the logged command line runs all
    # the arguments together into one word
    if passed_args is None:
        cmdline = " ".join(sys.argv)
    else:
        cmdline = " ".join(passed_args)

    log.info("Command-line arguments used: %s", cmdline)

    # Load, using the first argument as the folder
    try:
        cfg = config.Configuration(datatype,
                                   options.phylogeny_program,
                                   options.save_phylofiles,
                                   options.cmdline_extras,
                                   options.cluster_weights,
                                   options.cluster_percent)

        # Set up the progress callback
        progress.TextProgress(cfg)
        cfg.load_base_path(args[0])

        if options.check_only:
            log.info("Exiting without processing (because of the -c/--check-only option) ...")
        else:
            try:
                # Now try processing everything....
                if not cfg.save_phylofiles:
                    clean_folder(cfg.phylofiles_path)
                method = analysis_method.choose_method(cfg.search)
                reporter.TextReporter(cfg)
                anal = method(cfg,
                              options.force_restart,
                              options.processes)
                results = anal.analyse()

                if options.dump_results:
                    results.dump(cfg)
                elif options.compare_results:
                    results.compare(cfg)
            finally:
                # Make sure that we reset the configuration
                cfg.reset()

        # Successful exit
        end_time = datetime.datetime.now().replace(microsecond=0)
        processing_time = end_time - start_time

        log.info("Total processing time: %s (h:m:s)" % processing_time)
        log.info("Processing complete.")

        return 0

    except util.PartitionFinderError:
        log.error("Failed to run. See previous errors.")
        # Reraise if we were called by call_main, or if the options is set
        if options.show_python_exceptions or passed_args is not None:
            raise

    except KeyboardInterrupt:
        log.error("User interrupted the Program")

    return 1


def call_main(datatype, cmdline):
    """Run main() from a single command-line string (used by the
    PartitionFinder / PartitionFinderProtein entry scripts)."""
    cmdargs = shlex.split(cmdline)
    # BUGFIX: the original `"" % datatype` raised TypeError (not all
    # arguments converted). NOTE(review): the program-name template was
    # lost in this copy of the file -- reconstructed as the datatype name;
    # confirm against the original release.
    main("%s" % datatype, datatype, cmdargs)


#Copyright (C) 2012 Robert Lanfear and Brett Calcott
#
#This program is free software: you can redistribute it and/or modify it
#under the terms of the GNU General
#Public License as published by the
#Free Software Foundation, either version 3 of the License, or (at your
#option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details. You should have received a copy
#of the GNU General Public License along with this program. If not, see
#. PartitionFinder also includes the PhyML
#program, the RAxML program, and the PyParsing library,
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.

import subset
import scheme
from algorithm import euclidean_distance

import logging
log = logging.getLogger("cluster")


def get_ranked_list(final_distances):
    """
    Return the closest subsets defined by a distance matrix usually there will
    just be a pair that's closer than all other pairs BUT, it's feasible (if
    unlikely) that >2 subsets are equally close. This is possible if, e.g. all
    weights are zero. Then we just want to group all the equally close
    subsets...

    So, we return a list of all the closest subsets
    """

    # Let's make a dict keyed by the distance in the matrix, using setdefault
    # to add things, in case there are subsets with identical pairwise
    # distances
    distances = {}
    for pair in final_distances:
        d = final_distances[pair]

        # Get any subs that we already know are that distance apart as a set
        # default to empty set if it's a new distance
        subs = distances.setdefault(d, set())

        # Add subs that correspond to this cell
        subs.add(pair[0])
        subs.add(pair[1])

    # Produce the groups ordered from smallest to largest distance
    ordered_subsets = []
    unique_distances = list(distances.keys())
    unique_distances.sort()

    for d in unique_distances:
        ordered_subsets.append(list(distances[d]))

    return ordered_subsets


def get_pairwise_dists(subsets, rates, freqs, model, alpha, weights):
    """Compute weighted, normalised euclidean distances between all pairs
    of subsets.

    The parallel lists rates/freqs/model/alpha hold the parameter values
    for each entry of `subsets`. Each parameter's pairwise distances are
    normalised by their maximum, weighted by `weights` (a dict keyed by
    "rate", "freqs", "model", "alpha"), and summed.

    Returns (final_dists, closest_pairs) where final_dists maps each
    subset pair to its total distance, and closest_pairs is a tuple of
    the subsets at the minimum distance. Assumes at least two subsets.
    """
    import itertools
    #set up all pairwise combinations as iterators
    s = itertools.combinations(subsets, 2)
    r = itertools.combinations(rates, 2)
    f = itertools.combinations(freqs, 2)
    m = itertools.combinations(model, 2)
    a = itertools.combinations(alpha, 2)

    #now we can izip over ALL of them at once (isn't python great!)
    subset_pairs = []
    r_dists = []
    f_dists = []
    m_dists = []
    a_dists = []

    for pair in itertools.izip(s, r, f, m, a):
        subset_pair = pair[0]
        subset_pairs.append(subset_pair)

        r_dist = euclidean_distance(pair[1][0], pair[1][1])
        f_dist = euclidean_distance(pair[2][0], pair[2][1])
        m_dist = euclidean_distance(pair[3][0], pair[3][1])
        a_dist = euclidean_distance(pair[4][0], pair[4][1])

        r_dists.append(r_dist)
        f_dists.append(f_dist)
        m_dists.append(m_dist)
        a_dists.append(a_dist)

    #and now we get the minmax values, used to normalise each parameter's
    #distances onto the same scale
    max_r = max(r_dists)
    max_f = max(f_dists)
    max_m = max(m_dists)
    max_a = max(a_dists)

    #now we go over them again, and normalise, weight, and sum
    final_dists = {}
    closest_pairs = []
    mindist = None
    for i, pair in enumerate(itertools.izip(r_dists, f_dists, m_dists, a_dists, subset_pairs)):

        # A zero maximum means all pairs are identical for that parameter,
        # so its contribution is zero (and we avoid dividing by zero)
        if max_r > 0.0:
            r_final = pair[0] * float(weights["rate"]) / float(max_r)
        else:
            r_final = 0.0
        if max_f > 0.0:
            f_final = pair[1] * float(weights["freqs"]) / float(max_f)
        else:
            f_final = 0.0
        if max_m > 0.0:
            m_final = pair[2] * float(weights["model"]) / float(max_m)
        else:
            m_final = 0.0
        if max_a > 0:
            a_final = pair[3] * float(weights["alpha"]) / float(max_a)
        else:
            a_final = 0.0

        total_dist = r_final + f_final + m_final + a_final

        final_dists[pair[4]] = total_dist

        #check to see if this is the closest
        #BUGFIX: test `mindist is None` first -- the old order compared a
        #number against None, which only worked through Python 2's
        #arbitrary cross-type ordering
        if mindist is None or total_dist < mindist:
            mindist = total_dist
            closest_pairs = pair[4]  # pair[4] is the tuple of two subsets
        elif total_dist == mindist:
            #we want a tuple with all of the subsets that are equally close
            #with no replicates, so we use tuple(set())
            closest_pairs = tuple(set(closest_pairs + pair[4]))

    return final_dists, closest_pairs


def get_distance_matrix(start_scheme, weights):
    """Collect each subset's estimated parameters from `start_scheme` and
    return (final_dists, closest_pairs) as computed by
    get_pairwise_dists()."""
    #1. get the parameter lists for each subset
    subsets = []  # a list of subsets, so we know the order things appear in the lists
    rates = []    # tree length
    freqs = []    # amino acid or base frequencies
    model = []    # model parameters e.g. A<->C
    alpha = []    # alpha parameter of the gamma distribution of rates across sites

    for s in start_scheme.subsets:
        param_dict = s.get_param_values()
        subsets.append(s)
        rates.append([param_dict["rate"]])
        freqs.append(param_dict["freqs"])
        model.append(param_dict["model"])
        alpha.append([param_dict["alpha"]])

    #get pairwise euclidean distances, and minmax values, for all parameters
    final_dists, closest_pairs = get_pairwise_dists(
        subsets, rates, freqs, model, alpha, weights)

    return final_dists, closest_pairs


def get_closest_subsets(start_scheme, weights):
    """Find the closest subsets in a scheme
    """
    final_dists, closest_pairs = get_distance_matrix(start_scheme, weights)

    return closest_pairs


def get_ranked_clustered_subsets(start_scheme, cfg):
    """
    The idea here is to take a scheme, and perform some analyses to find out
    how the subsets in that scheme cluster.

    We then just return the list of schemes, ordered by closest to most distant
    in the clustering space
    """
    final_dists, closest_pairs = get_distance_matrix(
        start_scheme, cfg.cluster_weights)

    ranked_subset_groupings = get_ranked_list(final_dists)
    return ranked_subset_groupings


def make_clustered_scheme(start_scheme, scheme_name, subsets_to_cluster, cfg):
    """Return a new Scheme in which the subsets in `subsets_to_cluster`
    are merged into a single subset; all other subsets are unchanged."""

    #1. Create a new subset that merges the subsets_to_cluster
    newsub_parts = []

    for s in subsets_to_cluster:
        newsub_parts = newsub_parts + list(s.partitions)
    newsub = subset.Subset(*tuple(newsub_parts))

    #2. Then we define a new scheme with those merged subsets
    all_subs = [s for s in start_scheme.subsets]

    #pop out the subsets we're going to join together
    for s in subsets_to_cluster:
        all_subs.remove(s)

    #and now we add back in our new subset...
    all_subs.append(newsub)

    #and finally create the clustered scheme
    final_scheme = scheme.Scheme(cfg, str(scheme_name), all_subs)

    return final_scheme


def get_nearest_neighbour_scheme(start_scheme, scheme_name, cfg):
    """
    The idea here is to take a scheme, and perform some analyses to find a
    neighbouring scheme, where the neighbour has one less subset than the
    current scheme. Really this is just progressive clustering, but specified
    to work well with PartitionFinder
    """

    #1. First we get the closest subsets, based on some weights. This will almost always
    #   be two subsets, but it's generalised so that it could be all of them...
    #   cluster weights is a dictionary of weights, keyed by: rate, freqs, model
    #   for the overall subset rate, the base/aminoacid frequencies, and the model parameters
    closest_subsets = get_closest_subsets(start_scheme, cfg.cluster_weights)

    # Renamed from `scheme` to avoid shadowing the imported scheme module
    nn_scheme = make_clustered_scheme(
        start_scheme, scheme_name, closest_subsets, cfg)

    return nn_scheme


#Copyright (C) 2012 Robert Lanfear and Brett Calcott
#
#This program is free software: you can redistribute it and/or modify it
#under the terms of the GNU General Public License as published by the
#Free Software Foundation, either version 3 of the License, or (at your
#option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details. You should have received a copy
#of the GNU General Public License along with this program. If not, see
#. PartitionFinder also includes the PhyML
#program, the RAxML program, and the PyParsing library,
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.
import logging
log = logging.getLogger("parser")

from pyparsing import (
    Word, OneOrMore, alphas, nums, Suppress, Optional, Group, stringEnd,
    delimitedList, pythonStyleComment, line, lineno, col, Keyword, Or,
    NoMatch, CaselessKeyword, ParseException, SkipTo)

# debugging
# ParserElement.verbose_stacktrace = True

import partition
import scheme
import subset
import phyml_models
import raxml_models
import config
from util import PartitionFinderError

# Only used internally


class ParserError(Exception):
    """Raised for problems found while parsing the .cfg file.

    Captures the offending line text, line number and column so that
    format_message() can point the user at the exact location.
    """
    def __init__(self, text, loc, msg):
        self.line = line(loc, text)
        self.col = col(loc, text)
        self.lineno = lineno(loc, text)
        self.msg = msg

    def format_message(self):
        """Return a one-line, human-readable description of the error."""
        return "%s at line:%s, column:%s" % (self.msg, self.lineno, self.col)


class Parser(object):
    """Parse configuration files.

    Parse actions attached to the grammar push their results straight
    into the configuration object passed to __init__.
    """

    def __init__(self, cfg):
        # The configuration object that receives all parsed settings.
        self.cfg = cfg

        # Running state used while a file is being parsed.
        self.schemes = []
        self.subsets = []
        self.init_grammar()
        # When True, user scheme definitions are parsed but not recorded
        # (used when the search mode makes user schemes irrelevant).
        self.ignore_schemes = False

    def init_grammar(self):
        """Set up the parsing classes.

        Any changes to the grammar of the config file should be done here.
        """
        # Some syntax that we need, but don't bother looking at
        SEMICOLON = (Suppress(";"))
        EQUALS = Suppress("=")
        OPENB = Suppress("(")
        CLOSEB = Suppress(")")
        BACKSLASH = Suppress("\\")
        DASH = Suppress("-")

        # Top Section
        FILENAME = Word(alphas + nums + '-_.')
        alignmentdef = Keyword('alignment') + EQUALS + FILENAME + SEMICOLON
        alignmentdef.setParseAction(self.set_alignment)

        treedef = Keyword('user_tree_topology') + EQUALS + FILENAME + SEMICOLON
        treedef.setParseAction(self.set_user_tree)

        def simple_option(name):
            # A "name = word;" option whose value goes through set_option().
            opt = Keyword(name) + EQUALS + \
                Word(alphas + nums + '-_') + SEMICOLON
            opt.setParseAction(self.set_simple_option)
            return opt

        branchdef = simple_option('branchlengths')

        MODELNAME = Word(alphas + nums + '+')
        modellist = delimitedList(MODELNAME)
        modeldef = Keyword("models") + EQUALS + Group(
            (
                CaselessKeyword("all") | CaselessKeyword("mrbayes") |
                CaselessKeyword("raxml") | CaselessKeyword("beast") |
                CaselessKeyword("all_protein") |
                CaselessKeyword("all_protein_gamma") |
                CaselessKeyword("all_protein_gammaI")
            )("predefined") |
            Group(modellist)("userlist")) + SEMICOLON
        modeldef.setParseAction(self.set_models)

        modseldef = simple_option("model_selection")
        topsection = alignmentdef + Optional(treedef) + branchdef + \
            modeldef + modseldef

        # Partition Parsing
        column = Word(nums)
        partname = Word(alphas + '_-' + nums)
        partdef = column("start") +\
            Optional(DASH + column("end")) +\
            Optional(BACKSLASH + column("step"))

        partdef.setParseAction(self.define_range)
        partdeflist = Group(OneOrMore(Group(partdef)))
        partition = Optional("charset") + partname("name") + \
            EQUALS + partdeflist("parts") + SEMICOLON
        partition.setParseAction(self.define_partition)
        partlist = OneOrMore(Group(partition))
        partsection = Suppress("[data_blocks]") + partlist

        # Scheme Parsing
        schemename = Word(alphas + '_-' + nums)
        # Make a copy, cos we set a different action on it
        partnameref = partname.copy()
        partnameref.setParseAction(self.check_part_exists)

        subset = Group(OPENB + delimitedList(partnameref("name")) + CLOSEB)
        subset.setParseAction(self.define_subset)

        scheme = Group(OneOrMore(subset))
        schemedef = schemename("name") + \
            EQUALS + scheme("scheme") + SEMICOLON
        schemedef.setParseAction(self.define_schema)

        schemelist = OneOrMore(Group(schemedef))

        schemealgo = simple_option("search")
        schemesection = \
            Suppress("[schemes]") + schemealgo + Optional(schemelist)

        # We've defined the grammar for each section. Here we just put it
        # all together
        self.config_parser = (
            topsection + partsection + schemesection + stringEnd)

    def set_alignment(self, text, loc, tokens):
        """Parse action: record the alignment file name on the config."""
        value = tokens[1]
        self.cfg.set_alignment_file(value)
        # TODO Make sure it is readable!
        # raise ParserError(text, loc, "No '%s' defined in the configuration" % var)

    def set_user_tree(self, text, loc, tokens):
        """Parse action: record the user-supplied tree topology file."""
        self.cfg.user_tree = tokens[1]

    def set_simple_option(self, text, loc, tokens):
        """Parse action for one-word options (branchlengths, search, ...)."""
        try:
            self.cfg.set_option(tokens[0], tokens[1])
        except config.ConfigurationError:
            raise ParserError(text, loc, "Invalid option in .cfg file")

    def set_models(self, text, loc, tokens):
        """Parse action: validate the 'models' setting and store it on cfg.

        Accepts either a predefined keyword (all, mrbayes, beast, raxml,
        all_protein, all_protein_gamma, all_protein_gammaI) or an explicit
        comma-separated model list, then cross-checks the resulting models
        against cfg.datatype.
        """
        if self.cfg.phylogeny_program == "phyml":
            self.phylo_models = phyml_models
        elif self.cfg.phylogeny_program == "raxml":
            self.phylo_models = raxml_models

        all_dna_mods = set(self.phylo_models.get_all_dna_models())
        all_protein_mods = set(self.phylo_models.get_all_protein_models())
        total_mods = all_dna_mods.union(all_protein_mods)

        mods = tokens[1]
        DNA_mods = 0
        prot_mods = 0
        if mods.userlist:
            modlist = mods.userlist
            log.info("Setting 'models' to a user-specified list")
        else:
            modsgroup = mods.predefined
            keyword = modsgroup.lower()
            if keyword == "all":
                modlist = list(all_dna_mods)
                DNA_mods += 1
            elif keyword == "mrbayes":
                modlist = set(phyml_models.get_mrbayes_models())
                DNA_mods += 1
            elif keyword == "beast":
                modlist = set(phyml_models.get_beast_models())
                DNA_mods += 1
            elif keyword == "raxml":
                modlist = set(phyml_models.get_raxml_models())
                DNA_mods += 1
            elif keyword == "all_protein":
                modlist = set(self.phylo_models.get_all_protein_models())
                prot_mods += 1
            elif keyword == "all_protein_gamma":
                if self.cfg.phylogeny_program == "raxml":
                    modlist = set(raxml_models.get_protein_models_gamma())
                    prot_mods += 1
                else:
                    log.error("The models option 'all_protein_gamma' is only available with raxml"
                              ", (the --raxml commandline option). Please check and try again")
                    # BUG FIX: the original did 'raise ParserError' bare,
                    # which fails with TypeError because ParserError
                    # requires (text, loc, msg).
                    raise ParserError(
                        text, loc, "'all_protein_gamma' requires raxml")
            elif keyword == "all_protein_gammai":
                # BUG FIX: the original compared modsgroup.lower() against
                # the mixed-case literal "all_protein_gammaI", which can
                # never match, so this branch was unreachable and modlist
                # was left unbound (NameError further down).
                if self.cfg.phylogeny_program == "raxml":
                    modlist = set(raxml_models.get_protein_models_gammaI())
                    prot_mods += 1
                else:
                    log.error("The models option 'all_protein_gammaI' is only available with raxml"
                              ", (the --raxml commandline option). Please check and try again")
                    raise ParserError(
                        text, loc, "'all_protein_gammaI' requires raxml")
            else:
                # Defensive: the grammar should make this unreachable.
                raise ParserError(
                    text, loc, "Unknown predefined models setting '%s'"
                    % modsgroup)
            log.info("Setting 'models' to '%s'", modsgroup)

        self.cfg.models = set()
        for m in modlist:
            if m not in total_mods:
                raise ParserError(
                    text, loc, "'%s' is not a valid model for phylogeny "
                    "program %s. Please check the lists of valid models in the"
                    " manual and try again" % (m, self.cfg.phylogeny_program))

            if m in all_dna_mods:
                DNA_mods += 1
            if m in all_protein_mods:
                prot_mods += 1

            self.cfg.models.add(m)

        log.info("The models included in this analysis are: %s",
                 ", ".join(self.cfg.models))

        # Check the model list against the datatype so the user gets a
        # clear message when running the wrong front-end script.
        if DNA_mods > 0 and prot_mods == 0 and self.cfg.datatype == "DNA":
            log.info("Setting datatype to 'DNA'")
        elif DNA_mods == 0 and prot_mods > 0 and self.cfg.datatype == "protein":
            log.info("Setting datatype to 'protein'")
        elif DNA_mods == 0 and prot_mods > 0 and self.cfg.datatype == "DNA":
            raise ParserError(
                text, loc, "The models list contains only models of amino acid change."
                " PartitionFinder.py only works with nucleotide models (like the GTR model)."
                " If you're analysing an amino acid dataset, please use PartitionFinderProtein,"
                " which you can download here: www.robertlanfear.com/partitionfinder."
                " The models line in the .cfg file is")
        elif DNA_mods > 0 and prot_mods == 0 and self.cfg.datatype == "protein":
            raise ParserError(
                text, loc, "The models list contains only models of nucleotide change."
                " PartitionFinderProtein.py only works with amino acid models (like the WAG model)."
                " If you're analysing a nucleotide dataset, please use PartitionFinder.py,"
                " which you can download here: www.robertlanfear.com/partitionfinder"
                " The models line in the .cfg file is")
        else:  # we've got a mixture of models.
            raise ParserError(
                text, loc, "The models list contains a mixture of protein and nucleotide models."
                " If you're analysing a nucleotide dataset, please use PartitionFinder."
                " If you're analysing an amino acid dataset, please use PartitionFinderProtein."
                " You can download both of these programs from here: www.robertlanfear.com/partitionfinder"
                " The models line in the .cfg file is")

    def define_range(self, part):
        """Turn the 1, 2 or 3 tokens into ints, supplying defaults if needed.

        Returns [start, end, step]; a missing end defaults to start and a
        missing step defaults to 1.
        """
        fromc = int(part.start)
        toc = int(part.end) if part.end else fromc
        stepc = int(part.step) if part.step else 1
        return [fromc, toc, stepc]

    def define_partition(self, text, loc, part_def):
        """Parse action: create a Partition from a parsed data block.

        Construction registers the partition on cfg as a side effect.
        """
        try:
            partition.Partition(
                self.cfg, part_def.name, *tuple(part_def.parts))
        except partition.PartitionError:
            raise ParserError(
                text, loc, "Error in '%s' can be found" % part_def.name)

    def check_part_exists(self, text, loc, partref):
        """Parse action: ensure a scheme only references defined partitions."""
        if partref.name not in self.cfg.partitions:
            raise ParserError(text, loc, "Partition %s not defined" %
                              partref.name)

    def define_subset(self, text, loc, subset_def):
        """Parse action: build a Subset from a parenthesised name group."""
        try:
            # Look the partitions up by name
            parts = [self.cfg.partitions[nm] for nm in subset_def[0]]

            # Keep a running list of these till we define the schema below
            self.subsets.append(subset.Subset(*tuple(parts)))
        except subset.SubsetError:
            raise ParserError(text, loc, "Error creating subset...")

    def define_schema(self, text, loc, scheme_def):
        """Parse action: assemble the accumulated subsets into a Scheme."""
        try:
            # Take (and clear) the accumulated subsets for this scheme
            subs = tuple(self.subsets)
            self.subsets = []

            if not self.ignore_schemes:
                sch = scheme.Scheme(self.cfg, scheme_def.name, subs)
                self.cfg.user_schemes.add_scheme(sch)

        except (scheme.SchemeError, subset.SubsetError):
            raise ParserError(text, loc, "Error in '%s' can be found" %
                              scheme_def.name)

    def parse_file(self, fname):
        """Read the named .cfg file and parse its contents."""
        s = open(fname, 'rU').read()
        self.parse_configuration(s)

    def parse_configuration(self, s):
        """Parse a .cfg string, logging helpful hints on failure.

        Raises PartitionFinderError on any parse problem.
        """
        try:
            self.result = self.config_parser.ignore(
                pythonStyleComment).parseString(s)
        except ParserError as p:
            log.error(p.format_message())
            raise PartitionFinderError
        except ParseException as p:
            log.error("There was a problem loading your .cfg file, please check and try again")
            log.error(p)

            # See if one of the required options was missing or misplaced;
            # pyparsing's message names what it expected to find.
            expectations = ["models", "search", "[schemes]", "[data_blocks]",
                            "model_selection", "branchlengths", "alignment"]
            missing = None
            for e in expectations:
                if p.msg.count(e):
                    missing = e

            if missing:
                log.info("It looks like the '%s' option might be missing or in the wrong place" % (missing))
                log.info("Or perhaps something is wrong in the lines just before the '%s' option" % (missing))
                log.info("Please double check the .cfg file and try again")
            else:
                log.info("The line causing the problem is this: '%s'" % (p.line))
                log.info("Please check that line, and make sure it appears in the right place in the .cfg file.")
                log.info("If it looks OK, try double-checking the semi-colons on other lines in the .cfg file")
            raise PartitionFinderError
import logging
log = logging.getLogger("partition")

from util import PartitionFinderError


class PartitionError(PartitionFinderError):
    """Raised for any problem defining or registering a partition."""
    pass


def columnset_to_string(colset):
    """Render a set of zero-based column indices as a sorted, 1-based,
    comma-separated string for user-facing messages."""
    # Add one, cos we converted to zero base...
    return ', '.join([str(x + 1) for x in sorted(colset)])


class PartitionSet(object):
    """The set of all partitions loaded from a configuration file"""
    def __init__(self):
        """A set of Partitions"""
        # Monotonic counter used to number partitions as they are added.
        self.sequence = 0
        self.parts_by_name = {}
        self.parts_by_number = {}
        self.partitions = set()

        # All of the columns covered by every partition, kept both as a
        # sorted list and as a set for fast overlap checks.
        self.columns = []
        self.columnset = set()

        self.finalised = False

    def __str__(self):
        return "PartitionSet(%s)" % ", ".join([str(p) for p in self.partitions])

    def add_partition(self, p):
        """Register a Partition, rejecting duplicates and column overlap.

        Raises PartitionError if the set is finalised, the name is taken,
        or p shares columns with a previously added partition.
        """
        if self.finalised:
            log.error("Cannot add partitions after a Scheme has been created")
            raise PartitionError

        if p.name in self.parts_by_name:
            log.error("Attempt to add %s when that name already exists", p)
            raise PartitionError

        overlap = []
        for otherp in self.partitions:
            if p.columnset & otherp.columnset:
                overlap.append(str(otherp))
        if overlap:
            log.error("%s overlaps with previously defined "
                      "partitions: %s",
                      p, ", ".join(overlap))
            raise PartitionError

        # Assign the partition to this set
        p.partition_set = self

        # Make sure we can look up by name and by number
        self.parts_by_name[p.name] = p
        self.parts_by_number[self.sequence] = p
        p.sequence = self.sequence
        self.sequence += 1
        self.partitions.add(p)

        # Merge all the columns
        self.columns.extend(p.columns)
        self.columns.sort()
        self.columnset |= p.columnset

    def finalise(self):
        """Ensure that no more partitions can be added"""
        self.finalised = True

    def check_against_alignment(self, alignment):
        """Check the partition definitions against the alignment.

        Warns (does not raise) when alignment columns are not covered by
        any partition.
        """
        # TODO: pbly should check the converse too -- stuff defined that is
        # missing??
        self.fullset = set(range(0, alignment.sequence_len))
        leftout = self.fullset - self.columnset
        if leftout:
            # This does not raise an error, just a warning.
            # NOTE: log.warn is a deprecated alias; use log.warning.
            log.warning(
                "Columns defined in partitions range from %s to %s, "
                "but these columns in the alignment are missing: %s",
                self.columns[0] + 1, self.columns[-1] + 1,
                columnset_to_string(leftout))

    # We can treat this a bit like a dictionary
    def __iter__(self):
        return iter(self.partitions)

    def __len__(self):
        return len(self.partitions)

    def __getitem__(self, k):
        # Integer keys look up by insertion order, anything else by name.
        if type(k) is int:
            return self.parts_by_number[k]
        return self.parts_by_name[k]

    def __contains__(self, k):
        return k in self.parts_by_name

    def names(self):
        return self.parts_by_name.keys()


class Partition(object):
    """A set of columns from an alignment"""
    def __init__(self, cfg, name=None, *partlist):
        """A named partition.

        partlist contains (start, stop[, step]) tuples using the 1-based,
        inclusive convention from the config file; they are converted to
        zero-based column indices here. Raises PartitionError on malformed
        ranges or internal overlap. Construction registers the partition
        with cfg.partitions.
        """
        self.name = name
        description = []

        # This will get set later, when they are added to PartitionSet
        self.partition_set = None

        # We now need to convert to column definitions. Note that these are
        # zero based, which is not how they are specified in the config. So
        # we must do some fiddling to make sure they are right. In addition,
        # we use range(...) which excludes the final column, whereas the
        # definitions assume inclusive...
        columns = []
        for p in partlist:

            # Make sure it is sensible
            if len(p) < 2 or len(p) > 3:
                log.error("The Partition '%s' should contain\
                          a list of start, a stop, and an optional step",
                          self.name)
                raise PartitionError
            if len(p) == 2:
                start, stop = p
                step = 1
            else:
                start, stop, step = p
            if start > stop:
                log.error("Partition '%s' has beginning after end (%s > %s)",
                          name, start, stop)
                raise PartitionError

            # Actually, subtracting 1 deals with both issues...
            columns.extend(range(start - 1, stop, step))
            description.append((start, stop, step))

        self.description = tuple(description)

        # Normalise it all
        columns.sort()
        columnset = set(columns)

        # If there was any overlap then these will differ...
        if len(columns) != len(columnset):
            log.error("Partition '%s' has internal overlap", name)
            raise PartitionError

        # Both of these are useful?
        self.columns = columns
        self.columnset = columnset

        cfg.partitions.add_partition(self)
        log.debug("Created %s", self)

    def __repr__(self):
        outlist = ", ".join(["%s-%s\\%s" % tuple(p) for p in self.description])
        return "Partition<%s: %s>" % (self.name, outlist)

    def __str__(self):
        outlist = ", ".join(["%s-%s\\%s" % tuple(p) for p in self.description])
        return "Partition(%s, %s)" % (self.name, outlist)
You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, and the PyParsing library, 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import logging 19 | log = logging.getLogger("analysis") 20 | 21 | import config 22 | 23 | # TODO need some error checking! 24 | 25 | # number of free parameters in substitution model, listed as "model+base_frequencies" 26 | # and the model string for PhyML as the second of the tuple. 27 | _base_models = { 28 | "JC" : (0+0, "-m 000000 -f '0.25, 0.25, 0.25, 0.25'"), 29 | "K80" : (1+0, "-m 010010 -f '0.25, 0.25, 0.25, 0.25'"), 30 | "TrNef" : (2+0, "-m 010020 -f '0.25, 0.25, 0.25, 0.25'"), 31 | "K81" : (2+0, "-m 012210 -f '0.25, 0.25, 0.25, 0.25'"), 32 | "TVMef" : (4+0, "-m 012314 -f '0.25, 0.25, 0.25, 0.25'"), 33 | "TIMef" : (3+0, "-m 012230 -f '0.25, 0.25, 0.25, 0.25'"), 34 | "SYM" : (5+0, "-m 012345 -f '0.25, 0.25, 0.25, 0.25'"), 35 | "F81" : (0+3, "-m 000000 -f e"), 36 | "HKY" : (1+3, "-m 010010 -f e"), 37 | "TrN" : (2+3, "-m 010020 -f e"), 38 | "K81uf" : (2+3, "-m 012210 -f e"), 39 | "TVM" : (4+3, "-m 012314 -f e"), 40 | "TIM" : (3+3, "-m 012230 -f e"), 41 | "GTR" : (5+3, "-m 012345 -f e") 42 | } 43 | 44 | # number of free parameters in substitution model, listed as "aa_frequencies" 45 | # and the model string for PhyML as the second of the tuple 46 | _base_protein_models = { 47 | "LG" : (0, "-m LG -d aa"), 48 | "WAG" : (0, "-m WAG -d aa"), 49 | "mtREV" : (0, "-m mtREV -d aa"), 50 | "Dayhoff" : (0, "-m Dayhoff -d aa"), 51 | "DCMut" : (0, "-m DCMut -d aa"), 52 | "JTT" : (0, "-m JTT -d aa"), 53 | "VT" : (0, "-m VT -d aa"), 54 | "Blosum62" : (0, "-m Blosum62 -d aa"), 55 | "CpREV" : (0, "-m CpREV -d aa"), 56 | "RtREV" : (0, "-m RtREV -d aa"), 57 | "MtMam" : (0, "-m MtMam -d aa"), 
58 | "MtArt" : (0, "-m MtArt -d aa"), 59 | "HIVb" : (0, "-m HIVb -d aa"), 60 | "HIVw" : (0, "-m HIVw -d aa"), 61 | } 62 | 63 | # All the functions in here return the same thing with the same parameters, 64 | # this just caches the return ... 65 | def memoize(f): 66 | cache= {} 67 | def memf(*x): 68 | if x not in cache: 69 | cache[x] = f(*x) 70 | return cache[x] 71 | return memf 72 | 73 | @memoize 74 | def get_all_dna_models(): 75 | ''' 76 | Return a list of all implemented _base_models 77 | ''' 78 | model_list = [] 79 | for model in _base_models.keys(): 80 | model_list.append(model) 81 | model_list.append("%s+I" %(model)) 82 | model_list.append("%s+G" %(model)) 83 | model_list.append("%s+I+G" %(model)) 84 | return model_list 85 | 86 | @memoize 87 | def get_all_protein_models(): 88 | ''' 89 | Return a list of all implemented _base__protein_models 90 | ''' 91 | model_list = [] 92 | for model in _base_protein_models.keys(): 93 | model_list.append(model) 94 | model_list.append("%s+F" %(model)) 95 | model_list.append("%s+I" %(model)) 96 | model_list.append("%s+G" %(model)) 97 | model_list.append("%s+I+G" %(model)) 98 | model_list.append("%s+I+F" %(model)) 99 | model_list.append("%s+G+F" %(model)) 100 | model_list.append("%s+I+G+F" %(model)) 101 | return model_list 102 | 103 | @memoize 104 | def get_mrbayes_models(): 105 | ''' 106 | Return a list of all models implemented in MrBayes. Thanks to Ainsley Seago for this. 107 | ''' 108 | mrbayes_base_models = ["JC", "F81", "K80", "HKY", "SYM", "GTR"] 109 | model_list = [] 110 | for model in mrbayes_base_models: 111 | model_list.append(model) 112 | model_list.append("%s+I" %(model)) 113 | model_list.append("%s+G" %(model)) 114 | model_list.append("%s+I+G" %(model)) 115 | return model_list 116 | 117 | def get_beast_models(): 118 | ''' 119 | Return a list of all models implemented in BEAST v1.7.2. 
120 | ''' 121 | beast_base_models = ["K80", "TrNef", "SYM", "HKY", "TrN", "GTR"] 122 | model_list = [] 123 | for model in beast_base_models: 124 | model_list.append(model) 125 | model_list.append("%s+I" %(model)) 126 | model_list.append("%s+G" %(model)) 127 | model_list.append("%s+I+G" %(model)) 128 | return model_list 129 | 130 | 131 | @memoize 132 | def get_raxml_models(): 133 | ''' 134 | Return a list of all models implemented in RaxML. Thanks to Ainsley Seago for this. 135 | ''' 136 | model_list = ["GTR+G", "GTR+I+G"] 137 | return model_list 138 | 139 | @memoize 140 | def get_protein_models(): 141 | ''' 142 | Return a list of all protein models implemented in PhyML 143 | ''' 144 | model_list = [ 145 | "LG", 146 | "cheese" 147 | ] 148 | return model_list 149 | 150 | 151 | 152 | @memoize 153 | def get_num_params(modelstring): 154 | ''' 155 | Input a model string like HKY+I+G or LG+G+F, and get the number of parameters 156 | ''' 157 | elements = modelstring.split("+") 158 | model_name = elements[0] 159 | if model_name in _base_models.keys(): 160 | model_params = _base_models[model_name][0] 161 | else: 162 | model_params = _base_protein_models[model_name][0] 163 | if "F" in elements[1:]: 164 | model_params = model_params+19-1 #the -1 here is to account for the fact we add 1 for the + in '+F' below 165 | 166 | extras = modelstring.count("+") 167 | total = model_params+extras 168 | log.debug("Model: %s Params: %d" %(modelstring, total)) 169 | 170 | return total 171 | 172 | @memoize 173 | def get_model_difficulty(modelstring): 174 | ''' 175 | Input a model string like HKY+I+G or LG+G+F, and a guess about how long it takes to analyse 176 | Right now, this is done with a simple hack. 
I just return a number that is the number of params 177 | plus a modifier for extra stuff like +I and +G 178 | the hardest models are +I+G, then +G, then +I 179 | this is just used to rank models for ordering the analysis 180 | The return is a 'difficulty' score that can be used to rank models 181 | ''' 182 | elements = modelstring.split("+") 183 | 184 | model_params = get_num_params(modelstring) 185 | 186 | difficulty = 0 187 | if "G" in elements[1:]: 188 | difficulty = difficulty + 2000 189 | if "I" in elements[1:]: 190 | difficulty = difficulty + 1000 191 | 192 | extras = modelstring.count("+") 193 | total = model_params+extras+difficulty 194 | log.debug("Model: %s Difficulty: %d" %(modelstring, total)) 195 | 196 | return total 197 | 198 | 199 | 200 | @memoize 201 | def get_model_commandline(modelstring): 202 | ''' 203 | Input a model string, and get the PhyML command line 204 | ''' 205 | 206 | # This is always the same - optimise brlens and model, not tree 207 | commandline = ["-o lr "] 208 | 209 | elements = modelstring.split("+") 210 | model_name = elements[0] 211 | 212 | # Everything but the first element 213 | extras = elements[1:] 214 | 215 | if model_name in _base_models.keys(): #DNA models 216 | commandline.append(_base_models[model_name][1]) 217 | else: #protein models 218 | commandline.append(_base_protein_models[model_name][1]) 219 | if "F" in extras: 220 | commandline.append("-f e") #emprical AA frequencies (+19 params) 221 | else: 222 | commandline.append("-f m") #AA frequences from the model (+0 params) 223 | 224 | 225 | if "I" in extras: 226 | commandline.append("-v e") 227 | if "G" in extras: 228 | commandline.append("-a e") 229 | commandline.append("-c 4") 230 | else: 231 | commandline.append("-c 1") 232 | 233 | return " ".join(commandline) 234 | 235 | if __name__ == "__main__": 236 | print " ", 237 | print "Name".ljust(12), 238 | print "Params".ljust(10), 239 | print "CommandLine" 240 | for i, model in enumerate(get_all_models()): 241 | print 
str(i+1).rjust(2), 242 | print model.ljust(12), 243 | print str(get_num_params(model)).ljust(10), 244 | print get_model_commandline(model) 245 | for model in get_protein_models(): 246 | print model 247 | 248 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/phyml_models.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/phyml_models.pyc -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/progress.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | #This program is free software: you can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, and the PyParsing library, 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 
import logging
log = logging.getLogger("progress")


class Progress(object):
    """Base class defining the progress-reporting hooks used during analysis.

    Registers itself on the configuration (cfg.progress) so other parts of
    the program can reach the reporter without a direct reference.
    """

    def __init__(self, cfg):
        self.cfg = cfg
        self.cfg.progress = self

    def begin(self, scheme_count, subset_count):
        pass

    def next_scheme(self):
        pass

    def subset_begin(self, sub):
        pass

    def subset_done(self, sub):
        pass

    def end(self):
        pass


class NoProgress(Progress):
    """Silent reporter: inherits the no-op hooks unchanged."""
    pass


class TextProgress(Progress):
    """Reporter that logs progress as subsets are completed."""

    def begin(self, scheme_count, subset_count):
        # Remember the totals so later calls can compute percentages.
        self.scheme_count = scheme_count
        self.subset_count = subset_count
        self.schemes_analysed = 0
        self.subsets_analysed = set()

        log.info("PartitionFinder will have to analyse %d subsets to complete this analysis", subset_count)
        log.info("This will result in %s schemes being created", scheme_count)
        if subset_count > 10000:
            log.warning("%d is a lot of subsets, this might take a long time to analyse", subset_count)
            log.warning("Perhaps consider using a different search scheme instead (see Manual)")

    def next_scheme(self):
        self.schemes_analysed += 1

    def subset_begin(self, sub):
        pass

    def subset_done(self, sub):
        # Only report when this subset's name has not been counted before.
        if sub.name in self.subsets_analysed:
            return
        self.subsets_analysed.add(sub.name)
        done_so_far = len(self.subsets_analysed)
        percent_done = (float(done_so_far) * 100.0) / float(self.subset_count)
        log.info("Finished subset %d/%d, %.2f percent done", done_so_far, self.subset_count, percent_done)

    def end(self):
        pass
17 | 18 | """Run raxml and parse the output""" 19 | 20 | import logging 21 | log = logging.getLogger("raxml") 22 | 23 | import subprocess 24 | import shlex 25 | import os 26 | import shutil 27 | import sys 28 | import fnmatch 29 | import util 30 | 31 | from pyparsing import ( 32 | Word, Literal, nums, Suppress, ParseException, 33 | SkipTo, OneOrMore, Regex 34 | ) 35 | 36 | import raxml_models as models 37 | 38 | _binary_name = 'raxml' 39 | if sys.platform == 'win32': 40 | _binary_name += ".exe" 41 | 42 | from util import PhylogenyProgramError 43 | 44 | 45 | class RaxmlError(PhylogenyProgramError): 46 | pass 47 | 48 | 49 | def find_program(): 50 | """Locate the binary ...""" 51 | pth = os.path.abspath(__file__) 52 | 53 | # Split off the name and the directory... 54 | pth, notused = os.path.split(pth) 55 | pth, notused = os.path.split(pth) 56 | pth = os.path.join(pth, "programs", _binary_name) 57 | pth = os.path.normpath(pth) 58 | 59 | log.debug("Checking for program %s", _binary_name) 60 | if not os.path.exists(pth) or not os.path.isfile(pth): 61 | log.error("No such file: '%s'", pth) 62 | raise RaxmlError 63 | log.debug("Found program %s at '%s'", _binary_name, pth) 64 | return pth 65 | 66 | _raxml_binary = None 67 | 68 | 69 | def run_raxml(command): 70 | global _raxml_binary 71 | if _raxml_binary is None: 72 | _raxml_binary = find_program() 73 | 74 | # Add in the command file 75 | log.debug("Running 'raxml %s'", command) 76 | command = "\"%s\" %s" % (_raxml_binary, command) 77 | 78 | # Note: We use shlex.split as it does a proper job of handling command 79 | # lines that are complex 80 | p = subprocess.Popen( 81 | shlex.split(command), 82 | shell=False, 83 | stdout=subprocess.PIPE, 84 | stderr=subprocess.PIPE) 85 | 86 | # Capture the output, we might put it into the errors 87 | stdout, stderr = p.communicate() 88 | # p.terminate() 89 | 90 | if p.returncode != 0: 91 | log.error("RAxML did not execute successfully") 92 | log.error("RAxML output follows, in case it's 
helpful for finding the problem") 93 | log.error("%s", stdout) 94 | log.error("%s", stderr) 95 | raise RaxmlError 96 | 97 | 98 | def dupfile(src, dst): 99 | # Make a copy or a symlink so that we don't overwrite different model runs 100 | # of the same alignment 101 | 102 | # TODO maybe this should throw...? 103 | try: 104 | if os.path.exists(dst): 105 | os.remove(dst) 106 | shutil.copyfile(src, dst) 107 | except OSError: 108 | log.error("Cannot link/copy file %s to %s", src, dst) 109 | raise RaxmlError 110 | 111 | 112 | def make_topology(alignment_path, datatype, cmdline_extras): 113 | '''Make a MP tree to start the analysis''' 114 | log.info("Making MP tree for %s", alignment_path) 115 | 116 | cmdline_extras = check_defaults(cmdline_extras) 117 | 118 | # First get the MP topology like this (-p is a hard-coded random number seed): 119 | if datatype == "DNA": 120 | command = "-y -s '%s' -m GTRGAMMA -n MPTREE -p 123456789 %s" % ( 121 | alignment_path, cmdline_extras) 122 | elif datatype == "protein": 123 | command = "-y -s '%s' -m PROTGAMMALG -n MPTREE -p 123456789 %s" % ( 124 | alignment_path, cmdline_extras) 125 | else: 126 | log.error("Unrecognised datatype: '%s'" % (datatype)) 127 | raise(RaxmlError) 128 | 129 | #force raxml to write to the dir with the alignment in it 130 | aln_dir, fname = os.path.split(alignment_path) 131 | command = ''.join([command, " -w '%s'" % os.path.abspath(aln_dir)]) 132 | 133 | run_raxml(command) 134 | dir, aln = os.path.split(alignment_path) 135 | tree_path = os.path.join(dir, "RAxML_parsimonyTree.MPTREE") 136 | return tree_path 137 | 138 | 139 | def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras): 140 | #Now we re-estimate branchlengths using a GTR+G model on the (unpartitioned) dataset 141 | cmdline_extras = check_defaults(cmdline_extras) 142 | dir_path, fname = os.path.split(topology_path) 143 | tree_path = os.path.join(dir_path, 'topology_tree.phy') 144 | log.debug("Copying %s to %s", topology_path, 
tree_path) 145 | dupfile(topology_path, tree_path) 146 | os.remove(topology_path) # saves headaches later... 147 | 148 | if datatype == "DNA": 149 | log.info("Estimating GTR+G branch lengths on tree using RAxML") 150 | command = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s" % ( 151 | alignment_path, tree_path, os.path.abspath(dir_path), cmdline_extras) 152 | run_raxml(command) 153 | if datatype == "protein": 154 | log.info("Estimating LG+G branch lengths on tree using RAxML") 155 | command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s" % ( 156 | alignment_path, tree_path, os.path.abspath(dir_path), cmdline_extras) 157 | run_raxml(command) 158 | 159 | dir, aln = os.path.split(alignment_path) 160 | tree_path = os.path.join(dir, "RAxML_result.BLTREE") 161 | log.info("Branchlength estimation finished") 162 | 163 | # Now return the path of the final tree with branch lengths 164 | return tree_path 165 | 166 | 167 | def check_defaults(cmdline_extras): 168 | """We use some sensible defaults, but allow users to override them with extra cmdline options""" 169 | if cmdline_extras.count("-e") > 0: 170 | #then the user has specified a particular accuracy: 171 | accuracy = "" 172 | else: 173 | #we specify a default accuracy of 1 lnL unit 174 | accuracy = " -e 1.0 " 175 | 176 | #we set this in case people are using the PThreads version of RAxML 177 | #note that this is intentionally set to give an error if people use Pthreads, because 178 | #they will need to consider by hand what the optimal setting is. And, if we set it >1 179 | #then we risk massively slowing things down because PF's default is to use all possible 180 | #processors. 181 | if cmdline_extras.count("-T") > 0: 182 | num_threads = "" 183 | 184 | else: 185 | num_threads = " -T 1 " 186 | 187 | #and we'll specify the -O option, so that the program doesn't exit if there are undetermined seqs. 188 | #we'll put spaces at the start and end too, just in case... 
189 | cmdline_extras = ''.join( 190 | [" ", cmdline_extras, accuracy, num_threads, "-O "]) 191 | 192 | return cmdline_extras 193 | 194 | 195 | def analyse(model, alignment_path, tree_path, branchlengths, cmdline_extras): 196 | """Do the analysis -- this will overwrite stuff!""" 197 | 198 | # Move it to a new name to stop raxml stomping on different model analyses 199 | # dupfile(alignment_path, analysis_path) 200 | model_params = models.get_model_commandline(model) 201 | 202 | if branchlengths == 'linked': 203 | #constrain all branchlengths to be equal 204 | bl = ' -f B ' 205 | elif branchlengths == 'unlinked': 206 | #let branchlenghts vary among subsets 207 | bl = ' -f e ' 208 | else: 209 | # WTF? 210 | log.error("Unknown option for branchlengths: %s", branchlengths) 211 | raise RaxmlError 212 | 213 | cmdline_extras = check_defaults(cmdline_extras) 214 | 215 | #raxml doesn't append alignment names automatically, like PhyML, let's do that here 216 | analysis_ID = raxml_analysis_ID(alignment_path, model) 217 | 218 | #force raxml to write to the dir with the alignment in it 219 | #-e 1.0 sets the precision to 1 lnL unit. This is all that's required here, and helps with speed. 
220 | aln_dir, fname = os.path.split(alignment_path) 221 | command = " %s -s '%s' -t '%s' %s -n %s -w '%s' %s" % ( 222 | bl, alignment_path, tree_path, model_params, analysis_ID, os.path.abspath(aln_dir), cmdline_extras) 223 | run_raxml(command) 224 | 225 | 226 | def raxml_analysis_ID(alignment_path, model): 227 | dir, file = os.path.split(alignment_path) 228 | aln_name = os.path.splitext(file)[0] 229 | analysis_ID = '%s_%s.txt' % (aln_name, model) 230 | return analysis_ID 231 | 232 | 233 | def make_tree_path(alignment_path): 234 | dir, aln = os.path.split(alignment_path) 235 | tree_path = os.path.join(dir, "RAxML_result.BLTREE") 236 | return tree_path 237 | 238 | 239 | def make_output_path(alignment_path, model): 240 | analysis_ID = raxml_analysis_ID(alignment_path, model) 241 | dir, aln_file = os.path.split(alignment_path) 242 | stats_fname = "RAxML_info.%s" % (analysis_ID) 243 | stats_path = os.path.join(dir, stats_fname) 244 | tree_fname = "RAxML_result.%s" % (analysis_ID) 245 | tree_path = os.path.join(dir, tree_fname) 246 | return stats_path, tree_path 247 | 248 | 249 | def remove_files(aln_path, model): 250 | '''remove all files from the alignment directory that are produced by raxml''' 251 | dir, file = os.path.split(aln_path) 252 | analysis_ID = raxml_analysis_ID(aln_path, model) 253 | dir = os.path.abspath(dir) 254 | fs = os.listdir(dir) 255 | fnames = fnmatch.filter(fs, '*%s*' % analysis_ID) 256 | util.delete_files(fnames) 257 | 258 | 259 | class RaxmlResult(object): 260 | 261 | def __init__(self): 262 | self.rates = {} 263 | self.freqs = {} 264 | 265 | def __str__(self): 266 | return "RaxmlResult(lnl:%s, tree_size:%s, secs:%s, alphs:%s)" % ( 267 | self.lnl, self.tree_size, self.seconds, self.alpha) 268 | 269 | 270 | class Parser(object): 271 | def __init__(self, datatype): 272 | 273 | if datatype == "protein": 274 | letters = "ARNDCQEGHILKMFPSTWYV" 275 | elif datatype == "DNA": 276 | letters = "ATCG" 277 | else: 278 | log.error("Unknown datatype '%s', 
please check" % datatype) 279 | raise RaxmlError 280 | 281 | FLOAT = Word(nums + '.-').setParseAction(lambda x: float(x[0])) 282 | 283 | L = Word(letters, exact=1) 284 | COLON = Suppress(":") 285 | 286 | LNL_LABEL = Regex("Final GAMMA.+:") | Literal("Likelihood:") 287 | TIME_LABEL = Regex("Overall Time.+:") | Regex("Overall Time.+tion ") 288 | ALPHA_LABEL = Literal("alpha:") 289 | TREE_SIZE_LABEL = Literal("Tree-Length:") 290 | 291 | def labeled_float(label): 292 | return Suppress(SkipTo(label)) + Suppress(label) + FLOAT 293 | 294 | lnl = labeled_float(LNL_LABEL) 295 | lnl.setParseAction(self.set_lnl) 296 | 297 | seconds = labeled_float(TIME_LABEL) 298 | seconds.setParseAction(self.set_seconds) 299 | 300 | alpha = labeled_float(ALPHA_LABEL) 301 | alpha.setParseAction(self.set_alpha) 302 | 303 | tree_size = labeled_float(TREE_SIZE_LABEL) 304 | tree_size.setParseAction(self.set_tree_size) 305 | 306 | rate = Suppress("rate") + L + Suppress("<->") + L + COLON + FLOAT 307 | rate.setParseAction(self.set_rate) 308 | rates = OneOrMore(rate) 309 | 310 | freq = Suppress("freq pi(") + L + Suppress("):") + FLOAT 311 | freq.setParseAction(self.set_freq) 312 | freqs = OneOrMore(freq) 313 | 314 | # Just look for these things 315 | self.root_parser = seconds + lnl + alpha + tree_size + rates + freqs 316 | 317 | def set_seconds(self, tokens): 318 | self.result.seconds = tokens[0] 319 | 320 | def set_lnl(self, tokens): 321 | self.result.lnl = tokens[0] 322 | 323 | def set_tree_size(self, tokens): 324 | self.result.tree_size = tokens[0] 325 | 326 | def set_alpha(self, tokens): 327 | self.result.alpha = tokens[0] 328 | 329 | def set_rate(self, tokens): 330 | basefrom, baseto, rate = tokens 331 | self.result.rates[(basefrom, baseto)] = rate 332 | 333 | def set_freq(self, tokens): 334 | base, rate = tokens 335 | self.result.freqs[base] = rate 336 | 337 | def parse(self, text): 338 | log.debug("Parsing raxml output...") 339 | self.result = RaxmlResult() 340 | try: 341 | 
self.root_parser.parseString(text) 342 | except ParseException, p: 343 | log.error(str(p)) 344 | raise RaxmlError 345 | 346 | log.debug("Result is %s", self.result) 347 | return self.result 348 | 349 | 350 | def parse(text, datatype): 351 | the_parser = Parser(datatype) 352 | return the_parser.parse(text) 353 | 354 | if __name__ == '__main__': 355 | logging.basicConfig(level=logging.DEBUG) 356 | pth = "./tests/misc/raxml_nucleotide.output" 357 | p = Parser('DNA') 358 | result = p.parse(open(pth).read()) 359 | 360 | pth = "./tests/misc/raxml_aminoacid.output" 361 | p = Parser('protein') 362 | result = p.parse(open(pth).read()) 363 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/raxml.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/raxml.pyc -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/raxml_models.py: -------------------------------------------------------------------------------- 1 | #Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | #This program is free software: you can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. 
PartitionFinder also includes the PhyML 14 | #program, the RAxML program, and the PyParsing library, 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import logging 19 | log = logging.getLogger("analysis") 20 | 21 | import config 22 | 23 | # TODO need some error checking! 24 | 25 | # number of free parameters in substitution model, listed as "model+base_frequencies" 26 | _base_models = { 27 | "GTR" : (5+3, "") 28 | } 29 | 30 | # number of free parameters in substitution model, listed as "aa_frequencies" 31 | _base_protein_models = { 32 | "DAYHOFF" : (0, ""), 33 | "DCMUT" : (0, ""), 34 | "JTT" : (0, ""), 35 | "MTREV" : (0, ""), 36 | "WAG" : (0, ""), 37 | "RTREV" : (0, ""), 38 | "CPREV" : (0, ""), 39 | "VT" : (0, ""), 40 | "BLOSUM62" : (0, ""), 41 | "MTMAM" : (0, ""), 42 | "LG" : (0, ""), 43 | } 44 | 45 | # All the functions in here return the same thing with the same parameters, 46 | # this just caches the return ... 
47 | def memoize(f): 48 | cache= {} 49 | def memf(*x): 50 | if x not in cache: 51 | cache[x] = f(*x) 52 | return cache[x] 53 | return memf 54 | 55 | @memoize 56 | def get_protein_models_gamma(): 57 | ''' 58 | Return a list of all implemented _base__protein_models in RAxML 59 | NB there are NO models in RAxML without Gamma 60 | ''' 61 | model_list = [] 62 | for model in _base_protein_models.keys(): 63 | model_list.append("%s+G" %(model)) 64 | model_list.append("%s+G+F" %(model)) 65 | return model_list 66 | 67 | @memoize 68 | def get_protein_models_gammaI(): 69 | ''' 70 | Return a list of all implemented _base__protein_models in RAxML with invariant sites 71 | ''' 72 | model_list = [] 73 | for model in _base_protein_models.keys(): 74 | model_list.append("%s+I+G" %(model)) 75 | model_list.append("%s+I+G+F" %(model)) 76 | return model_list 77 | 78 | def get_all_protein_models(): 79 | model_list = get_protein_models_gamma() + get_protein_models_gammaI() 80 | return model_list 81 | 82 | @memoize 83 | def get_dna_models_gamma(): 84 | ''' 85 | Just one model in RAxML with +G. 86 | ''' 87 | model_list = ["GTR+G"] 88 | return model_list 89 | 90 | @memoize 91 | def get_dna_models_gammaI(): 92 | ''' 93 | Just one model in RAxML with I+G. 
94 | ''' 95 | model_list = ["GTR+I+G"] 96 | return model_list 97 | 98 | @memoize 99 | def get_all_dna_models(): 100 | model_list = get_dna_models_gamma() + get_dna_models_gammaI() 101 | return model_list 102 | 103 | @memoize 104 | def get_all_models(): 105 | model_list = get_all_DNA_models() + get_all_protein_models() 106 | return model_list 107 | 108 | @memoize 109 | def get_model_commandline(modelstring): 110 | ''' 111 | Input a model string, and get the piece of the raxml command line that defines that model 112 | ''' 113 | commandline = '-m ' 114 | elements = modelstring.split("+") 115 | model_name = elements[0] 116 | 117 | # Everything but the first element 118 | extras = elements[1:] 119 | 120 | if model_name in _base_models.keys(): #DNA models 121 | commandline = ''.join([commandline, "GTRGAMMA"]) 122 | if "I" in extras: 123 | commandline = ''.join([commandline, "I"]) 124 | else: #protein models, look like this 'PROTGAMMAILGF 125 | commandline = ''.join([commandline, "PROTGAMMA"]) 126 | if "I" in extras: 127 | commandline = ''.join([commandline, "I"]) 128 | commandline = ''.join([commandline, model_name]) 129 | if "F" in extras: 130 | commandline = ''.join([commandline, "F"]) 131 | 132 | return commandline 133 | 134 | 135 | @memoize 136 | def get_num_params(modelstring): 137 | ''' 138 | Input a model string like HKY+I+G or LG+G+F, and get the number of parameters 139 | ''' 140 | elements = modelstring.split("+") 141 | model_name = elements[0] 142 | if model_name in _base_models.keys(): 143 | model_params = _base_models[model_name][0] 144 | else: 145 | model_params = _base_protein_models[model_name][0] 146 | if "F" in elements[1:]: 147 | model_params = model_params+19-1 #the -1 here is to account for the fact we add 1 for the + in '+F' below 148 | 149 | extras = modelstring.count("+") 150 | total = model_params+extras 151 | log.debug("Model: %s Params: %d" %(modelstring, total)) 152 | 153 | return total 154 | 155 | @memoize 156 | def 
get_model_difficulty(modelstring): 157 | ''' 158 | Input a model string like HKY+I+G or LG+G+F, and a guess about how long it takes to analyse 159 | Right now, this is done with a simple hack. I just return a number that is the number of params 160 | plus a modifier for extra stuff like +I and +G 161 | the hardest models are +I+G, then +G, then +I 162 | this is just used to rank models for ordering the analysis 163 | The return is a 'difficulty' score that can be used to rank models 164 | ''' 165 | elements = modelstring.split("+") 166 | 167 | model_params = get_num_params(modelstring) 168 | 169 | difficulty = 0 170 | if "G" in elements[1:]: 171 | difficulty = difficulty + 2000 172 | if "I" in elements[1:]: 173 | difficulty = difficulty + 1000 174 | 175 | extras = modelstring.count("+") 176 | total = model_params+extras+difficulty 177 | log.debug("Model: %s Difficulty: %d" %(modelstring, total)) 178 | 179 | return total 180 | 181 | def get_raxml_protein_modelstring(modelstring): 182 | """Start with a model like this: LG+I+G+F, return a model in raxml format like this: 183 | ILGF. 
This is only used for printing out RAxML partition files""" 184 | elements = modelstring.split("+") 185 | model_name = elements[0] 186 | extras = elements[1:] 187 | 188 | raxmlstring = model_name 189 | if "F" in extras: 190 | raxmlstring = ''.join([raxmlstring, "F"]) 191 | 192 | return raxmlstring 193 | 194 | if __name__ == "__main__": 195 | print " ", 196 | print "Name".ljust(15), 197 | print "Params".ljust(10), 198 | print "Diff".ljust(10), 199 | print "CommandLine" 200 | for i, model in enumerate(get_all_DNA_models()): 201 | print str(i+1).rjust(2), 202 | print model.ljust(15), 203 | print str(get_num_params(model)).ljust(10), 204 | print str(get_model_difficulty(model)).ljust(10), 205 | print get_model_commandline(model) 206 | for i, model in enumerate(get_all_protein_models()): 207 | print str(i+1).rjust(2), 208 | print model.ljust(15), 209 | print str(get_num_params(model)).ljust(10), 210 | print str(get_model_difficulty(model)).ljust(10), 211 | print get_model_commandline(model) 212 | 213 | -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/raxml_models.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/PartitionFinderV1.1.1/partfinder/raxml_models.pyc -------------------------------------------------------------------------------- /PartitionFinderV1.1.1/partfinder/reporter.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2012 Robert Lanfear and Brett Calcott 2 | # 3 | # This program is free software: you can redistribute it and/or modify it under 4 | # the terms of the GNU General Public License as published by the Free Software 5 | # Foundation, either version 3 of the License, or (at your option) any later 6 | # version. 
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details. You should have received a copy of the GNU General Public License
# along with this program. If not, see .
# PartitionFinder also includes the PhyML program, the RAxML program, and the
# PyParsing library, all of which are protected by their own licenses and
# conditions, using PartitionFinder implies that you agree with those licences
# and conditions as well.

import logging
log = logging.getLogger("reporter")

import os

# Fixed-width line templates shared by all the report writers.
scheme_header_template = "%-18s: %s\n"
scheme_subset_template = "%-6s | %-10s | %-30s | %-30s | %-40s\n"
subset_template = "%-15s | %-15s | %-15s | %-15s | %-15s\n"


class TextReporter(object):
    """Writes plain-text summaries of subsets, schemes, and the best scheme."""

    def __init__(self, config):
        self.cfg = config
        # Register ourselves so the analysis code can reach the reporter.
        self.cfg.reporter = self

    def write_subset_summary(self, sub):
        """Write <subsets_path>/<sub.name>.txt with models sorted by BIC."""
        pth = os.path.join(self.cfg.subsets_path, sub.name + '.txt')
        # FIX: sort on the BIC value only. The old (bic, result) tuple sort
        # compared result objects whenever BICs tied, which is fragile (and
        # a TypeError on Python 3).
        model_results = sorted(sub.results.values(), key=lambda r: r.bic)
        # FIX: close the file when done (it was previously left open).
        with open(pth, 'w') as output:
            # TODO change back to full name...
            # output.write("Model selection results for subset: %s\n" % sub.full_name)
            output.write("Model selection results for subset: %s\n" % sub.name)
            output.write("Subset alignment stored here: %s\n" % sub.alignment_path)
            output.write("This subset contains the following data_blocks: %s\n" % sub)
            output.write("Models are organised according to their BIC scores\n\n")
            output.write(subset_template % ("Model", "lNL", "AIC", "AICc", "BIC"))
            for r in model_results:
                output.write(subset_template % (r.model, r.lnl, r.aic, r.aicc, r.bic))

    def write_scheme_summary(self, sch, result):
        """Write <schemes_path>/<sch.name>.txt describing scheme `sch`."""
        pth = os.path.join(self.cfg.schemes_path, sch.name + '.txt')
        # FIX: close the file when done (it was previously left open).
        with open(pth, 'w') as output:
            self.output_scheme(sch, result, output)

    def output_scheme(self, sch, result, output):
        """Write the full scheme report (header, subsets, RAxML block)."""
        self.write_scheme_header(sch, result, output)
        sorted_subsets = [sub for sub in sch]
        # Report subsets in order of their first alignment column.
        sorted_subsets.sort(key=lambda sub: min(sub.columns), reverse=False)
        self.write_subsets(sch, result, output, sorted_subsets)
        self.write_raxml(sch, result, output, sorted_subsets)

    def write_scheme_header(self, sch, result, output):
        """Write the scheme-level statistics block."""
        output.write(scheme_header_template % ("Scheme Name", sch.name))
        output.write(scheme_header_template % ("Scheme lnL", result.lnl))
        # Only show the criterion actually used for model selection.
        if self.cfg.model_selection == "aic":
            output.write(scheme_header_template % ("Scheme AIC", result.aic))
        if self.cfg.model_selection == "aicc":
            output.write(scheme_header_template % ("Scheme AICc", result.aicc))
        if self.cfg.model_selection == "bic":
            output.write(scheme_header_template % ("Scheme BIC", result.bic))
        output.write(scheme_header_template % ("Number of params", result.sum_k))
        output.write(scheme_header_template % ("Number of sites", result.nsites))
        output.write(scheme_header_template % ("Number of subsets", result.nsubs))
        output.write("\n")

    def _subset_description(self, sub):
        """Return (partition_names, sites_text) for `sub`.

        partition_names is the (unsorted) list of partition names;
        sites_text pretty-prints the site ranges, e.g. '1-100, 2-300\\3'.
        Shared by write_subsets and write_raxml, which previously
        duplicated this code.
        """
        desc = {}
        names = []
        for part in sub:
            names.append(part.name)
            # Each partition may consist of several (start, end, step) parts.
            for subpart in part.description:
                desc[subpart[0]] = subpart

        parts = []
        for key in sorted(desc.keys()):
            part = desc[key]
            if part[2] == 1:
                text = "%s-%s" % (part[0], part[1])
            else:
                text = "%s-%s\\%s" % tuple(part)
            parts.append(text)
        return names, ', '.join(parts)

    def write_subsets(self, sch, result, output, sorted_subsets):
        """Write the per-subset table plus the PF-format scheme line."""
        output.write(scheme_subset_template % (
            "Subset", "Best Model", "Subset Partitions", "Subset Sites", "Alignment"))
        number = 1

        # A way to print out the scheme in PF format.
        pf_scheme_description = []

        for sub in sorted_subsets:
            names, parts = self._subset_description(sub)

            names.sort()
            names = ', '.join(names)

            pf_scheme_description.append("(%s)" % names)

            output.write(scheme_subset_template % (
                number, sub.best_model, names, parts, sub.alignment_path))
            number += 1

        pf_scheme_description = " ".join(pf_scheme_description)
        output.write("\n\nScheme Description in PartitionFinder format\n")
        output.write("Scheme_%s = %s;" % (sch.name, pf_scheme_description))

    def write_raxml(self, sch, result, output, sorted_subsets):
        """Print out partition definitions in RaxML-like format, might be
        useful to some people
        """
        from raxml_models import get_raxml_protein_modelstring
        output.write("\n\nRaxML-style partition definitions\n")
        number = 1
        for sub in sorted_subsets:
            names, parts = self._subset_description(sub)

            if self.cfg.datatype == "DNA":
                model = "DNA"
            elif self.cfg.datatype == "protein":
                model = get_raxml_protein_modelstring(sub.best_model)
            else:
                raise RuntimeError

            line = "%s, p%s = %s\n" % (model, number, parts)
            output.write(line)

            number += 1

    def write_best_scheme(self, result):
        """Write best_scheme.txt: the settings used plus the winning scheme."""
        pth = os.path.join(self.cfg.output_path, 'best_scheme.txt')
        # FIX: open in text mode ('wb' plus str writes breaks on Python 3;
        # the content is text) and make sure the file gets closed.
        with open(pth, 'w') as output:
            output.write('Settings used\n\n')
            output.write(scheme_header_template % ("alignment", self.cfg.alignment_path))
            output.write(scheme_header_template % ("branchlengths", self.cfg.branchlengths))
            output.write(scheme_header_template % ("models", ', '.join(self.cfg.models)))
            output.write(scheme_header_template % ("model_selection",
                                                   self.cfg.model_selection))
            output.write(scheme_header_template % ("search", self.cfg.search))
            if self.cfg.search in ["rcluster", "hcluster"]:
                # NOTE: the "base" weight is stored under the "freqs" key.
                pretty_weights = "rate = %s, base = %s, model = %s, alpha = %s" % (
                    str(self.cfg.cluster_weights["rate"]),
                    str(self.cfg.cluster_weights["freqs"]),
                    str(self.cfg.cluster_weights["model"]),
                    str(self.cfg.cluster_weights["alpha"]))
                output.write(scheme_header_template % ("weights", pretty_weights))
            if self.cfg.search == "rcluster":
                output.write(scheme_header_template % ("rcluster-percent",
                                                       self.cfg.cluster_percent))
            output.write('\n\nBest partitioning scheme\n\n')
            self.output_scheme(result.best_scheme, result.best_result, output)
            log.info("Information on best scheme is here: %s", pth)
#Copyright (C) 2012 Robert Lanfear and Brett Calcott
#
#This program is free software: you can redistribute it and/or modify it
#under the terms of the GNU General Public License as published by the
#Free Software Foundation, either version 3 of the License, or (at your
#option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details. You should have received a copy
#of the GNU General Public License along with this program. If not, see
#. PartitionFinder also includes the PhyML
#program, the RAxML program, and the PyParsing library,
#all of which are protected by their own licenses and conditions, using
#PartitionFinder implies that you agree with those licences and conditions as well.

import logging
log = logging.getLogger("results")

import os
# FIX: fall back to the pure-Python pickle so this module also imports on
# Python 3, where cPickle no longer exists. Python 2 behaviour is unchanged.
try:
    import cPickle as pickle
except ImportError:
    import pickle

from util import PartitionFinderError

# The per-result fields that get dumped and later compared.
_check_fields = "lnl aic aicc bic".split()


class ComparisonError(PartitionFinderError):
    """Raised when a rerun's results disagree with previously dumped ones."""
    pass


class AnalysisResults(object):
    """
    This stores the results, keeping only the winning scheme.
    """

    # Maximum tolerated absolute difference when comparing against a dump.
    MAX_ERROR = .1

    def __init__(self, model_selection):
        self.model_selection = model_selection
        self.best_score = None
        self.best_result = None
        self.best_scheme = None

    def add_scheme_result(self, sch, result):
        """Keep (sch, result) only if its score beats the current best.

        Lower scores (AIC/AICc/BIC) are better.
        """
        score = result.score
        if self.best_score is None or score < self.best_score:
            self.best_score = score
            self.best_result = result
            self.best_scheme = sch

    def get_dump_path(self, cfg):
        """Path of the results dump file inside the analysis base folder."""
        return os.path.join(cfg.base_path, 'results.bin')

    def get_result_fields(self):
        """Return [lnl, aic, aicc, bic] of the best result."""
        flds = []
        for k in _check_fields:
            flds.append(getattr(self.best_result, k))
        return flds

    def dump(self, cfg):
        """Pickle the best result's fields to results.bin."""
        pth = self.get_dump_path(cfg)
        log.info("Dumping all results to '%s'", pth)
        # BUG FIX: the file was previously never closed, so the pickled
        # data might not be flushed to disk; `with` guarantees both.
        with open(pth, 'wb') as f:
            pickle.dump(self.get_result_fields(), f, -1)

    def compare(self, cfg):
        """We only compare the best result!"""
        pth = self.get_dump_path(cfg)
        if not os.path.exists(pth):
            log.error("Previous results file not found at '%s'. "
                      "Did you run --dump-results previously?", pth)
            raise ComparisonError

        log.info("Loading old results from '%s'", pth)
        with open(pth, 'rb') as f:
            old_fields = pickle.load(f)

        cur_fields = self.get_result_fields()

        log.info("Comparing results...")
        # Now do the comparison, field by field.

        errors = 0
        for nm, oldv, curv in zip(_check_fields, old_fields, cur_fields):
            if abs(oldv - curv) > self.MAX_ERROR:
                log.error("Differences were more than acceptable value of %s", AnalysisResults.MAX_ERROR)
                log.error("Old %s value: %s, new %s value %s", nm, oldv, nm, curv)
                errors += 1

        if errors > 0:
            raise ComparisonError
        else:
            log.info(
                "All results were within an acceptable %s of the dumped results",
                AnalysisResults.MAX_ERROR)
You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, and the PyParsing library, 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import logging 19 | log = logging.getLogger("scheme") 20 | import subset 21 | import submodels 22 | 23 | from math import log as logarithm 24 | 25 | from util import PartitionFinderError 26 | 27 | 28 | class SchemeError(PartitionFinderError): 29 | pass 30 | 31 | 32 | class SchemeResult(object): 33 | def __init__(self, sch, nseq, branchlengths, model_selection): 34 | self.scheme_name = sch.name 35 | self.scheme = sch 36 | self.model_selection = model_selection 37 | 38 | # Calculate AIC, BIC, AICc for each scheme. 39 | # How you do this depends on whether brlens are linked or not. 40 | self.nsubs = len(sch.subsets) # number of subsets 41 | sum_subset_k = sum([s.best_params for s in sch]) # sum of number of parameters in the best model of each subset 42 | 43 | log.debug("Calculating number of parameters in scheme:") 44 | log.debug("Total parameters from subset models: %d" % (sum_subset_k)) 45 | 46 | if branchlengths == 'linked': # linked brlens - only one extra parameter per subset 47 | self.sum_k = sum_subset_k + (self.nsubs - 1) + ( 48 | (2 * nseq) - 3) # number of parameters in a scheme 49 | log.debug("Total parameters from brlens: %d" % ((2 * nseq) - 3)) 50 | log.debug( 51 | "Parameters from subset multipliers: %d" % (self.nsubs - 1)) 52 | 53 | elif branchlengths == 'unlinked': # unlinked brlens - every subset has its own set of brlens 54 | self.sum_k = sum_subset_k + (self.nsubs * ( 55 | (2 * nseq) - 3)) # number of parameters in a scheme 56 | log.debug("Total parameters from brlens: %d" % (( 57 | 2 * nseq) - 3) * self.nsubs) 58 | 59 | else: 60 | # WTF? 
import logging
log = logging.getLogger("scheme")


class Scheme(object):
    """A scheme: a partitioning of all user-defined partitions into subsets."""

    def __init__(self, cfg, name, subsets, description=None):
        """Build a scheme named *name* from an iterable of Subsets.

        Validates that every partition defined in *cfg* appears in exactly
        one subset; raises SchemeError on duplicates or omissions.
        """
        self.name = name
        self.subsets = set()
        self.description = description

        # A set of frozensets of partitions -- this is the scheme's
        # structural identity, used by SchemeSet to spot duplicates.
        part_subsets = set()

        # Deliberately long-winded: we visit every partition so that ALL
        # duplicates can be collected into a single error report.
        seen_partitions = set()
        duplicates = []
        for sub in subsets:
            for part in sub:
                if part in seen_partitions:
                    duplicates.append(str(part))
                else:
                    seen_partitions.add(part)
            self.subsets.add(sub)
            part_subsets.add(sub.partitions)

        self.part_subsets = frozenset(part_subsets)

        # Report the errors
        if duplicates:
            log.error("Scheme '%s' contains duplicate partitions: %s",
                      name, ', '.join(duplicates))
            raise SchemeError

        # Set-difference against the full partition set to find omissions.
        missing = cfg.partitions.partitions - seen_partitions
        if missing:
            log.error("Scheme '%s' is missing partitions: %s",
                      name, ', '.join([str(p) for p in missing]))
            raise SchemeError

        # Creating the first scheme locks the partition set so no new
        # partitions can be defined afterwards.
        if not cfg.partitions.finalised:
            cfg.partitions.finalise()

        log.debug("Created %s", self)

    def __iter__(self):
        return iter(self.subsets)

    def __str__(self):
        ss = ', '.join([str(s) for s in self.subsets])
        return "Scheme(%s, %s)" % (self.name, ss)


class SchemeSet(object):
    """All the schemes added, keyed both by name and by subset structure."""

    def __init__(self):
        """A collection of schemes."""
        self.clear_schemes()

    def clear_schemes(self):
        self.schemes_by_name = {}
        self.schemes_by_subsets = {}

    def add_scheme(self, scheme):
        """Add *scheme*. Duplicate names are an error; a duplicate subset
        structure under a new name only triggers a warning."""
        if scheme.name in self.schemes_by_name:
            log.error("Cannot add two schemes with same name: '%s'" %
                      scheme.name)
            raise SchemeError

        if scheme.part_subsets in self.schemes_by_subsets:
            existing_scheme = \
                self.schemes_by_subsets[scheme.part_subsets]
            log.warning(
                "Scheme named %s being added is identical to existing %s",
                scheme.name, existing_scheme)

        self.schemes_by_name[scheme.name] = scheme
        self.schemes_by_subsets[scheme.part_subsets] = scheme

    def __len__(self):
        return len(self.schemes_by_name)

    def __iter__(self):
        # .values() (not the Python-2-only .itervalues()) so this module
        # keeps working on both Python 2 and Python 3.
        return iter(self.schemes_by_name.values())
def _subsets_from_grouping(cfg, grouping):
    """Turn a grouping list like [0, 0, 1, 2] into a list of Subsets.

    Element i of *grouping* gives the group that partition i belongs to;
    partitions sharing a group value end up in the same Subset. This was
    previously copy-pasted in create_scheme, model_to_scheme and
    generate_all_schemes.
    """
    groups = {}
    for part_index, group_id in enumerate(grouping):
        groups.setdefault(group_id, []).append(part_index)

    created_subsets = []
    for part_indexes in groups.values():
        sub = subset.Subset(*tuple([cfg.partitions[i] for i in part_indexes]))
        created_subsets.append(sub)
    return created_subsets


def create_scheme(cfg, scheme_name, scheme_description):
    """
    Generate a single scheme given a list of numbers that represent the
    indexes of the partitions e.g. [0,1,2,3,4,5,6,7].

    Raises SchemeError if the description length does not match the
    number of partitions defined by the user.
    """
    partition_count = len(cfg.partitions)

    # Check that the correct number of items are in the list
    if len(scheme_description) != partition_count:
        log.error("There's a problem with the description of scheme %s" %
                  scheme_name)
        raise SchemeError

    return Scheme(cfg, str(scheme_name),
                  _subsets_from_grouping(cfg, scheme_description),
                  description=scheme_description)


def model_to_scheme(model, scheme_name, cfg):
    """Turn a model definition e.g. [0, 1, 2, 3, 4] into a scheme"""
    return Scheme(cfg, str(scheme_name), _subsets_from_grouping(cfg, model))


def generate_all_schemes(cfg):
    """
    Convert the abstract schema given by the algorithm into subsets:
    one Scheme per possible partitioning (Bell-number many of them).
    """
    log.info("Generating all possible schemes for the partitions...")

    partition_count = len(cfg.partitions)
    all_schemes = submodels.get_submodels(partition_count)

    scheme_list = []
    # Schemes are simply named 1, 2, 3, ... in generation order.
    for scheme_number, grouping in enumerate(all_schemes, start=1):
        scheme_list.append(
            Scheme(cfg, str(scheme_number),
                   _subsets_from_grouping(cfg, grouping)))
        log.debug("Created scheme %d of %d" % (scheme_number,
                                               len(all_schemes)))

    return scheme_list
import math
import logging
log = logging.getLogger("submodels")

# NOTE: the original module had an unused 'import algorithm' here; the
# name is never referenced anywhere in this module, so it was dropped.


def submodel_generator(result, pat, current, maxn):
    """Append to *result* every set-partition pattern of length *maxn*.

    result  -- list to append completed patterns to
    pat     -- the pattern built so far (starts as an empty list)
    current -- 1-based position currently being filled
    maxn    -- total number of items in the pattern

    Patterns are restricted-growth strings, e.g. for maxn=3:
    [0,0,0], [0,0,1], [0,1,0], [0,1,1], [0,1,2].
    """
    if pat:
        curmax = max(pat)
    else:
        curmax = 0
    for i in range(current):
        # Restricted growth: a new group label can exceed the current
        # maximum label by at most one.
        if i - 1 <= curmax:
            newpat = pat + [i]
            if current == maxn:
                result.append(newpat)
            else:
                submodel_generator(result, newpat, current + 1, maxn)


def submodel_iterator(pat, current, maxn):
    """Same as submodel_generator but yields patterns lazily."""
    if pat:
        curmax = max(pat)
    else:
        curmax = 0
    for i in range(current):
        if i - 1 <= curmax:
            newpat = pat + [i]
            if current == maxn:
                yield newpat
            else:
                # No 'yield from': keep Python 2 compatibility.
                for b in submodel_iterator(newpat, current + 1, maxn):
                    yield b


def a_choose_b(n, k):
    """Binomial coefficient C(n, k) in exact integer arithmetic.

    The running product is C(n, 1), C(n, 2), ... so the floor division
    is exact at every step (replaces the old reduce/lambda one-liner,
    whose '/' relied on Python 2 integer division).
    """
    result = 1
    for b in range(k):
        result = result * (n - b) // (b + 1)
    return result


def count_relaxed_clustering_subsets(N, cluster_percent, output=False):
    """Count subsets analysed by relaxed clustering from N partitions.

    cluster_percent -- percentage of candidate pairs examined per step.
    """
    # The start scheme contributes one subset per partition.
    start_scheme = N
    # First batch is cluster_percent of (N choose 2) pairwise merges.
    step_1 = int(math.ceil(a_choose_b(N, 2) * cluster_percent * 0.01))
    previous = step_1
    cumsum = start_scheme + step_1
    if output: print(start_scheme)
    if output: print(cumsum)
    # Now for the rest, one step per merge down to a single subset.
    for i in reversed(range(N)):
        # Once we get to the all-combined scheme we can stop.
        if i == 1:
            break
        num_new_schemes = int(math.ceil(
            a_choose_b(i, 2) * cluster_percent * 0.01))
        # Those new schemes include many we already analysed; we could
        # have seen up to (i-1 choose 2) of them. The worst case is that
        # the chosen merge knocked out the maximum number of previously
        # analysed schemes, which is 2i - 1, so:
        worst_case = 2 * i - 1
        num_already_analysed = previous - worst_case
        if num_already_analysed < 0:
            num_already_analysed = 0
        # Carry 'previous' over for the next round of the loop.
        previous = num_new_schemes
        # Final number of genuinely new schemes this step.
        num_new_schemes = num_new_schemes - num_already_analysed
        cumsum += num_new_schemes
        if output: print(cumsum)
    return cumsum


def count_relaxed_clustering_schemes(N, cluster_percent, output=False):
    """Count schemes analysed by relaxed clustering from N partitions."""
    start_scheme = 1
    # First batch is cluster_percent of (N choose 2).
    step_1 = int(math.ceil(a_choose_b(N, 2) * cluster_percent * 0.01))
    previous = step_1
    cumsum = start_scheme + step_1
    if output: print(start_scheme)
    if output: print(cumsum)
    for i in reversed(range(N)):
        # Each subsequent step is cluster_percent of (i choose 2).
        if i == 1:
            break
        num_new_schemes = int(math.ceil(
            a_choose_b(i, 2) * cluster_percent * 0.01))
        cumsum += num_new_schemes
        if output: print(cumsum)
    return cumsum


def count_greedy_schemes(N):
    """oeis.org reveals this is 1+(N*(N+1)*(N-1))/6 (always an integer)."""
    # '//' keeps the exact integer result on both Python 2 and 3.
    return 1 + (N * (N + 1) * (N - 1)) // 6


def count_greedy_subsets(N):
    """oeis.org says these are Central polygonal numbers: n^2 - n + 1."""
    return (N * N) - N + 1


def bell_numbers(N):
    """Return the Bell number for N subsets.

    Script modified from Wikipedia:
    http://en.wikipedia.org/wiki/Bell_number
    """
    N = N + 1  # Bell numbers are indexed from zero
    t = [[1]]  # Initialize the triangle as a two-dimensional array
    c = 1      # Bell numbers count
    while c <= N:
        if c >= N:
            return t[-1][0]  # Yield the Bell number of the previous row
        row = [t[-1][-1]]    # Initialize a new row
        for b in t[-1]:
            row.append(row[-1] + b)  # Populate the new row
        c += 1               # We have found another Bell number
        t.append(row)        # Append the row to the triangle


def get_submodels(N):
    """Return all submodels (set partitions) for N partitions."""
    log.debug("Generating submodels for %s partitions", N)
    result = []
    submodel_generator(result, [], 1, N)
    log.debug("Resulting number of partitions is %s", len(result))
    return result


def count_all_schemes(N):
    """Count the number of submodels we've got (the Bell number)."""
    return bell_numbers(N)


def count_all_subsets(N):
    """Count the subsets we'd have to look at for N starting partitions."""
    return (2 ** N) - 1
can redistribute it and/or modify it 4 | #under the terms of the GNU General Public License as published by the 5 | #Free Software Foundation, either version 3 of the License, or (at your 6 | #option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, but 9 | #WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 | #General Public License for more details. You should have received a copy 12 | #of the GNU General Public License along with this program. If not, see 13 | #. PartitionFinder also includes the PhyML 14 | #program, the RAxML program, and the PyParsing library, 15 | #all of which are protected by their own licenses and conditions, using 16 | #PartitionFinder implies that you agree with those licences and conditions as well. 17 | 18 | import logging 19 | log = logging.getLogger("subset") 20 | import os 21 | import weakref 22 | 23 | from hashlib import md5 24 | 25 | # import base64 26 | # from zlib import compress 27 | 28 | import cPickle as pickle 29 | from math import log as logarithm 30 | from alignment import Alignment, SubsetAlignment 31 | from util import PartitionFinderError, remove_runID_files 32 | 33 | FRESH, PREPARED, DONE = range(3) 34 | 35 | 36 | class SubsetError(PartitionFinderError): 37 | pass 38 | 39 | 40 | def count_subsets(): 41 | return 1 42 | # return len(Subset._cache) 43 | # 44 | 45 | 46 | def clear_subsets(): 47 | pass 48 | # Subset._cache.clear() 49 | 50 | 51 | class Subset(object): 52 | """A Subset of Partitions 53 | """ 54 | # TODO: Move this to the config -- once we have a global one 55 | _cache = weakref.WeakValueDictionary() 56 | 57 | def __new__(cls, *parts): 58 | """Return the SAME subset if the partitions are identical. This is 59 | basically a pythonized factory. 
See here: 60 | http://codesnipers.com/?q=python-flyweights 61 | """ 62 | 63 | cacheid = frozenset(parts) 64 | obj = Subset._cache.get(cacheid, None) 65 | # TODO Flush cache? USE MRU? functools.lrucache 66 | if not obj: 67 | obj = object.__new__(cls) 68 | Subset._cache[cacheid] = obj 69 | obj.init(cacheid, *parts) 70 | 71 | # obj = object.__new__(cls) 72 | # cacheid = frozenset(parts) 73 | # obj.init(cacheid, *parts) 74 | return obj 75 | 76 | def init(self, cacheid, *parts): 77 | # Error checking.... 78 | self.status = FRESH 79 | 80 | tempparts = set() 81 | for p in parts: 82 | if p.partition_set is None: 83 | log.error("You cannot add a Partition to a Subset until " 84 | "the Partition belongs to a PartitionSet") 85 | raise SubsetError 86 | 87 | if p in tempparts: 88 | log.error("%s is duplicated in a Subset", p) 89 | raise SubsetError 90 | 91 | tempparts.add(p) 92 | 93 | self.partitions = cacheid 94 | 95 | # a list of columns in the subset 96 | self.columns = [] 97 | self.columnset = set() 98 | for p in parts: 99 | self.columns += p.columns 100 | self.columnset |= p.columnset 101 | self.columns.sort() 102 | 103 | self.results = {} 104 | self.best_info_score = None # e.g. AIC, BIC, AICc 105 | self.best_model = None 106 | self.best_params = None 107 | self.best_lnl = None 108 | self.alignment_path = None 109 | log.debug("Created %s", self) 110 | 111 | def __str__(self): 112 | return "(%s)" % ", ".join([str(p.name) for p in self.partitions]) 113 | 114 | @property 115 | def full_name(self): 116 | if hasattr(self, '_full_name'): 117 | nm = self._full_name 118 | else: 119 | s = sorted([p.name for p in self.partitions]) 120 | nm = '-'.join(s) 121 | self._full_name = nm 122 | return nm 123 | 124 | @property 125 | def name(self): 126 | # Cache this 127 | if hasattr(self, '_name'): 128 | nm = self._name 129 | else: 130 | nm = self.full_name 131 | # This gets super long -- we can shorten it like this... This is 132 | # a slightly lazy solution. 
import logging
log = logging.getLogger("threadpool")
import threading
from time import sleep
import multiprocessing

# Cached CPU count so the system is only probed once per process.
_cpus = None


def get_cpu_count():
    """Return the number of CPUs, probing once and caching the answer."""
    global _cpus
    if _cpus is not None:
        return _cpus

    try:
        _cpus = multiprocessing.cpu_count()
    except Exception:
        # cpu_count() raises NotImplementedError on some platforms; fall
        # back to one worker. (Was a bare 'except:', which would also
        # have swallowed KeyboardInterrupt/SystemExit.)
        _cpus = 1
        log.info("I cannot detect the number of processors...")

    log.info("Found %s cpus", _cpus)
    return _cpus


class Pool(object):
    """A fixed pool of worker threads draining a list of (cmd, args) tasks."""

    def __init__(self, tasks, numthreads=-1):
        """Initialize the thread pool with numthreads workers and all tasks.

        numthreads <= 1 means "one worker per CPU"; the worker count is
        also capped at the number of tasks.
        """
        self.more_tasks = True
        self.tasks = tasks
        self.task_lock = threading.Condition(threading.Lock())
        self.threads = []
        self.failed = False

        numtasks = len(tasks)
        if numtasks == 0:
            log.warning("You did not give any tasks to do...")
            self.more_tasks = False
            return

        if numthreads <= 1:
            numthreads = get_cpu_count()
        if numtasks < numthreads:
            numthreads = numtasks

        log.debug("Creating %s threads for %s tasks", numthreads, numtasks)
        for i in range(numthreads):
            t = Thread(self)
            self.threads.append(t)
            t.start()

    def next_task(self):
        """Pop the next (cmd, args) pair, or (None, None) when drained."""
        self.task_lock.acquire()
        try:
            if self.tasks == []:
                self.more_tasks = False
                return None, None
            else:
                return self.tasks.pop(0)
        finally:
            self.task_lock.release()

    def kill(self, e):
        """Abort: drop all remaining tasks, remember *e* for join()."""
        self.task_lock.acquire()
        self.tasks = []
        self.more_tasks = False
        self.failed = True
        self.exception = e
        self.task_lock.release()

    def join(self):
        """Block until all workers finish; re-raise any worker exception."""
        # Wait till all tasks have been taken...
        while self.more_tasks:
            sleep(.1)
        # ...now wait for the workers themselves to finish.
        for t in self.threads:
            t.join()

        if self.failed:
            raise self.exception


class Thread(threading.Thread):
    """Worker that pulls tasks from its Pool until the pool is drained."""

    def __init__(self, pool):
        threading.Thread.__init__(self)
        self.pool = pool

    def run(self):
        while 1:
            cmd, args = self.pool.next_task()
            # If there's nothing to do, return
            if cmd is None:
                break
            try:
                cmd(*args)
            except Exception as e:
                # The error should already have been reported. Stop
                # operation, kill the entire pool, then let join()
                # re-raise the error.
                self.pool.kill(e)
                break
import logging
log = logging.getLogger("util")
import os
import sys
import fnmatch


# Base error class for the whole package.
class PartitionFinderError(Exception):
    pass


class PhylogenyProgramError(PartitionFinderError):
    pass


def check_file_exists(pth):
    """Raise PartitionFinderError (with a helpful message) unless *pth* is a file."""
    if not os.path.exists(pth) or not os.path.isfile(pth):
        # Give a more specific hint when the missing file is the config.
        if pth.count("partition_finder.cfg") > 0:
            log.error("Failed to find configuration file: '%s'. "
                      "For PartitionFinder to run, there must be a file called 'partition_finder.cfg' "
                      "located in the same folder as your alignment. Please check and try again.", pth)
            raise PartitionFinderError
        else:
            log.error(
                "Failed to find file: '%s'. Please check and try again.", pth)
            raise PartitionFinderError


def delete_files(pths):
    """Delete files, but watch out for a WindowsError that crops up sometimes
    with threading. Oddly, this error occurs but the files get deleted anyway,
    so we ignore it for now.
    """
    for f in pths:
        try:
            os.remove(f)
        except OSError:
            # WindowsError is an OSError subclass; best-effort delete.
            log.debug("Found and ignored Error when deleting file %s" % f)
    log.debug("deleted %d files" % len(pths))


def check_folder_exists(pth):
    """Raise PartitionFinderError unless *pth* is an existing directory."""
    if not os.path.exists(pth) or not os.path.isdir(pth):
        log.error("No such folder: '%s'", pth)
        raise PartitionFinderError


def clean_out_folder(folder, keep=None):
    """Delete every regular file in *folder* except names listed in *keep*.

    Hat Tip: http://stackoverflow.com/questions/185936/delete-folder-contents-in-python
    (keep=None replaces the old mutable default argument keep=[].)
    """
    if keep is None:
        keep = []
    for the_file in os.listdir(folder):
        if the_file not in keep:
            file_path = os.path.join(folder, the_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception as e:
                # 'except Exception, e' was Python-2-only syntax; the
                # 'as' form is valid from Python 2.6 onwards.
                log.error(e)
                raise PartitionFinderError


def make_dir(pth):
    """Create directory *pth*; a no-op if it already exists as a directory."""
    if os.path.exists(pth):
        if not os.path.isdir(pth):
            log.error("Cannot create folder '%s'", pth)
            raise PartitionFinderError
    else:
        os.mkdir(pth)


def remove_runID_files(aln_pth):
    """Remove all files that match a particular run_ID. Useful for cleaning
    out directories, but ONLY after a whole analysis of a subset is
    completely finished -- be careful!"""
    head, tail = os.path.split(aln_pth)
    run_ID = os.path.splitext(tail)[0]
    head = os.path.abspath(head)
    fnames = os.listdir(head)
    fs = fnmatch.filter(fnames, '*%s*' % run_ID)
    for f in fs:
        try:
            os.remove(os.path.join(head, f))
        except OSError:
            # Don't complain if you can't delete them
            # (we sometimes try to delete things twice in the threading).
            pass
# -*- coding: utf-8 -*-
# Author: Douglas Creager
# This file is placed into the public domain.
#
# Minor Modifications have been made by Brett Calcott
# * Write the VERSION into the current folder
#
# Calculates the current version number from "git describe", falling back
# on the RELEASE-VERSION file when not in a git working copy. See the
# original header for usage with setup.py; RELEASE-VERSION should not be
# checked into git but should be shipped in sdist tarballs.
from subprocess import Popen, PIPE
import os


def get_version_path():
    """Return the RELEASE-VERSION path in the parent of this package dir."""
    # Get current dir, then parent dir
    pth = os.path.dirname(os.path.abspath(__file__))
    pth, here = os.path.split(pth)
    return os.path.join(pth, "RELEASE-VERSION")


def call_git_describe(abbrev=4):
    """Return the first line of 'git describe', or None on any failure."""
    try:
        p = Popen(['git', 'describe', '--abbrev=%d' % abbrev],
                  stdout=PIPE, stderr=PIPE)
        p.stderr.close()
        line = p.stdout.readlines()[0]
        return line.strip()
    except Exception:
        # git missing, not a git checkout, or no output at all.
        # (Was a bare 'except:', which also swallowed SystemExit etc.)
        return None


def read_release_version():
    """Return the version stored in RELEASE-VERSION, or None if unreadable."""
    try:
        with open(get_version_path(), "r") as f:
            return f.readlines()[0].strip()
    except Exception:
        return None


def write_release_version(version):
    """(Over)write RELEASE-VERSION with *version* plus a trailing newline."""
    with open(get_version_path(), "w") as f:
        f.write("%s\n" % version)


def get_git_version(abbrev=4):
    """Best-effort current version, keeping RELEASE-VERSION up to date.

    Tries "git describe" first, then falls back on RELEASE-VERSION.
    Raises ValueError when neither source yields a version.
    """
    # Read in the version that's currently in RELEASE-VERSION.
    release_version = read_release_version()

    # First try to get the current version using "git describe".
    version = call_git_describe(abbrev)

    # If that doesn't work, fall back on the value in RELEASE-VERSION.
    if version is None:
        version = release_version

    # If we still don't have anything, that's an error.
    if version is None:
        raise ValueError("Cannot find the version number!")

    # If the current version differs from what's in the RELEASE-VERSION
    # file, update the file to be current.
    if version != release_version:
        write_release_version(version)

    # Finally, return the current version.
    return version


def get_version():
    """Return the version from RELEASE-VERSION; raise ValueError if absent."""
    version = read_release_version()
    if version is None:
        raise ValueError("Cannot find the version number!")
    return version


if __name__ == "__main__":
    # print() with a single argument behaves identically on Python 2 and 3.
    print(get_version())
mitochondrial genomes 2 | -------------------------------------------------------------------------------- /bin/BMGE.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/BMGE.jar -------------------------------------------------------------------------------- /bin/Gblocks: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/Gblocks -------------------------------------------------------------------------------- /bin/blastall: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/blastall -------------------------------------------------------------------------------- /bin/formatdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/formatdb -------------------------------------------------------------------------------- /bin/noisy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/noisy -------------------------------------------------------------------------------- /bin/progressiveMauve: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/progressiveMauve -------------------------------------------------------------------------------- /bin/readal: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/readal -------------------------------------------------------------------------------- /bin/trimal: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fenghen360/HomBlocks/d81447e872d700b51c6ef91caab3abef023a46b1/bin/trimal -------------------------------------------------------------------------------- /plant/fasta/readme: -------------------------------------------------------------------------------- 1 | This is the test data of 52 higher plant chloroplast genomes 2 | --------------------------------------------------------------------------------