├── .gitignore ├── Dockerfile ├── LICENSE ├── MetaLogo ├── __init__.py ├── character.py ├── colors.py ├── column.py ├── connect.py ├── entry.py ├── item.py ├── logo.py ├── logobits.py ├── pholy.py ├── utils.py └── version.py ├── README.md ├── __init__.py ├── dependencies ├── 6379.conf ├── FastTree ├── FastTreeMP ├── clustalo ├── redis-stable.tar.gz └── supervisord.conf ├── examples ├── all_cluster_center.fa ├── cdr3.fa ├── color.json ├── ectf.fa ├── example.fa ├── example2.fa └── example3.fa ├── logs └── .gitkeep ├── pngs └── about.PNG ├── requirements.txt ├── server.cmd.sh ├── server.dev.sh ├── server.docker.sh ├── server.toml ├── server ├── __init__.py ├── app.py ├── apps │ ├── about.py │ ├── analysis.py │ ├── msa.py │ ├── results.py │ └── tree.py ├── assets │ ├── about.PNG │ ├── baidu.js │ ├── fav1.ico │ ├── favicon.ico │ ├── google.js │ └── introduction.PNG ├── config.py ├── gen_example.py ├── handle_seqs.py ├── index.py ├── redis_queue.py ├── run_metalogo.py ├── sqlite3.py └── utils.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | MetaLogo.egg-info 2 | dist 3 | build 4 | *.pyc 5 | *.png 6 | figure_output 7 | sequence_input 8 | configs 9 | *.log 10 | bins 11 | logs 12 | db 13 | r4s.res 14 | db/metalogo.db 15 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7 2 | LABEL maintainer "Yaowen Chen " 3 | WORKDIR /code 4 | COPY requirements.txt /code/requirements.txt 5 | RUN pip install -r requirements.txt 6 | COPY dependencies /code/ 7 | #install clusta omega 8 | RUN chmod a+x clustalo 9 | RUN cp clustalo /usr/bin 10 | #install Fasttree 11 | RUN chmod a+x FastTree 12 | RUN chmod a+x FastTreeMP 13 | RUN cp FastTree /usr/bin 14 | RUN cp FastTreeMP /usr/bin 15 | # "https://redis.io/topics/quickstart" 16 | RUN tar xzvf redis-stable.tar.gz 17 | WORKDIR 
/code/redis-stable 18 | RUN make 19 | RUN make install 20 | RUN mkdir /etc/redis 21 | RUN mkdir /var/redis 22 | RUN cp utils/redis_init_script /etc/init.d/redis_6379 23 | RUN cp ../6379.conf /etc/redis/6379.conf 24 | RUN mkdir /var/redis/6379 25 | RUN update-rc.d redis_6379 defaults 26 | #RUN /etc/init.d/redis_6379 start 27 | #supervisor configure 28 | WORKDIR /code 29 | RUN cp supervisord.conf /etc/ 30 | COPY server.cmd.sh /code/ 31 | EXPOSE 8050 32 | CMD sh MetaLogo/server.cmd.sh 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
from matplotlib import pyplot as plt
from matplotlib import patches
import numpy as np
import math
from matplotlib.transforms import Affine2D, Bbox
from matplotlib.textpath import TextPath
from matplotlib.patches import PathPatch,Rectangle,Circle,Polygon
from matplotlib.path import Path
import mpl_toolkits.mplot3d.art3d as art3d


from .item import Item
from .colors import get_color_scheme
from .utils import rotate

basic_dna_color = get_color_scheme('basic_dna_color')


class Character(Item):
    """One glyph of a logo, rendered as a scaled (and optionally rotated) text path.

    The glyph outline comes from ``matplotlib.textpath.TextPath``; ``transform``
    stretches it to ``width`` x ``height`` at ``start_pos`` and ``draw`` adds the
    resulting patch to ``ax``.

    Several helpers used here (``generate_ax``, ``get_limited_char_width`` and
    the ``radiation_space`` attribute) are not defined in this class; they are
    presumably supplied by ``Item`` or its callers -- verify against ``item.py``.
    """

    def __init__(self, char, ax=None, start_pos=(0,0), width=1, height=1, limited_char_width=None,
                 logo_type='Horizontal', font='Arial', color=basic_dna_color, alpha=1,
                 parent_start=(0,0), deg=np.pi/2, origin=(0,0), path_dict=None, *args, **kwargs):
        """Store the glyph settings and build the initial text path.

        Args:
            char: the character to draw.
            ax: axes to draw on; when ``None`` one is created via ``generate_ax``.
            start_pos: anchor of the glyph (a third component is read as ``z``
                by ``draw`` for the ``'Threed'`` logo type).
            width, height: target size of the drawn glyph.
            limited_char_width: lower bound for the glyph width used when
                computing the horizontal scale; looked up through
                ``get_limited_char_width`` when ``None``.
            logo_type: ``'Horizontal'``, ``'Threed'``, ``'Circle'`` or ``'Radiation'``.
            font: font name, kept on the instance (see ``set_font``).
            color: mapping from character to colour; the key ``'other'`` is the
                fallback entry.
            alpha: patch transparency.
            parent_start: rotation centre for the ``'Circle'`` logo type.
            deg: rotation angle in radians.
            origin: rotation centre for the ``'Radiation'`` logo type.
            path_dict: optional mapping ``char -> (path, bbox)`` reused by
                ``transform`` instead of building a new ``TextPath``.
        """
        super(Character, self).__init__(*args, **kwargs)
        self.char = char
        self.start_pos = start_pos
        self.width = width
        self.height = height
        self.logo_type = logo_type
        # The original accepted ``font`` but silently dropped it.
        self.font = font
        self.alpha = alpha
        self.parent_start = parent_start
        self.origin = origin
        self.deg = deg
        self.path = None
        self.patch = None
        self.color_map = color
        self.limited_char_width = limited_char_width
        # A fresh dict per instance avoids sharing one mutable default object.
        self.path_dict = {} if path_dict is None else path_dict

        if ax is None:
            self.generate_ax(threed=(self.logo_type == 'Threed'))
        else:
            self.ax = ax
        if limited_char_width is None:
            self.limited_char_width = self.get_limited_char_width()
        self.generate_components()

    def generate_components(self):
        """Build the unscaled text path of the character at ``start_pos``."""
        self.path = TextPath(self.start_pos, self.char, size=1)

    def transform_path(self, transformation):
        """Return the current path mapped through ``transformation``."""
        return transformation.transform_path(self.path)

    def set_font(self, font):
        """Remember the font name."""
        self.font = font

    def set_alpha(self, alpha):
        """Set the transparency used for the patch built by ``transform``."""
        # The original wrote ``self.set_alpha = alpha``, which replaced this
        # method with a number and left ``self.alpha`` untouched.
        self.alpha = alpha

    def get_path_extents(self):
        """Return the bounding box of the current path."""
        return self.path.get_extents()

    def get_patch_extents(self):
        """Return the extents of the patch (only valid after ``transform``)."""
        return self.patch.get_extents()

    def transform(self):
        """Scale, place and rotate the glyph, then rebuild ``path`` and ``patch``."""
        width = self.width
        height = self.height

        if self.char in self.path_dict:
            tmp_path, bbox = self.path_dict[self.char]
        else:
            tmp_path = TextPath((0, 0), self.char, size=1)
            bbox = tmp_path.get_extents()

        # Narrow glyphs are scaled as if they were ``limited_char_width`` wide.
        ref_width = max(bbox.width, self.limited_char_width)

        # Unknown logo types get no offset instead of failing on unbound locals.
        hoffset = 0
        voffset = 0
        if self.logo_type in ('Horizontal', 'Threed'):
            # centre the glyph inside its column
            hoffset = (width - bbox.width * width / ref_width) / 2
        elif self.logo_type == 'Circle':
            hoffset = -1 * (bbox.width * width / ref_width) / 2
        elif self.logo_type == 'Radiation':
            voffset = -1 * self.radiation_space / 2

        transformation = Affine2D() \
            .translate(tx=-bbox.xmin, ty=-bbox.ymin) \
            .scale(sx=width / ref_width, sy=height / bbox.height) \
            .translate(tx=self.start_pos[0] + hoffset, ty=self.start_pos[1] + voffset)

        if self.logo_type == 'Circle':
            transformation = transformation.rotate_around(
                self.parent_start[0], self.parent_start[1], self.deg - np.pi / 2)
        elif self.logo_type == 'Radiation':
            transformation = transformation.rotate_around(
                self.origin[0], self.origin[1], self.deg)

        # ``get_color_scheme`` may hand back None; fall back to plain grey then.
        color_map = self.color_map if self.color_map is not None else {}
        glyph_color = color_map.get(self.char, color_map.get('other', 'grey'))

        self.path = transformation.transform_path(tmp_path)
        self.patch = PathPatch(self.path, linewidth=0,
                               facecolor=glyph_color,
                               alpha=self.alpha,
                               edgecolor=glyph_color)

    def draw(self):
        """Transform the glyph and add its patch to the axes."""
        self.transform()
        self.ax.add_patch(self.patch)
        if self.logo_type == 'Threed':
            # lift the flat patch into the 3-D axes at depth ``start_pos[2]``
            art3d.pathpatch_2d_to_3d(self.patch, z=self.start_pos[2], zdir='y')

    def compute_positions(self):
        """No-op: a single character has no children to lay out."""
        pass

    def get_height(self):
        """Return the target height of the glyph."""
        return self.height

    def get_width(self):
        """Return the target width of the glyph."""
        return self.width
from .character import Character
from .item import Item
from .utils import rotate
import numpy as np
from matplotlib.patches import PathPatch,Rectangle,Circle,Polygon
from matplotlib.path import Path
from .colors import get_color_scheme

basic_dna_color = get_color_scheme('basic_dna_color')


class Column(Item):
    """A stack of ``Character`` glyphs representing one position of a logo.

    Characters are stacked bottom-up in order of increasing weight; each
    character's height is its weight.

    ``generate_ax``, ``get_limited_char_width`` and the ``deg`` /
    ``radiation_space`` attributes are not defined in this class; they are
    presumably supplied by ``Item`` or set by the owning logo -- verify there.
    """

    def __init__(self, bases, weights, ax=None, start_pos=(0,0), logo_type='Horizontal', char_margin_ratio=0.05,
                 width=1, parent_start=(0,0), origin=(0,0), color=basic_dna_color, limited_char_width=None,
                 path_dict=None, *args, **kwargs):
        """Store the column settings and create one ``Character`` per base.

        Args:
            bases: characters of this column.
            weights: heights of the characters, parallel to ``bases``.
            ax: axes to draw on; when ``None`` one is created via ``generate_ax``.
            start_pos: anchor of the column (bottom of the stack).
            logo_type: ``'Horizontal'``, ``'Threed'``, ``'Circle'`` or ``'Radiation'``.
            char_margin_ratio: vertical gap between stacked characters, as a
                fraction of the lower character's height.
            width: width given to every character.
            parent_start: stored on the instance.
            origin: rotation centre used for the ``'Radiation'`` logo type.
            color: character -> colour mapping forwarded to each ``Character``.
            limited_char_width: forwarded to each ``Character``; looked up
                through ``get_limited_char_width`` when ``None``.
            path_dict: optional glyph-path cache forwarded to each ``Character``.
        """
        super(Column, self).__init__(*args, **kwargs)
        self.bases = bases
        self.weights = weights
        self.width = width
        self.char_margin_ratio = char_margin_ratio
        self.start_pos = start_pos
        self.parent_start = parent_start
        self.origin = origin
        self.logo_type = logo_type
        self.color = color
        self.limited_char_width = limited_char_width
        # A fresh dict per instance avoids sharing one mutable default object.
        self.path_dict = {} if path_dict is None else path_dict

        self.characters = []
        if ax is None:
            self.generate_ax(threed=(self.logo_type == 'Threed'))
        else:
            self.ax = ax

        if limited_char_width is None:
            self.limited_char_width = self.get_limited_char_width()

        self.generate_components()

    def generate_components(self):
        """Append one ``Character`` per base, lightest weight first."""
        for base, weight in sorted(zip(self.bases, self.weights), key=lambda d: d[1]):
            character = Character(base, width=self.width, height=weight, ax=self.ax,
                                  logo_type=self.logo_type, parent_start=self.start_pos,
                                  origin=self.origin, color=self.color,
                                  limited_char_width=self.limited_char_width,
                                  path_dict=self.path_dict)
            self.characters.append(character)

    def draw(self):
        """Draw every character of the column."""
        for character in self.characters:
            character.draw()

    def draw_wrap(self):
        """Add a closed outline patch through the four corners of the column."""
        p1, p2, p3, p4 = self.get_edge()
        verts = [p1, p2, p3, p4, p1]
        codes = [
            Path.MOVETO,
            Path.LINETO,
            Path.LINETO,
            Path.LINETO,
            Path.CLOSEPOLY
        ]
        self.ax.add_patch(PathPatch(Path(verts, codes)))

    def compute_positions(self):
        """Place the characters on top of each other, starting at ``start_pos``."""
        start_pos = self.start_pos
        for character in self.characters:
            character.set_start_pos(start_pos)
            character.set_parent_start(self.start_pos)

            if self.logo_type in ('Circle', 'Radiation'):
                character.set_deg(self.deg)
            if self.logo_type == 'Radiation':
                character.set_radiation_space(self.radiation_space)

            character.set_width(self.width)
            character.compute_positions()

            # The next character starts above this one plus the margin.
            step = character.get_height() * (1 + self.char_margin_ratio)
            if self.logo_type == 'Threed':
                start_pos = (start_pos[0], start_pos[1] + step, start_pos[2])
            else:
                start_pos = (start_pos[0], start_pos[1] + step)

    def get_height(self):
        """Return the stacked height: margins between characters, none on top."""
        height = sum([char.get_height() * (1 + self.char_margin_ratio) for char in self.characters[:-1]])
        if self.characters:
            height += self.characters[-1].get_height()
        return height

    def get_width(self):
        """Return the width of the widest character, or 0 for an empty column."""
        return max((char.get_width() for char in self.characters), default=0)

    def get_edge(self):
        """Return the four corner points of the column for its logo type.

        Returns ``None`` for a logo type other than the four handled below.
        """
        h = self.get_height()
        w = self.get_width()
        x0, y0 = self.start_pos[0], self.start_pos[1]

        # Equality, not ``in``: ``x in 'Horizontal'`` is a substring test and
        # would also accept values such as '' or 'H'.
        if self.logo_type == 'Horizontal':
            leftbottom = self.start_pos
            rightbottom = (x0 + w, y0)
            righttop = (x0 + w, y0 + h)
            lefttop = (x0, y0 + h)
            return leftbottom, rightbottom, righttop, lefttop

        if self.logo_type == 'Threed':
            z0 = self.start_pos[2]
            leftbottom = self.start_pos
            rightbottom = (x0 + w, y0, z0)
            righttop = (x0 + w, y0 + h, z0)
            lefttop = (x0, y0 + h, z0)
            return leftbottom, rightbottom, righttop, lefttop

        if self.logo_type == 'Circle':
            # rectangle centred on the anchor, rotated around the anchor
            p1 = (x0 - w / 2, y0)
            p2 = (x0 + w / 2, y0)
            p3 = (x0 + w / 2, y0 + h)
            p4 = (x0 - w / 2, y0 + h)
            nodes = rotate([p1, p2, p3, p4], origin=self.start_pos, angle=self.deg - np.pi / 2)
            return nodes

        if self.logo_type == 'Radiation':
            # rectangle shifted down by half the radiation space, rotated around ``origin``
            half_space = self.radiation_space / 2
            p1 = (x0, y0 - half_space)
            p2 = (x0 + w, y0 - half_space)
            p3 = (x0 + w, y0 + h - half_space)
            p4 = (x0, y0 + h - half_space)
            nodes = rotate([p1, p2, p3, p4], origin=self.origin, angle=self.deg)
            return nodes

        return None
from scipy.stats import spearmanr,pearsonr
import numpy as np
from scipy.spatial import distance
import math


def dotproduct(v1, v2):
    """Return the dot product of two parallel numeric sequences."""
    return sum((a*b) for a, b in zip(v1, v2))


def length(v):
    """Return the Euclidean norm of ``v``."""
    return math.sqrt(dotproduct(v, v))


def costheta(v1, v2):
    """Return the cosine of the angle between ``v1`` and ``v2``.

    Raises ``ZeroDivisionError`` when either vector has zero length.
    """
    return dotproduct(v1, v2) / (length(v1) * length(v2))


def get_score_mat(bits_array, align_metric='sort_consistency', gap_score=-1, seq_type='dna'):
    """Return the mean per-column alignment score for every pair ``i < j``.

    Each entry of ``bits_array`` is a sequence of positions; a position is a
    collection of ``(symbol, value)`` pairs (it is passed to ``match_score``).

    Returns:
        ``{i: {j: score}}`` for all ``i < j``. A pair whose alignment has no
        columns (both sequences empty) scores 0 instead of dividing by zero.
    """
    scores_mat = {}
    n_seqs = len(bits_array)
    for i in range(n_seqs):
        for j in range(i + 1, n_seqs):
            bits1 = bits_array[i]
            bits2 = bits_array[j]
            align1, align2 = needle(bits1, bits2, align_metric=align_metric,
                                    gap_penalty=gap_score, seq_type=seq_type)
            score = 0
            for pos1, pos2 in zip(align1, align2):
                if pos1 == '-' or pos2 == '-':
                    score += gap_score
                else:
                    score += match_score(bits1[pos1], bits2[pos2],
                                         align_metric=align_metric, seq_type=seq_type)
            scores_mat.setdefault(i, {})[j] = score / len(align1) if align1 else 0
    return scores_mat


def msa(bits_array, scores_mat, align_metric='sort_consistency', gap_score=-1, seq_type='dna'):
    """Progressively align all sequences, starting from the best-scoring pair.

    Args:
        bits_array: the sequences (see ``get_score_mat``).
        scores_mat: pairwise scores as returned by ``get_score_mat``.

    Returns:
        One aligned row per input sequence, in input order; gap columns are
        represented by empty lists.

    Raises:
        ValueError: two or more sequences were given but ``scores_mat`` holds
            no scores.
    """
    n_seqs = len(bits_array)
    if n_seqs < 2:
        # Nothing to align against; the original crashed on max() of nothing.
        return [list(bits) for bits in bits_array]

    # Seed with the highest-scoring pair (first one wins on ties).
    seed = None
    seed_score = None
    for i in scores_mat:
        for j in scores_mat[i]:
            if seed is None or scores_mat[i][j] > seed_score:
                seed = (i, j)
                seed_score = scores_mat[i][j]
    if seed is None:
        raise ValueError('scores_mat holds no pairwise scores')

    first, second = seed
    align1, align2 = needle(bits_array[first], bits_array[second], align_metric=align_metric,
                            gap_penalty=gap_score, seq_type=seq_type)

    pools = [first, second]  # sequence index of each aligned row, in row order
    new_bits_array = [
        [bits_array[first][pos] if pos != '-' else [] for pos in align1],
        [bits_array[second][pos] if pos != '-' else [] for pos in align2],
    ]

    while len(pools) < n_seqs:
        remaining = set(range(n_seqs)) - set(pools)

        # Pick the unaligned sequence closest to any aligned one. Tracking
        # "nothing chosen yet" explicitly (instead of a -1E9 sentinel) means a
        # candidate is always selected, so every pass adds exactly one row.
        best = None
        best_score = None
        for i in pools:
            for j in remaining:
                score = scores_mat[min(i, j)][max(i, j)]
                if best is None or score > best_score:
                    best = (i, j)
                    best_score = score
        anchor, newcomer = best

        bits1 = new_bits_array[pools.index(anchor)]
        bits2 = bits_array[newcomer]
        align1, align2 = needle(bits1, bits2, align_metric=align_metric,
                                gap_penalty=gap_score, seq_type=seq_type)

        # Gaps opened in the anchor row are propagated to every aligned row.
        new_bits_array = [
            [row[pos] if pos != '-' else [] for pos in align1]
            for row in new_bits_array
        ]
        new_bits_array.append([bits2[pos] if pos != '-' else [] for pos in align2])
        pools.append(newcomer)

    row_of = {seq_idx: row_idx for row_idx, seq_idx in enumerate(pools)}
    return [new_bits_array[row_of[seq_idx]] for seq_idx in range(n_seqs)]


def get_connect(bits_array, align_metric='sort_consistency', gap_score=-1, msa_input=False, seq_type='dna'):
    """Score the matched positions between each pair of neighbouring sequences.

    With ``msa_input`` the sequences are taken as already aligned and
    positions are paired by index; otherwise each pair is aligned with
    ``needle``.

    Returns:
        ``{index: {pos1: [score, [pos2]]}}`` where ``index`` is the first
        sequence of the pair and gap columns are skipped.
    """
    connected = {}
    for index in range(len(bits_array) - 1):
        bits1 = bits_array[index]
        bits2 = bits_array[index + 1]
        if msa_input:
            align1 = list(range(len(bits1)))
            align2 = list(range(len(bits2)))
        else:
            align1, align2 = needle(bits1, bits2, align_metric=align_metric,
                                    gap_penalty=gap_score, seq_type=seq_type)
        connected[index] = {}

        for pos1, pos2 in zip(align1, align2):
            if pos1 == '-' or pos2 == '-':
                continue
            score = match_score(bits1[pos1], bits2[pos2], align_metric=align_metric, seq_type=seq_type)
            connected[index][pos1] = [score, [pos2]]
    return connected


# Entropy (natural log) of the uniform distribution over 20 / 4 symbols.
max_entropy_aa = -sum([(1/20)*np.log(1/20) for i in range(20)])
max_entropy_dna = -sum([(1/4)*np.log(1/4) for i in range(4)])


def _aligned_vectors(bit1, bit2):
    """Return both positions as value vectors over their sorted, shared key set."""
    map1 = dict(bit1)
    map2 = dict(bit2)
    keys = sorted(map1.keys() | map2.keys())
    return [map1.get(key, 0) for key in keys], [map2.get(key, 0) for key in keys]


def _entropy(values):
    """Return the natural-log entropy of the strictly positive ``values``."""
    return -sum([p * np.log(p) for p in values if p > 0])


def match_score(bit1, bit2, align_metric='sort_consistency', gap_score=-1, seq_type='dna'):
    """Return the similarity of two logo positions.

    Args:
        bit1, bit2: collections of ``(symbol, value)`` pairs; an empty one
            (a gap column) scores 0.
        align_metric: ``'dot_product'``, ``'sort_consistency'``,
            ``'js_divergence'``, ``'cosine'`` or ``'entropy_bhattacharyya'``;
            anything else is treated as ``'dot_product'``.
        gap_score: unused; kept for interface compatibility.
        seq_type: ``'dna'``/``'rna'`` or ``'protein'``/``'aa'`` (case
            insensitive); only read by ``'entropy_bhattacharyya'``.

    Returns:
        The score, or 0 when the computation fails (the error is printed).
    """
    try:

        if len(bit1) == 0 or len(bit2) == 0:
            return 0

        if align_metric not in ['dot_product', 'sort_consistency', 'js_divergence', 'cosine', 'entropy_bhattacharyya']:
            align_metric = 'dot_product'

        if align_metric == 'sort_consistency':
            # Compare symbols rank by rank after sorting by descending value.
            ranked1 = sorted(bit1, key=lambda d: d[1], reverse=True)
            ranked2 = sorted(bit2, key=lambda d: d[1], reverse=True)
            score = 0
            for item1, item2 in zip(ranked1, ranked2):
                if item1[0] == item2[0]:
                    score += item1[1] * item2[1]
            return score

        v1, v2 = _aligned_vectors(bit1, bit2)

        if align_metric == 'entropy_bhattacharyya':
            max_entropy = 0
            if seq_type.lower() in ['protein', 'aa']:
                max_entropy = max_entropy_aa
            if seq_type.lower() in ['dna', 'rna']:
                max_entropy = max_entropy_dna
            if max_entropy == 0:
                # Unknown sequence type: without a normaliser the numpy
                # division below would silently produce inf/nan scores.
                return 0
            bc = sum([np.sqrt(i1 * i2) for i1, i2 in zip(v1, v2)])
            entropy1 = _entropy(v1)
            entropy2 = _entropy(v2)
            return bc * np.sqrt((1 - (entropy1 / max_entropy)) * (1 - (entropy2 / max_entropy)))

        if align_metric == 'dot_product':
            return dotproduct(v1, v2)

        if align_metric == 'cosine':
            if length(v1) * length(v2) == 0:
                return 0
            return costheta(v1, v2)

        if align_metric == 'js_divergence':
            # NOTE: the inputs must be probabilities rather than bits here.
            if sum(v1) * sum(v2) == 0:
                return 0
            return 1 - distance.jensenshannon(v1, v2)

    except Exception as e:
        # Deliberately best-effort: a malformed position scores 0.
        print('exception: ', e)
        return 0


#https://github.com/alevchuk/pairwise-alignment-in-python/blob/master/alignment.py
def needle(seq1, seq2, gap_penalty=-1, align_metric='sort_consistency', seq_type='dna'):
    """Globally align two sequences of logo positions (Needleman-Wunsch).

    Returns:
        ``(align1, align2)``: two equally long lists, one entry per alignment
        column, holding the index into ``seq1`` / ``seq2`` or ``'-'`` for a gap.
    """
    m, n = len(seq1), len(seq2)

    score = np.zeros((m + 1, n + 1))  # the DP table
    # Substitution scores are kept so the traceback reuses exactly the values
    # the table was built from instead of calling match_score again.
    sub = np.zeros((m + 1, n + 1))

    for i in range(m + 1):
        score[i][0] = gap_penalty * i
    for j in range(n + 1):
        score[0][j] = gap_penalty * j
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            sub[i][j] = match_score(seq1[i-1], seq2[j-1], align_metric=align_metric, seq_type=seq_type)
            match = score[i - 1][j - 1] + sub[i][j]
            delete = score[i - 1][j] + gap_penalty
            insert = score[i][j - 1] + gap_penalty
            score[i][j] = max(match, delete, insert)

    # Traceback from the bottom-right cell. Every pass decrements i or j (or
    # stops), so the loop terminates without an iteration counter.
    align1, align2 = [], []
    i, j = m, n
    while i > 0 and j > 0:
        score_current = score[i][j]

        if score_current == score[i-1][j-1] + sub[i][j]:
            # diagonal: the two positions are paired
            align1.append(i-1)
            align2.append(j-1)
            i -= 1
            j -= 1
        elif score_current == score[i-1][j] + gap_penalty:
            # gap in seq2
            align1.append(i-1)
            align2.append('-')
            i -= 1
        elif score_current == score[i][j-1] + gap_penalty:
            # gap in seq1
            align1.append('-')
            align2.append(j-1)
            j -= 1
        else:
            break

    # Finish tracing up to the top-left cell.
    while i > 0:
        align1.append(i-1)
        align2.append('-')
        i -= 1
    while j > 0:
        align1.append('-')
        align2.append(j-1)
        j -= 1
    return align1[::-1], align2[::-1]
align=args.align, align_metric=args.align_metric, connect_threshold = args.connect_threshold, 34 | color=color_scheme, task_name=args.task_name, hide_left_axis = args.hide_left_axis, 35 | hide_right_axis = args.hide_right_axis, hide_bottom_axis = args.hide_bottom_axis, 36 | hide_top_axis = args.hide_top_axis, show_grid = args.show_grid, show_group_id = args.show_group_id, 37 | hide_x_ticks = args.hide_x_ticks, hide_y_ticks = args.hide_y_ticks, hide_z_ticks=args.hide_z_ticks, 38 | x_label=args.x_label, y_label=args.y_label, z_label=args.z_label, 39 | title_size=args.title_size, label_size=args.label_size, group_id_size=args.group_id_size, 40 | tick_size=args.tick_size, logo_margin_ratio = args.logo_margin_ratio, column_margin_ratio = args.column_margin_ratio, 41 | figure_size_x=args.figure_size_x, figure_size_y=args.figure_size_y, 42 | char_margin_ratio = args.char_margin_ratio, align_color=args.align_color,align_alpha=args.align_alpha , 43 | display_range_left=args.display_range_left, display_range_right=args.display_range_right, 44 | gap_score = args.gap_score, 45 | padding_align = args.padding_align, 46 | hide_version_tag=args.hide_version_tag, 47 | sequence_type = args.sequence_type, 48 | height_algorithm=args.height_algorithm, 49 | seq_file=args.seq_file, output_dir=args.output_dir,fa_output_dir=args.fa_output_dir,uid=args.uid, 50 | group_resolution=args.group_resolution,clustering_method=args.clustering_method, 51 | clustalo_bin=args.clustalo_bin,fasttreemp_bin=args.fasttreemp_bin,fasttree_bin=args.fasttree_bin,treecluster_bin=args.treecluster_bin, 52 | withtree=args.withtree, group_limit = args.group_limit, 53 | auto_size=args.auto_size, 54 | x_axis_rotation=args.x_axis_rotation 55 | ) 56 | if hasattr(logogroup,'error'): 57 | print('error:',logogroup.error) 58 | return {'error':logogroup.error} 59 | 60 | 61 | logogroup.draw() 62 | 63 | if len(args.output_name) > 0: 64 | #if '.' 
def _save_analysis_figure(fig, output_dir, uid, tag, extra_format):
    """Save *fig* as ``<output_dir>/<uid>.<tag>.png`` and, when *extra_format* is not 'png', in that format too."""
    fig.savefig(f"{output_dir}/{uid}.{tag}.png", bbox_inches='tight')
    if extra_format != 'png':
        fig.savefig(f"{output_dir}/{uid}.{tag}.{extra_format}", bbox_inches='tight')


def run_from_config(config_file):
    """Build, draw and save a logo group described by a TOML config file.

    The parsed config is passed as keyword arguments to ``LogoGroup``. The
    keys ``uid``, ``output_dir``, ``fa_output_dir``, ``logo_format`` and
    ``analysis`` are read directly and must be present; ``analysis_format``,
    ``padding_align`` and ``group_strategy`` are optional.

    Returns:
        ``{'error': ...}`` when the constructed ``LogoGroup`` carries an
        ``error`` attribute, otherwise ``None``.
    """
    config = toml.load(config_file)
    print(config)
    print('-----------------')

    uid = config['uid']
    print('uid: ', uid)

    output_dir = config['output_dir']
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(config['fa_output_dir'], exist_ok=True)

    logogroup = LogoGroup(seqs=None, **config)

    if hasattr(logogroup, 'error'):
        print('error:', logogroup.error)
        return {'error': logogroup.error}

    logogroup.draw()

    logo_path = f"{output_dir}/{uid}.{config['logo_format']}"
    logogroup.savefig(logo_path)
    print(logo_path, ' saved')

    # A PNG copy is always written alongside any other requested format.
    if config['logo_format'].lower() != 'png':
        png_path = f"{output_dir}/{uid}.png"
        logogroup.savefig(png_path)
        print(png_path, ' saved')

    if config['analysis']:
        analysis_format = config.get('analysis_format', 'png')

        fig = logogroup.get_grp_counts_figure()
        if fig:
            fig = fig.figure
            _save_analysis_figure(fig, output_dir, uid, 'counts', analysis_format)
            plt.close(fig)

        fig = logogroup.get_seq_lengths_dist()
        if fig:
            fig = fig.figure
            _save_analysis_figure(fig, output_dir, uid, 'lengths', analysis_format)
            plt.close(fig)

        # Unlike the other plots, this getter's result is saved and closed
        # directly rather than through its ``.figure`` attribute.
        fig = logogroup.get_entropy_figure()
        if fig:
            _save_analysis_figure(fig, output_dir, uid, 'entropy', analysis_format)
            plt.close(fig)

        fig = logogroup.get_boxplot_entropy_figure()
        if fig:
            fig = fig.figure
            _save_analysis_figure(fig, output_dir, uid, 'boxplot_entropy', analysis_format)
            plt.close(fig)

        if config.get('padding_align', False) or config.get('group_strategy', '') == 'auto':
            fig = logogroup.get_correlation_figure()
            if fig:
                # NOTE(review): this object is saved but never closed, as in
                # the original; confirm its type before adding plt.close().
                _save_analysis_figure(fig, output_dir, uid, 'clustermap', analysis_format)

    return None
parser.add_argument('--group_strategy',type=str,help='The strategy to separate sequences into groups',choices=['auto','length','identifier'],default='auto') 226 | parser.add_argument('--clustering_method',type=str,help='The method for tree clustering',default='max') 227 | parser.add_argument('--group_resolution',type=float,help='The resolution for sequence grouping',default=0.5) 228 | parser.add_argument('--group_limit',type=int,help='The limit for group number',default=20) 229 | 230 | #sort 231 | parser.add_argument('--group_order',type=str,help='The order of groups',choices=['length','length_reverse','identifier','identifier_reverse'],default='length') 232 | 233 | #color 234 | parser.add_argument('--color_scheme',type=str,help='The color scheme',choices=['basic_dna_color','basic_rna_color','basic_aa_color'],default='basic_dna_color') 235 | parser.add_argument('--color_scheme_json_str',type=str,help='The json string of color scheme',default=None) 236 | parser.add_argument('--color_scheme_json_file',type=str,help='The json file of color scheme',default=None) 237 | 238 | #align 239 | parser.add_argument('--height_algorithm',type=str,help='The algorithm for character height',default='bits',choices=['bits','bits_without_correction','probabilities']) 240 | 241 | parser.add_argument('--align',action='store_true',dest='align', help='If show alignment of adjacent sequence logo') 242 | parser.add_argument('--padding_align',action='store_true',dest='padding_align', help='If padding logos to make multiple logo alignment') 243 | 244 | parser.add_argument('--align_metric',type=str,help='The metric for align score',default='dot_product',choices=['dot_product','js_divergence','cosine','entropy_bhattacharyya']) 245 | parser.add_argument('--connect_threshold',type=float,help='The align threshold',default=0.8) 246 | 247 | parser.add_argument('--gap_score',type=float,help='The gap score for alignment',default=-1.0) 248 | 249 | #display range 250 | 
parser.add_argument('--display_range_left',type=int,help='The start position of display range (Global alignment with padding required)',default=0) 251 | parser.add_argument('--display_range_right',type=int,help='Then end position of display range (Global alignment with padding requirement)',default=-1) 252 | 253 | #layout 254 | parser.add_argument('--withtree',action='store_true',dest='withtree', help='If show tree besides sequence logo') 255 | 256 | parser.add_argument('--logo_margin_ratio',type=float,help='Margin ratio between the logos',default=0.1) 257 | parser.add_argument('--column_margin_ratio',type=float,help='Margin ratio between the columns',default=0.05) 258 | parser.add_argument('--char_margin_ratio',type=float,help='Margin ratio between the chars',default=0.05) 259 | 260 | #style 261 | 262 | parser.add_argument('--hide_version_tag',action='store_true',dest='hide_version_tag',help='If show version tag of MetaLogo') 263 | 264 | parser.add_argument('--hide_left_axis',action='store_true',dest='hide_left_axis',help='If hide left axis') 265 | parser.add_argument('--hide_right_axis',action='store_true',dest='hide_right_axis',help='If hide right axis') 266 | parser.add_argument('--hide_top_axis',action='store_true',dest='hide_top_axis',help='If hide top axis') 267 | parser.add_argument('--hide_bottom_axis',action='store_true',dest='hide_bottom_axis',help='If hide bottom axis') 268 | 269 | parser.add_argument('--hide_x_ticks',action='store_true',dest='hide_x_ticks',help='If hide ticks of X axis') 270 | parser.add_argument('--hide_y_ticks',action='store_true',dest='hide_y_ticks',help='If hide ticks of Y axis') 271 | parser.add_argument('--hide_z_ticks',action='store_true',dest='hide_z_ticks',help='If hide ticks of Z axis') 272 | 273 | parser.add_argument('--x_label', type=str, help='The label for X axis') 274 | parser.add_argument('--y_label', type=str, help='The label for Y axis') 275 | parser.add_argument('--z_label', type=str, help='The label for Z axis') 276 
| 277 | parser.add_argument('--show_group_id',action='store_true',dest='show_group_id',help='If show group ids') 278 | parser.add_argument('--show_grid',action='store_true',dest='show_grid',help='If show background grid') 279 | 280 | 281 | parser.add_argument('--title_size',type=int,help='The size of figure title',default=20) 282 | parser.add_argument('--label_size',type=int,help='The size of figure xy labels',default=10) 283 | parser.add_argument('--tick_size',type=int,help='The size of figure ticks',default=10) 284 | parser.add_argument('--group_id_size',type=int,help='The size of group labels',default=10) 285 | parser.add_argument('--x_axis_rotation',type=int,help='The rotation of x axis ticks',default=0) 286 | 287 | parser.add_argument('--figure_size_x',type=float,help='The width of figure',default=20) 288 | parser.add_argument('--figure_size_y',type=float,help='The height of figure',default=10) 289 | parser.add_argument('--auto_size',action='store_true',dest='auto_size',help='Let MetaLogo determine the size of figures') 290 | 291 | parser.add_argument('--align_color',type=str,help='The color of alignment',default='blue') 292 | parser.add_argument('--align_alpha',type=float,help='The transparency of alignment',default='0.2') 293 | 294 | #output 295 | parser.add_argument('--output_dir',type=str,help='Output path of figure',default='figure_output') 296 | parser.add_argument('--output_name',type=str,help='Output name of figure',default='') 297 | parser.add_argument('--fa_output_dir',type=str,help='Output path of fas',default='sequence_input') 298 | parser.add_argument('--uid',type=str,help='Task id',default=str(uuid.uuid4())) 299 | 300 | parser.add_argument('--logo_format',type=str,help='The format of figures',choices=['png','pdf'],default='png') 301 | 302 | #analysis 303 | parser.add_argument('--analysis',action='store_true',dest='analysis',help='If perform basic analysis on data') 304 | 305 | #software 306 | 
parser.add_argument('--clustalo_bin',type=str,help='The path of clustalo bin ',default='/usr/bin/clustalo') 307 | parser.add_argument('--fasttree_bin',type=str,help='The path of fasttree bin ',default='/usr/bin/FastTree') 308 | parser.add_argument('--fasttreemp_bin',type=str,help='The path of fasttreeMP bin ',default='/usr/bin/FastTreeMP') 309 | parser.add_argument('--treecluster_bin',type=str,help='The path of treecluster bin ',default='TreeCluster.py') 310 | 311 | parser.add_argument('-v', '--version', action='version', version=__version__) 312 | 313 | args = parser.parse_args() 314 | 315 | 316 | if args.config is not None: 317 | run_from_config(args.config) 318 | else: 319 | run_from_args(args) 320 | 321 | 322 | if __name__ == '__main__': 323 | main() 324 | -------------------------------------------------------------------------------- /MetaLogo/item.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from matplotlib import pyplot as plt 4 | from matplotlib.textpath import TextPath 5 | 6 | class Item(): 7 | def __init__(self, *args, **kwargs) : 8 | pass 9 | 10 | def generate_ax(self,threed=False,withtree=False): 11 | if threed: 12 | fig, ax = plt.subplots(1, 1,figsize=(10,10),subplot_kw=dict(projection="3d")) 13 | self.ax = ax 14 | elif withtree: 15 | fig, (ax1,ax2) = plt.subplots(1, 2,figsize=(10,10),gridspec_kw={'width_ratios': [1, 4]}) 16 | self.ax = ax2 17 | self.ax0 = ax1 18 | plt.subplots_adjust(wspace=0) 19 | else: 20 | fig, ax = plt.subplots(1, 1,figsize=(10,10)) 21 | self.ax = ax 22 | 23 | def draw(self): 24 | pass 25 | 26 | def savefig(self,filename,bbox_inches='tight'): 27 | if (hasattr(self,'ax')) and (self.ax is not None): 28 | self.ax.get_figure().savefig(filename,bbox_inches=bbox_inches) 29 | 30 | def set_start_pos(self,start_pos): 31 | self.start_pos = start_pos 32 | 33 | def set_parent_start(self,parent_start): 34 | self.parent_start = parent_start 35 | 36 | def 
def compute_bits(groups, probs, seq_type='dna',no_correction=False):
    """Convert per-column character probabilities into heights in bits.

    Args:
        groups: mapping of group id to that group's sequences; only the
            number of sequences (``len(groups[gid])``) is used, and only when
            the correction is applied.
        probs: mapping of group id to a list of columns, each column being a
            list of ``(character, probability)`` pairs.
        seq_type: 'dna' or 'rna' (alphabet size 4), 'aa' or 'protein'
            (alphabet size 20); case-insensitive.
        no_correction: when true, use ``log2(size)`` as the maximum
            information; otherwise subtract ``(size - 1) / (2 * ln(2) * n)``
            where ``n`` is the number of sequences in the group.

    Returns:
        Mapping of group id to a list of columns of ``(character, height)``
        pairs. Gap characters ('-') contribute to the column entropy but are
        left out of the output; heights are clamped at 0.

    Raises:
        ValueError: if there is at least one group to process and
            ``seq_type`` is not one of the supported values.
    """
    alphabet_sizes = {'dna': 4, 'rna': 4, 'aa': 20, 'protein': 20}
    size = alphabet_sizes.get(seq_type.lower())

    bits = {}
    for gid, prob in probs.items():
        if size is None:
            raise ValueError(
                f"unsupported seq_type {seq_type!r}; expected one of {sorted(alphabet_sizes)}"
            )

        e = np.log2(size)
        if not no_correction:
            e = e - (size - 1) / (np.log(2) * 2 * len(groups[gid]))

        bit = []
        for column in prob:
            # Column entropy; zero probabilities contribute nothing
            # (0 * log2(0) is taken as 0 instead of NaN).
            h = 0
            for base, p in column:
                if p > 0:
                    h -= p * np.log2(p)

            _bit = []
            for base, p in column:
                if base == '-':
                    continue
                height = max(p * (e - h), 0)
                _bit.append((base, height))
            bit.append(_bit)
        bits[gid] = bit

    return bits
def reverse_msa_seqname(name_dict,oldfile,newfile):
    """Rewrite a FASTA file, replacing each record name by its aliases.

    Every record of *oldfile* whose name is a key of *name_dict* is written
    to *newfile* once per alias in ``name_dict[name]``, with its sequence
    lines joined into a single line. Records whose name is missing from
    *name_dict*, or that have no sequence, are dropped. Returns ``None``.
    """
    def emit(sink, key, residues):
        # Nothing to write until a named record with residues has been read.
        if key == '' or residues == '':
            return
        for alias in name_dict.get(key, []):
            sink.write(f'>{alias}\n')
            sink.write(f'{residues}\n')

    with open(newfile, 'w') as sink:
        current_key = ''
        residues = ''
        with open(oldfile, 'r') as source:
            for raw in source:
                text = raw.strip()
                if not text:
                    continue
                if text[0] != '>':
                    residues += text
                    continue
                # A new header closes the record collected so far.
                emit(sink, current_key, residues)
                current_key = text[1:]
                residues = ''
        # Flush the final record, which no following header terminates.
        emit(sink, current_key, residues)
    return None
= str(uuid.uuid4()) 122 | seq_fa = f'{fa_output_dir}/server.{uid}.fa' 123 | 124 | if not os.path.exists(seq_fa): 125 | save_seqs(seqs, seq_fa) 126 | 127 | groups_dict = {} 128 | 129 | dep_seq_fa = f'{fa_output_dir}/server.{uid}.dep.fa' 130 | name_dict, seq_dict = deduplicate(seq_fa,dep_seq_fa) 131 | 132 | msa(dep_seq_fa,f'{fa_output_dir}/server.{uid}.msa.fa',clustalo_bin) 133 | 134 | if not os.path.exists(f'{fa_output_dir}/server.{uid}.msa.fa'): 135 | return groups_dict 136 | 137 | msa_dict = {} 138 | with open(f'{fa_output_dir}/server.{uid}.msa.fa','r') as inpf: 139 | seqname = '' 140 | seq = '' 141 | for line in inpf: 142 | line = line.strip() 143 | if line[0] == '>': 144 | if seq != '': 145 | msa_dict[seqname] = seq 146 | seqname = line[1:] 147 | seq = '' 148 | else: 149 | seq += line 150 | if seq != '': 151 | msa_dict[seqname] = seq 152 | 153 | if len(seqs) > 1000: 154 | fasttree(f'{fa_output_dir}/server.{uid}.msa.fa', 155 | f'{fa_output_dir}/server.{uid}.fasttree.tree', 156 | fasttreemp_bin,sequence_type) 157 | else: 158 | fasttree(f'{fa_output_dir}/server.{uid}.msa.fa', 159 | f'{fa_output_dir}/server.{uid}.fasttree.tree', 160 | fasttree_bin,sequence_type) 161 | 162 | if not os.path.exists(f'{fa_output_dir}/server.{uid}.fasttree.tree'): 163 | return groups_dict 164 | 165 | try: 166 | if os.path.exists(f'{fa_output_dir}/server.{uid}.treedists.csv'): 167 | dists = pd.read_csv(f'{fa_output_dir}/server.{uid}.treedists.csv',index_col=False,header=0)['0'].tolist() 168 | else: 169 | if len(seqs) > 1000: 170 | dists = get_distance_range_lessmem(f'{fa_output_dir}/server.{uid}.fasttree.tree') 171 | else: 172 | dists = get_distance_range(f'{fa_output_dir}/server.{uid}.fasttree.tree') 173 | pd.Series(list(dists)).to_csv(f'{fa_output_dir}/server.{uid}.treedists.csv',index=None) 174 | except: 175 | dists = get_distance_range_lessmem(f'{fa_output_dir}/server.{uid}.fasttree.tree') 176 | 177 | 
def msa(seq_fa,outfile,clustalo_bin):
    """Align *seq_fa* with the clustalo executable unless *outfile* already exists.

    Args:
        seq_fa: path of the input FASTA file.
        outfile: path the alignment is written to; if it already exists the
            function does nothing.
        clustalo_bin: command used to invoke clustalo.

    Returns:
        ``None``. The exit status of the command is not checked, so callers
        should test for the existence of *outfile*.
    """
    if os.path.exists(outfile):
        # A previous run already produced the alignment; reuse it.
        return

    # NOTE(review): the paths are interpolated unquoted into a shell command,
    # so paths containing spaces or shell metacharacters will not work.
    cmd = f'{clustalo_bin} --auto -i {seq_fa} -o {outfile}'
    os.system(cmd)
    return
def deduplicate(seq_fa,out_fa):
    """Collapse identical sequences of a FASTA file into numbered records.

    Sequences are upper-cased and compared as whole strings. The first
    occurrence of each distinct sequence is written to *out_fa* under the
    name ``seq-<n>`` (n counting from 1 in order of first appearance). Blank
    lines are ignored, and records without a name or without sequence are
    skipped.

    Args:
        seq_fa: path of the input FASTA file.
        out_fa: path of the deduplicated FASTA file to write.

    Returns:
        ``(name_dict, seq_dict)`` where ``name_dict`` maps each ``seq-<n>``
        id to the list of original names sharing that sequence and
        ``seq_dict`` maps each id to the sequence itself.
    """
    seq_dict = {}
    name_dict = {}
    seq2seqno = {}

    with open(out_fa, 'w') as outpf, open(seq_fa, 'r') as inpf:

        def register(seqname, seq):
            # Record one finished FASTA entry.
            if seq == '' or seqname == '':
                return
            seq_id = seq2seqno.get(seq)
            if seq_id is None:
                seq_id = f'seq-{len(seq_dict) + 1}'
                seq2seqno[seq] = seq_id
                seq_dict[seq_id] = seq
                name_dict[seq_id] = [seqname]
                outpf.write(f'>{seq_id}\n')
                outpf.write(f'{seq}\n')
            else:
                # Attach the name to the id of the matching sequence, not to
                # whichever id happened to be created last.
                name_dict[seq_id].append(seqname)

        seq = ''
        seqname = ''
        for line in inpf:
            line = line.strip()
            if not line:
                continue
            if line[0] == '>':
                register(seqname, seq)
                seq = ''
                seqname = line[1:]
            else:
                seq += line.upper()
        # The last record has no following header to trigger registration.
        register(seqname, seq)

    return name_dict, seq_dict
def read_file(filename, filetype):
    """Read sequences from a FASTA or FASTQ file.

    Args:
        filename: path of the file to read.
        filetype: 'fasta'/'fa' or 'fastq'/'fq' (case-insensitive). For any
            other value the file is not opened and empty containers are
            returned.

    Returns:
        ``(seq_dict, seqnames, seqname_dict)``:
        ``seq_dict`` maps a unique name to its upper-cased sequence,
        ``seqnames`` lists the unique names in file order and
        ``seqname_dict`` maps each unique name back to the header text.
        A unique name is the header text (without its marker character)
        followed by a space and a counter: the 1-based header index for
        FASTA, the 1-based non-blank line number of the header for FASTQ.
    """
    sequences = {}
    ordered_names = []
    original_names = {}

    kind = filetype.lower()
    if kind not in ('fasta', 'fa', 'fastq', 'fq'):
        return sequences, ordered_names, original_names

    with open(filename, 'r') as handle:
        # Blank lines are ignored in both formats.
        content = (text for text in (raw.strip() for raw in handle) if text)
        current = None

        if kind in ('fasta', 'fa'):
            header_count = 0
            for text in content:
                if text.startswith('>'):
                    header_count += 1
                    label = text[1:]
                    current = f'{label} {header_count}'
                    ordered_names.append(current)
                    original_names[current] = label
                else:
                    # Sequence lines of one record are concatenated.
                    sequences[current] = sequences.get(current, '') + text.upper()
        else:
            # FASTQ: records are four non-blank lines; only the first two
            # (header and sequence) are used.
            for position, text in enumerate(content):
                field = position % 4
                if field == 0:
                    assert text[0] == '@'
                    label = text[1:]
                    current = f"{label} {position + 1}"
                    ordered_names.append(current)
                    original_names[current] = label
                elif field == 1:
                    sequences[current] = text.upper()

    return sequences, ordered_names, original_names
def write_to_tmp(seqs,tmp_path = './tmp/'):
    """Write sequences to a uniquely named FASTA file and return its path.

    Args:
        seqs: iterable of ``(name, sequence)`` pairs.
        tmp_path: directory to write into; created (including any missing
            parent directories) if it does not exist.

    Returns:
        Path of the written file, ``<tmp_path>/<uuid4>.fa``.
    """
    # makedirs with exist_ok handles nested paths and avoids the race between
    # checking for the directory and creating it.
    os.makedirs(tmp_path, exist_ok=True)

    uid = str(uuid.uuid4())
    f_name = os.path.join(tmp_path, f'{uid}.fa')
    with open(f_name, 'w') as outpf:
        for name, seq in seqs:
            outpf.write(f'>{name}\n')
            outpf.write(f'{seq}\n')
    return f_name
Path.CURVE4, 145 | Path.CURVE4, 146 | Path.LINETO, 147 | Path.CURVE4, 148 | Path.CURVE4, 149 | Path.CURVE4, 150 | Path.CLOSEPOLY 151 | ] 152 | path = Path(verts, codes) 153 | patch = PathPatch(path, **kargs) 154 | return patch 155 | 156 | def straight_connect(p1,p2,p3,p4,**kargs): 157 | verts = [p1,p2,p3,p4,p1] 158 | codes = [ 159 | Path.MOVETO, 160 | Path.LINETO, 161 | Path.LINETO, 162 | Path.LINETO, 163 | Path.CLOSEPOLY 164 | ] 165 | path = Path(verts, codes) 166 | patch = PathPatch(path, **kargs) 167 | return patch 168 | 169 | def link_edges(edge1, edge2, ax, threed=False,x=0,y=1,z=-1, color='blue',alpha=0.1): 170 | if ax is None: 171 | _, ax = plt.subplots(1, 1,figsize=(10,10)) 172 | 173 | 174 | p1,p2 = edge1 175 | p4,p3 = edge2 176 | 177 | if threed: 178 | patch = straight_connect((p1[x],p1[y]), 179 | (p2[x],p2[y]), 180 | (p3[x],p3[y]), 181 | (p4[x],p4[y]), fill=True,alpha=alpha,color=color,linewidth=0) 182 | 183 | else: 184 | patch = straight_connect(p1,p2,p3,p4, fill=True,alpha=alpha,color=color,linewidth=0) 185 | 186 | 187 | ax.add_patch(patch) 188 | if threed: 189 | art3d.pathpatch_2d_to_3d(patch, z=0, zdir='z') 190 | 191 | return ax 192 | 193 | 194 | #https://stackoverflow.com/questions/34372480/rotate-point-about-another-point-in-degrees-python/34374437 195 | def rotate(p, origin=(0, 0), angle=0): 196 | R = np.array([[np.cos(angle), -np.sin(angle)], 197 | [np.sin(angle), np.cos(angle)]]) 198 | o = np.atleast_2d(origin) 199 | p = np.atleast_2d(p) 200 | return np.squeeze((R @ (p.T-o.T) + o.T).T) 201 | 202 | 203 | 204 | #https://matplotlib.org/stable/gallery/mplot3d/pathpatch3d.html#sphx-glr-gallery-mplot3d-pathpatch3d-py 205 | def text3d(ax, xyz, s, zdir="z", size=None, angle=0, usetex=False, **kwargs): 206 | """ 207 | Plots the string *s* on the axes *ax*, with position *xyz*, size *size*, 208 | and rotation angle *angle*. *zdir* gives the axis which is to be treated as 209 | the third dimension. 
*usetex* is a boolean indicating whether the string 210 | should be run through a LaTeX subprocess or not. Any additional keyword 211 | arguments are forwarded to `.transform_path`. 212 | 213 | Note: zdir affects the interpretation of xyz. 214 | """ 215 | x, y, z = xyz 216 | if zdir == "y": 217 | xy1, z1 = (x, z), y 218 | elif zdir == "x": 219 | xy1, z1 = (y, z), x 220 | else: 221 | xy1, z1 = (x, y), z 222 | 223 | text_path = TextPath((0, 0), s, size=size, usetex=usetex) 224 | trans = Affine2D().rotate(angle).translate(xy1[0], xy1[1]) 225 | 226 | p1 = PathPatch(trans.transform_path(text_path), **kwargs) 227 | ax.add_patch(p1) 228 | art3d.pathpatch_2d_to_3d(p1, z=z1, zdir=zdir) 229 | 230 | def detect_seq_type(seqs): 231 | 232 | 233 | dna_set = {'A','T','G','C','N','-'} 234 | rna_set = {'A','U','G','C','N','-'} 235 | protein_set = {'A','R','N','D','C','Q','E','G','H','I','L','K','M','F','P','S','T','W','Y','V','-'} 236 | 237 | base_set = set() 238 | for _,seq in seqs: 239 | base_set |= set(seq) 240 | 241 | 242 | seq_type = 'aa' 243 | if base_set.issubset(dna_set): 244 | seq_type = 'dna' 245 | elif base_set.issubset(rna_set): 246 | seq_type = 'rna' 247 | elif base_set.issubset(protein_set): 248 | seq_type = 'aa' 249 | 250 | return seq_type 251 | 252 | def save_seqs(seqs, filename): 253 | with open(filename,'w') as outpf: 254 | for seqname,seq in seqs: 255 | outpf.write(f'>{seqname}\n') 256 | outpf.write(f'{seq}\n') 257 | 258 | -------------------------------------------------------------------------------- /MetaLogo/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.1.4' 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MetaLogo 2 | 3 | ## Introduction 4 | 5 | **MetaLogo** is a tool for making sequence logos. 
It can take multiple sequences as input, automatically identify the homogeneity and heterogeneity among sequences and cluster them into different groups at any desired resolution, and finally output multiple aligned sequence logos in one figure. Grouping can also be specified by users, such as grouping by lengths, grouping by sample Id, etc. Compared to conventional sequence logo generators, MetaLogo can display the total sequence population in a more detailed, dynamic and informative view. 6 | 7 | To use MetaLogo, you could visit our public webserver http://metalogo.omicsnet.org. You could also install MetaLogo as a python package to use MetaLogo in your python scripts or in your OS terminal. If you want to provide MetaLogo to people in your local network, you could also set up a webserver by using docker. 8 | 9 | Please check the [tutorial](https://github.com/labomics/MetaLogo/wiki) for detailed usage of the MetaLogo package and webserver. 10 | 11 | ![Introduction](./pngs/about.PNG) 12 | 13 | ## Install 14 | 15 | Before you install MetaLogo, you could first set up a virtual environment for MetaLogo using conda: 16 | 17 | $conda create -n metalogo python=3.7 18 | $conda activate metalogo 19 | 20 | Then, you can install MetaLogo using the following commands: 21 | 22 | $git clone https://github.com/labomics/MetaLogo 23 | $cd MetaLogo 24 | $pip install . 25 | 26 | If the installation succeeds, you can run MetaLogo directly: 27 | 28 | $metalogo -h 29 | 30 | Note that the paths of clustal omega and fasttree bins should be given to MetaLogo, which are required for grouping. You can find these binary files in the dependencies folder in this repo.
Basically, you can run metalogo as follows: 31 | 32 | $metalogo --seq_file examples/cdr3.fa --clustalo_bin dependencies/clustalo --fasttree_bin dependencies/FastTree --fasttreemp_bin dependencies/FastTreeMP --logo_format pdf --output_name test.xx --task_name test.xx 33 | 34 | If you want to use MetaLogo in your scripts, it is also very easy: 35 | 36 | from MetaLogo import logo 37 | sequences = [['seq1','ATACAGATACACATCACAG'],['seq2','ATACAGAGATACCAACAGAC'],['seq3','ATACAGAGTTACCCACGGAC']] 38 | 39 | bin_args = { 40 | 'clustalo_bin':'../MetaLogo/dependencies/clustalo', 41 | 'fasttree_bin':'../MetaLogo/dependencies/FastTree', 42 | 'fasttreemp_bin':'../MetaLogo/dependencies/FastTreeMP', 43 | } 44 | 45 | lg = logo.LogoGroup(sequences,height_algorithm='probabilities',group_strategy='length', **bin_args) 46 | lg.draw() 47 | 48 | Note that the paths of clustal omega and fasttree bins should be given to MetaLogo, which are required for grouping. 49 | 50 | MetaLogo also provides a webserver to draw sequence logos without coding. For server setup, you need to install the required packages first. 51 | 52 | $pip install .[webserver] 53 | 54 | You need to start a redis server and the redis queue service before running the MetaLogo server locally (see [tutorial](https://github.com/labomics/MetaLogo/wiki/Install)). We recommend that you build a docker container to provide the service: 55 | 56 | $cat MetaLogo/server.docker.sh 57 | docker build -t metalogo:v1 . 58 | docker run -d --expose 8050 --name metalogo -e VIRTUAL_HOST=metalogo.omicsnet.org -v "$(pwd)":/code metalogo:v1 59 | $sh MetaLogo/server.docker.sh 60 | ... 61 | $docker ps 62 | CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 63 | ad598ca936df metalogo:v1 "/bin/sh -c 'GUNICORN" 2 days ago Up 2 days 8050/tcp metalogo 64 | 65 | Docker needs to be installed on the system before running the command. This command will build the Docker image and start a Docker container.
You could set up an [nginx-proxy](https://github.com/nginx-proxy/nginx-proxy) layer to redirect network requests to the MetaLogo container or you can just simply use the MetaLogo docker to receive outside network traffic from your local network. 66 | 67 | For more detailed instructions, please check the MetaLogo [tutorial](https://github.com/labomics/MetaLogo/wiki/Install). 68 | 69 | ## Citation 70 | 71 | Please kindly cite our work if you use MetaLogo in your work. 72 | 73 | Chen, Yaowen, et al. "MetaLogo: a heterogeneity-aware sequence logo generator and aligner." Briefings in Bioinformatics (2021). 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labomics/MetaLogo/918b04cbf15dd3bdd4acad5d792d20a7f6c693ce/__init__.py -------------------------------------------------------------------------------- /dependencies/FastTree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labomics/MetaLogo/918b04cbf15dd3bdd4acad5d792d20a7f6c693ce/dependencies/FastTree -------------------------------------------------------------------------------- /dependencies/FastTreeMP: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labomics/MetaLogo/918b04cbf15dd3bdd4acad5d792d20a7f6c693ce/dependencies/FastTreeMP -------------------------------------------------------------------------------- /dependencies/clustalo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labomics/MetaLogo/918b04cbf15dd3bdd4acad5d792d20a7f6c693ce/dependencies/clustalo -------------------------------------------------------------------------------- /dependencies/redis-stable.tar.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/labomics/MetaLogo/918b04cbf15dd3bdd4acad5d792d20a7f6c693ce/dependencies/redis-stable.tar.gz -------------------------------------------------------------------------------- /dependencies/supervisord.conf: -------------------------------------------------------------------------------- 1 | ; Sample supervisor config file. 2 | ; 3 | ; For more information on the config file, please see: 4 | ; http://supervisord.org/configuration.html 5 | ; 6 | ; Notes: 7 | ; - Shell expansion ("~" or "$HOME") is not supported. Environment 8 | ; variables can be expanded using this syntax: "%(ENV_HOME)s". 9 | ; - Quotes around values are not supported, except in the case of 10 | ; the environment= options as shown below. 11 | ; - Comments must have a leading space: "a=b ;comment" not "a=b;comment". 12 | ; - Command will be truncated if it looks like a config file comment, e.g. 13 | ; "command=bash -c 'foo ; bar'" will truncate to "command=bash -c 'foo ". 14 | ; 15 | ; Warning: 16 | ; Paths throughout this example file use /tmp because it is available on most 17 | ; systems. You will likely need to change these to locations more appropriate 18 | ; for your system. Some systems periodically delete older files in /tmp. 19 | ; Notably, if the socket file defined in the [unix_http_server] section below 20 | ; is deleted, supervisorctl will be unable to connect to supervisord. 21 | 22 | [unix_http_server] 23 | file=/tmp/supervisor.sock ; the path to the socket file 24 | ;chmod=0700 ; socket file mode (default 0700) 25 | ;chown=nobody:nogroup ; socket file uid:gid owner 26 | ;username=user ; default is no username (open server) 27 | ;password=123 ; default is no password (open server) 28 | 29 | ; Security Warning: 30 | ; The inet HTTP server is not enabled by default. The inet HTTP server is 31 | ; enabled by uncommenting the [inet_http_server] section below. 
The inet 32 | ; HTTP server is intended for use within a trusted environment only. It 33 | ; should only be bound to localhost or only accessible from within an 34 | ; isolated, trusted network. The inet HTTP server does not support any 35 | ; form of encryption. The inet HTTP server does not use authentication 36 | ; by default (see the username= and password= options to add authentication). 37 | ; Never expose the inet HTTP server to the public internet. 38 | 39 | ;[inet_http_server] ; inet (TCP) server disabled by default 40 | ;port=127.0.0.1:9001 ; ip_address:port specifier, *:port for all iface 41 | ;username=user ; default is no username (open server) 42 | ;password=123 ; default is no password (open server) 43 | 44 | [supervisord] 45 | logfile=/tmp/supervisord.log ; main log file; default $CWD/supervisord.log 46 | logfile_maxbytes=50MB ; max main logfile bytes b4 rotation; default 50MB 47 | logfile_backups=10 ; # of main logfile backups; 0 means none, default 10 48 | loglevel=info ; log level; default info; others: debug,warn,trace 49 | pidfile=/tmp/supervisord.pid ; supervisord pidfile; default supervisord.pid 50 | nodaemon=false ; start in foreground if true; default false 51 | silent=false ; no logs to stdout if true; default false 52 | minfds=1024 ; min. avail startup file descriptors; default 1024 53 | minprocs=200 ; min. avail process descriptors;default 200 54 | ;umask=022 ; process file creation umask; default 022 55 | ;user=supervisord ; setuid to this UNIX account at startup; recommended if root 56 | ;identifier=supervisor ; supervisord identifier, default is 'supervisor' 57 | ;directory=/tmp ; default is not to cd during start 58 | ;nocleanup=true ; don't clean up tempfiles at start; default false 59 | ;childlogdir=/tmp ; 'AUTO' child log dir, default $TEMP 60 | ;environment=KEY="value" ; key value pairs to add to environment 61 | ;strip_ansi=false ; strip ansi escape codes in logs; def. 
false 62 | 63 | ; The rpcinterface:supervisor section must remain in the config file for 64 | ; RPC (supervisorctl/web interface) to work. Additional interfaces may be 65 | ; added by defining them in separate [rpcinterface:x] sections. 66 | 67 | [rpcinterface:supervisor] 68 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 69 | 70 | ; The supervisorctl section configures how supervisorctl will connect to 71 | ; supervisord. configure it match the settings in either the unix_http_server 72 | ; or inet_http_server section. 73 | 74 | [supervisorctl] 75 | serverurl=unix:///tmp/supervisor.sock ; use a unix:// URL for a unix socket 76 | ;serverurl=http://127.0.0.1:9001 ; use an http:// url to specify an inet socket 77 | ;username=chris ; should be same as in [*_http_server] if set 78 | ;password=123 ; should be same as in [*_http_server] if set 79 | ;prompt=mysupervisor ; cmd line prompt (default "supervisor") 80 | ;history_file=~/.sc_history ; use readline history if available 81 | 82 | ; The sample program section below shows all possible program subsection values. 83 | ; Create one or more 'real' program: sections to be able to control them under 84 | ; supervisor. 85 | 86 | ;[program:theprogramname] 87 | ;command=/bin/cat ; the program (relative uses PATH, can take args) 88 | ;process_name=%(program_name)s ; process_name expr (default %(program_name)s) 89 | ;numprocs=1 ; number of processes copies to start (def 1) 90 | ;directory=/tmp ; directory to cwd to before exec (def no cwd) 91 | ;umask=022 ; umask for process (default None) 92 | ;priority=999 ; the relative start priority (default 999) 93 | ;autostart=true ; start at supervisord start (default: true) 94 | ;startsecs=1 ; # of secs prog must stay up to be running (def. 
1) 95 | ;startretries=3 ; max # of serial start failures when starting (default 3) 96 | ;autorestart=unexpected ; when to restart if exited after running (def: unexpected) 97 | ;exitcodes=0 ; 'expected' exit codes used with autorestart (default 0) 98 | ;stopsignal=QUIT ; signal used to kill process (default TERM) 99 | ;stopwaitsecs=10 ; max num secs to wait b4 SIGKILL (default 10) 100 | ;stopasgroup=false ; send stop signal to the UNIX process group (default false) 101 | ;killasgroup=false ; SIGKILL the UNIX process group (def false) 102 | ;user=chrism ; setuid to this UNIX account to run the program 103 | ;redirect_stderr=true ; redirect proc stderr to stdout (default false) 104 | ;stdout_logfile=/a/path ; stdout log path, NONE for none; default AUTO 105 | ;stdout_logfile_maxbytes=1MB ; max # logfile bytes b4 rotation (default 50MB) 106 | ;stdout_logfile_backups=10 ; # of stdout logfile backups (0 means none, default 10) 107 | ;stdout_capture_maxbytes=1MB ; number of bytes in 'capturemode' (default 0) 108 | ;stdout_events_enabled=false ; emit events on stdout writes (default false) 109 | ;stdout_syslog=false ; send stdout to syslog with process name (default false) 110 | ;stderr_logfile=/a/path ; stderr log path, NONE for none; default AUTO 111 | ;stderr_logfile_maxbytes=1MB ; max # logfile bytes b4 rotation (default 50MB) 112 | ;stderr_logfile_backups=10 ; # of stderr logfile backups (0 means none, default 10) 113 | ;stderr_capture_maxbytes=1MB ; number of bytes in 'capturemode' (default 0) 114 | ;stderr_events_enabled=false ; emit events on stderr writes (default false) 115 | ;stderr_syslog=false ; send stderr to syslog with process name (default false) 116 | ;environment=A="1",B="2" ; process environment additions (def no adds) 117 | ;serverurl=AUTO ; override serverurl computation (childutils) 118 | 119 | ; The sample eventlistener section below shows all possible eventlistener 120 | ; subsection values. 
Create one or more 'real' eventlistener: sections to be 121 | ; able to handle event notifications sent by supervisord. 122 | 123 | ;[eventlistener:theeventlistenername] 124 | ;command=/bin/eventlistener ; the program (relative uses PATH, can take args) 125 | ;process_name=%(program_name)s ; process_name expr (default %(program_name)s) 126 | ;numprocs=1 ; number of processes copies to start (def 1) 127 | ;events=EVENT ; event notif. types to subscribe to (req'd) 128 | ;buffer_size=10 ; event buffer queue size (default 10) 129 | ;directory=/tmp ; directory to cwd to before exec (def no cwd) 130 | ;umask=022 ; umask for process (default None) 131 | ;priority=-1 ; the relative start priority (default -1) 132 | ;autostart=true ; start at supervisord start (default: true) 133 | ;startsecs=1 ; # of secs prog must stay up to be running (def. 1) 134 | ;startretries=3 ; max # of serial start failures when starting (default 3) 135 | ;autorestart=unexpected ; autorestart if exited after running (def: unexpected) 136 | ;exitcodes=0 ; 'expected' exit codes used with autorestart (default 0) 137 | ;stopsignal=QUIT ; signal used to kill process (default TERM) 138 | ;stopwaitsecs=10 ; max num secs to wait b4 SIGKILL (default 10) 139 | ;stopasgroup=false ; send stop signal to the UNIX process group (default false) 140 | ;killasgroup=false ; SIGKILL the UNIX process group (def false) 141 | ;user=chrism ; setuid to this UNIX account to run the program 142 | ;redirect_stderr=false ; redirect_stderr=true is not allowed for eventlisteners 143 | ;stdout_logfile=/a/path ; stdout log path, NONE for none; default AUTO 144 | ;stdout_logfile_maxbytes=1MB ; max # logfile bytes b4 rotation (default 50MB) 145 | ;stdout_logfile_backups=10 ; # of stdout logfile backups (0 means none, default 10) 146 | ;stdout_events_enabled=false ; emit events on stdout writes (default false) 147 | ;stdout_syslog=false ; send stdout to syslog with process name (default false) 148 | ;stderr_logfile=/a/path ; stderr 
log path, NONE for none; default AUTO 149 | ;stderr_logfile_maxbytes=1MB ; max # logfile bytes b4 rotation (default 50MB) 150 | ;stderr_logfile_backups=10 ; # of stderr logfile backups (0 means none, default 10) 151 | ;stderr_events_enabled=false ; emit events on stderr writes (default false) 152 | ;stderr_syslog=false ; send stderr to syslog with process name (default false) 153 | ;environment=A="1",B="2" ; process environment additions 154 | ;serverurl=AUTO ; override serverurl computation (childutils) 155 | 156 | ; The sample group section below shows all possible group values. Create one 157 | ; or more 'real' group: sections to create "heterogeneous" process groups. 158 | 159 | ;[group:thegroupname] 160 | ;programs=progname1,progname2 ; each refers to 'x' in [program:x] definitions 161 | ;priority=999 ; the relative start priority (default 999) 162 | 163 | ; The [include] section can just contain the "files" setting. This 164 | ; setting can list multiple files (separated by whitespace or 165 | ; newlines). It can also contain wildcards. The filenames are 166 | ; interpreted as relative to this file. Included files *cannot* 167 | ; include files themselves. 168 | 169 | [include] 170 | files = relative/directory/*.ini 171 | 172 | [program:myworker] 173 | ; Point the command to the specific rq command you want to run. 174 | ; If you use virtualenv, be sure to point it to 175 | ; /path/to/virtualenv/bin/rq 176 | ; Also, you probably want to include a settings module to configure this 177 | ; worker. For more info on that, see http://python-rq.org/docs/workers/ 178 | environment=PYTHONPATH='/code/' 179 | 180 | command=/usr/local/bin/rq worker high default low 181 | ; process_num is required if you specify >1 numprocs 182 | process_name=%(program_name)s-%(process_num)s 183 | ; If you want to run more than one worker instance, increase this 184 | numprocs=3 185 | 186 | ; This is the directory from which RQ is ran. 
Be sure to point this to the 187 | ; directory where your source code is importable from 188 | directory=/code 189 | 190 | ; RQ requires the TERM signal to perform a warm shutdown. If RQ does not die 191 | ; within 10 seconds, supervisor will forcefully kill it 192 | stopsignal=TERM 193 | 194 | ; These are up to you 195 | autostart=true 196 | autorestart=true 197 | stdout_logfile=/code/MetaLogo/logs/workers-stdout.log.%(program_name)s-%(process_num)s 198 | stdout_logfile_maxbytes=50MB 199 | stdout_logfile_backups=10 200 | stderr_logfile=/code/MetaLogo/logs/workers-stderr.log.%(program_name)s-%(process_num)s 201 | stderr_logfile_maxbytes=50MB 202 | stderr_logfile_backups=10 203 | 204 | -------------------------------------------------------------------------------- /examples/all_cluster_center.fa: -------------------------------------------------------------------------------- 1 | >seq0 2 | CARRFDYW 3 | >seq1 4 | CARGFDYW 5 | >seq2 6 | CARGFDPW 7 | >seq3 8 | CARVFDYW 9 | >seq4 10 | CARVNSGSYLGAFDIW 11 | >seq5 12 | CARGFDYW 13 | >seq6 14 | CAREVLVYFDYW 15 | >seq7 16 | CARVLSYYGMDVW 17 | >seq8 18 | CARSSGYYYFDYW 19 | >seq9 20 | CARVVSYYYGMDVW 21 | >seq10 22 | CARVASYYYGMDVW 23 | >seq11 24 | CTRNDFWSGYYFDYW 25 | >seq12 26 | CARTYSGSYTDAFDIW 27 | >seq13 28 | CAREGVVGATTGLDYW 29 | >seq14 30 | CARDGVVTATTGLDYW 31 | >seq15 32 | CARDRIVGGTTGLDYW 33 | >seq16 34 | CTTVVYYYDSSGYSNDAFDIW 35 | >seq17 36 | CARGYSSGWYFDYW 37 | >seq18 38 | CARVPSSSWYFDYW 39 | >seq19 40 | CTTGGRLW 41 | >seq20 42 | CASGFDYW 43 | >seq21 44 | CARGVDPW 45 | >seq22 46 | CARDVGAQFDYW 47 | >seq23 48 | CARDLYTFGMDVW 49 | >seq24 50 | CAVYGYYYYGMDVW 51 | >seq25 52 | CAKVRGGYYYFDYW 53 | >seq26 54 | CAHRRSYYDAFDIW 55 | >seq27 56 | CTSNDFWSGYSDYW 57 | >seq28 58 | CARLGGTAWYFDLW 59 | >seq29 60 | CATDGIVGATGLDYW 61 | >seq30 62 | CAVPRGYSYGPFDYW 63 | >seq31 64 | CTRGDFWSGYWSDYW 65 | >seq32 66 | CSRPYSGSYYSYFDYW 67 | >seq33 68 | CARTYSGSYYSWFDPW 69 | >seq34 70 | CARVRSGSYYDAFDIW 71 | >seq35 72 | CARVRGGSYYGVFDYW 73 
| >seq36 74 | CAKTVSGSYYSPFDYW 75 | >seq37 76 | CARVPGGSYYGPFDYW 77 | >seq38 78 | CARSNSGSYYHAFDIW 79 | >seq39 80 | CARVSSGSYFGAFDIW 81 | >seq40 82 | CATARAGSYYYGMDVW 83 | >seq41 84 | CAKVRGGSYRDAFDIW 85 | >seq42 86 | CASSGSGSYYSPFDYW 87 | >seq43 88 | CARTYSGSYYGAFDIW 89 | >seq44 90 | CAKTYSGSYYSSFDYW 91 | >seq45 92 | CARAYSGSYYYGMDVW 93 | >seq46 94 | CARAYSGSYWAAFDIW 95 | >seq47 96 | CARGYYDSSGYYNWFDPW 97 | >seq48 98 | CARGRTVTTLYYYYGMDVW 99 | >seq49 100 | CARDSVLRYFDWTYYYYGMDVW 101 | >seq50 102 | CARVGYCSSTSCYSYYYYGMDVW 103 | >seq51 104 | CARGRPLDAFDIW 105 | >seq52 106 | CARDSTAYGMDVW 107 | >seq53 108 | CASVHSSSWFFDYW 109 | >seq54 110 | CARSGSYYGPFDYW 111 | >seq55 112 | CAKDSYSSSWYFDYW 113 | >seq56 114 | CARDSGPYYYGMDVW 115 | >seq57 116 | CARGFNYYDSSGFDYW 117 | >seq58 118 | CERDRSVCSSTSCYYYYGMDVW 119 | >seq59 120 | CARGRYYYDSSGYSYYYYGMDVW 121 | >seq60 122 | CATSGYW 123 | >seq61 124 | CTTWGGYW 125 | >seq62 126 | CAKGVDVW 127 | >seq63 128 | CAQGLDYW 129 | >seq64 130 | CAMGFDDW 131 | >seq65 132 | CAKSDGMDVW 133 | >seq66 134 | CAKDVGGNYGYW 135 | >seq67 136 | CAREYLVDLDYW 137 | >seq68 138 | CARTPGPYFDYW 139 | >seq69 140 | CARVVYDYFDYW 141 | >seq70 142 | CARGYRDYGDYW 143 | >seq71 144 | CARVLSYYGMDVW 145 | >seq72 146 | CARMTTVTTYDYW 147 | >seq73 148 | CARYSSSSDFDYW 149 | >seq74 150 | CARAVNFGWFDPW 151 | >seq75 152 | CVRDWGEYYFDYW 153 | >seq76 154 | CARGVWDLWYFDLW 155 | >seq77 156 | CARQQWLRGRFDYW 157 | >seq78 158 | CARETGSGSYPDYW 159 | >seq79 160 | CARVLGTYYYMDVW 161 | >seq80 162 | CARDLSYGDYNDYW 163 | >seq81 164 | CATLVDYYYYMDVW 165 | >seq82 166 | CARGGGYGHYFDYW 167 | >seq83 168 | CVGELLPYYGMDVW 169 | >seq84 170 | CAVQLWLRGGFDYW 171 | >seq85 172 | CAKVMAVAGTGDYW 173 | >seq86 174 | CARVLVPVYGMDVW 175 | >seq87 176 | CARVLSPLYGMDVW 177 | >seq88 178 | CARDYGDYGGLDYW 179 | >seq89 180 | CSKDIYGSGSYPDYW 181 | >seq90 182 | CVTPRGYSYGPFDYW 183 | >seq91 184 | CATGGRYYYYGMDVW 185 | >seq92 186 | CARDPVAVAGPFDYW 187 | >seq93 188 | CAREGVGATYYFDYW 189 | >seq94 190 | CARVQGGYYYGMDVW 
191 | >seq95 192 | CAKGYSSSWYFFDYW 193 | >seq96 194 | CATPRVYSYGPFDYW 195 | >seq97 196 | CARTSGSYYSPFDYW 197 | >seq98 198 | CARDREAVAGPFDYW 199 | >seq99 200 | CAREGVGATYYFDYW 201 | >seq100 202 | CTRTDFWSGYYPDDW 203 | >seq101 204 | CAKDVGYSSGLPDYW 205 | >seq102 206 | CTRYDFWSGYFSDYW 207 | >seq103 208 | CARGYSSYYWYFDLW 209 | >seq104 210 | CAREGIVGATGLDYW 211 | >seq105 212 | CATLRGGYYYGMDVW 213 | >seq106 214 | CASPRSGSYYGAFDIW 215 | >seq107 216 | CARTKSGSYWYYFDYW 217 | >seq108 218 | CARVRSGSYLSHFDYW 219 | >seq109 220 | CARVSSGSYYGWFDYW 221 | >seq110 222 | CARDRSGSYYHGMDVW 223 | >seq111 224 | CASSYSGSYLSAFDIW 225 | >seq112 226 | CVRAVAGTYYYGMDVW 227 | >seq113 228 | CSRSGSGSYYGWFDPW 229 | >seq114 230 | CATSPATVTTGWFDPW 231 | >seq115 232 | CARGSYGSYYYGMDVW 233 | >seq116 234 | CARVYSGSYFSPFDYW 235 | >seq117 236 | CARVYGGSYLGAFDIW 237 | >seq118 238 | CAKTNSGSYYYGMDVW 239 | >seq119 240 | CAKGPYGSGSYYVDYW 241 | >seq120 242 | CARGPRGYSYGYVDYW 243 | >seq121 244 | CAREGDYYYYYGMDVW 245 | >seq122 246 | CAREGVITGTTGLDYW 247 | >seq123 248 | CARVSGGSYYGNFDYW 249 | >seq124 250 | CARVRGGSYWGDFDYW 251 | >seq125 252 | CAREGQTTVTTGIDYW 253 | >seq126 254 | CATTSIVGATSWFDPW 255 | >seq127 256 | CARQEDYYDSSGFDYW 257 | >seq128 258 | CARTRSGSYLYYFDYW 259 | >seq129 260 | CARAYSGSYFNWFDPW 261 | >seq130 262 | CARDYSGSYLGLFDYW 263 | >seq131 264 | CARPYSGSYYWWFDPW 265 | >seq132 266 | CARDVAVAGDDAFDIW 267 | >seq133 268 | CARTRGGGYFDAFDIW 269 | >seq134 270 | CAKGVSGYYLDAFDIW 271 | >seq135 272 | CARTYSGSYRNWFDPW 273 | >seq136 274 | CARVPGGSYFGAFDIW 275 | >seq137 276 | CARVYGGSYIGYFDYW 277 | >seq138 278 | CARETLDSTSSDFDYW 279 | >seq139 280 | CARGSSGNYCYGMDVW 281 | >seq140 282 | CARRNSSGWYGDWFDPW 283 | >seq141 284 | CARATMIVVVVGAFDIW 285 | >seq142 286 | CSKDRPYDYVWGSLDYW 287 | >seq143 288 | CAKDWGYDYGDYGPDYW 289 | >seq144 290 | CATRYCSGGSCSLFDYW 291 | >seq145 292 | CARYDYYDSSGYYLDYW 293 | >seq146 294 | CTKADCSSTSCQNWFDPW 295 | >seq147 296 | CAKVGYSYGYPVYYFDYW 297 | >seq148 298 | CARVSYDSSGYYYYFDYW 299 | 
>seq149 300 | CARGGYSYGYYYYGMDVW 301 | >seq150 302 | CARSPITMIVVVNAFDIW 303 | >seq151 304 | CAREGTYCGGDCYSGLDYW 305 | >seq152 306 | CARTYYDILTGHNYGMDVW 307 | >seq153 308 | CARDSYSSSSYYYYYGMDVW 309 | >seq154 310 | CARGRYSSSWYGVRNWFDPW 311 | >seq155 312 | CAKVSGPYCSGGSCYSFYFDYW 313 | >seq156 314 | CARGWPSSSWYEYYYYYGMDVW 315 | >seq157 316 | CAKVVGVYCSGGSCYGGYFDYW 317 | >seq158 318 | CTRGVPGYW 319 | >seq159 320 | CARSDGYNFDYW 321 | >seq160 322 | CARDGGVLFDYW 323 | >seq161 324 | CARDTSGWYLDYW 325 | >seq162 326 | CARVLSYYGMDVW 327 | >seq163 328 | CAGGSYYYGMDVW 329 | >seq164 330 | CARDRGYSYGSFDYW 331 | >seq165 332 | CARVLSGWYVGWFDPW 333 | >seq166 334 | CARVYSGSYYPAFDIW 335 | >seq167 336 | CAKGSWYSSSWYYFDYW 337 | >seq168 338 | CAKDRSGSYGYYGMDVW 339 | >seq169 340 | CARGGYSSSWYYYYGMDVW 341 | >seq170 342 | CARGFDYW 343 | >seq171 344 | CARGFVLW 345 | >seq172 346 | CAKGGDMDVW 347 | >seq173 348 | CAKVGGYFDYW 349 | >seq174 350 | CASWVNWFDPW 351 | >seq175 352 | CTSLSAPTDYW 353 | >seq176 354 | CPRGLEWELLNW 355 | >seq177 356 | CARESGSGLDYW 357 | >seq178 358 | CARWGGSGLDYW 359 | >seq179 360 | CARSVVSYFDYW 361 | >seq180 362 | CATSFYYYMDVW 363 | >seq181 364 | CARGGGDYVDYW 365 | >seq182 366 | CARDVGGYFDLW 367 | >seq183 368 | CARYHGSALDYW 369 | >seq184 370 | CAKEGIVLLDYW 371 | >seq185 372 | CARDSGSYLGWW 373 | >seq186 374 | CASLAGSGFDYW 375 | >seq187 376 | CARYSSGYYFDYW 377 | >seq188 378 | CATEGLLDAFDIW 379 | >seq189 380 | CARVLSGYGFDYW 381 | >seq190 382 | CARDPYGGGFDYW 383 | >seq191 384 | CARDGLSYGLDYW 385 | >seq192 386 | CARDSYSSGLDYW 387 | >seq193 388 | CAREGSGWTIDYW 389 | >seq194 390 | CAKVAAAGTTDYW 391 | >seq195 392 | CAREHGDYGFDYW 393 | >seq196 394 | CVGELPYYGMDVW 395 | >seq197 396 | CARNGGNNWFDPW 397 | >seq198 398 | CARDGTLEPLDYW 399 | >seq199 400 | CGRGYGDYWFDPW 401 | >seq200 402 | CARDLYDDAFDIW 403 | >seq201 404 | CARRYGVRPFDYW 405 | >seq202 406 | CARGAGPYGMDVW 407 | >seq203 408 | CVRARDGYNHFDYW 409 | >seq204 410 | CAKVSSYGYYFDYW 411 | >seq205 412 | CATSLLWFGELLYW 413 | >seq206 
414 | CARGSSGYYYFDYW 415 | >seq207 416 | CAKVYRSGNAFDIW 417 | >seq208 418 | CTGSTGYYYGMDVW 419 | >seq209 420 | CARSDGYNSYFDYW 421 | >seq210 422 | CARWLGGYYGMDVW 423 | >seq211 424 | CSRDYGDYGGMDVW 425 | >seq212 426 | CVREDYGDFGFDYW 427 | >seq213 428 | CARDYRFGELGDYW 429 | >seq214 430 | CASSSSPYYGMDVW 431 | >seq215 432 | CAVGLGYYYGMDVW 433 | >seq216 434 | CARDWAVGATTDYW 435 | >seq217 436 | CARRPVGVTPFDYW 437 | >seq218 438 | CAKMYGSGSYIDYW 439 | >seq219 440 | CARGGTLYDAFDIW 441 | >seq220 442 | CARSYSGYVPFDYW 443 | >seq221 444 | CARGWGGYYYFDYW 445 | >seq222 446 | CARTKTGYYYFDYW 447 | >seq223 448 | CARIMATINAFDIW 449 | >seq224 450 | CASTTYYYYYMDVW 451 | >seq225 452 | CARDVAVGDAFDSW 453 | >seq226 454 | CVRVVAATVSFDYW 455 | >seq227 456 | CAKLTYGDSPFDYW 457 | >seq228 458 | CASGSGSYYYFDYW 459 | >seq229 460 | CARYTGDYYGMDVW 461 | >seq230 462 | CARDGGLAAAFDYW 463 | >seq231 464 | CATELLWRYGMDVW 465 | >seq232 466 | CAKDPYGDLAFDYW 467 | >seq233 468 | CARDGGLVRGVDYW 469 | >seq234 470 | CTRDDFRSGYTNYW 471 | >seq235 472 | CARELSGWYGFDYW 473 | >seq236 474 | CARDRRLAEEFDYW 475 | >seq237 476 | CARDGTVTRTFDYW 477 | >seq238 478 | CTQDDFWSGYGHYW 479 | >seq239 480 | CGRDEYSSGWYFDYW 481 | >seq240 482 | CAKGRGYSYGYFDYW 483 | >seq241 484 | CTTGSGSYLGWFDPW 485 | >seq242 486 | CASGYSGSYWYFDYW 487 | >seq243 488 | CVLGSGSYYTPFDYW 489 | >seq244 490 | CARIYSSSWPSFDYW 491 | >seq245 492 | CARDSRGYSYGFDYW 493 | >seq246 494 | CARGGIAVADAFDIW 495 | >seq247 496 | CERAVVAAHNWFDPW 497 | >seq248 498 | CARGPRSSSDYFDYW 499 | >seq249 500 | CARDYLGYSYGSDYW 501 | >seq250 502 | CARVLVVNYYGMDVW 503 | >seq251 504 | CARDSIVSGWLFDYW 505 | >seq252 506 | CARQLTYYYYGMDVW 507 | >seq253 508 | CTRYDFWGGYYVDYW 509 | >seq254 510 | CARTSSGYYSKFDYW 511 | >seq255 512 | CARVGGIAVAGLDYW 513 | >seq256 514 | CARDLSGSYTYFDYW 515 | >seq257 516 | CATELLYYYYGMDVW 517 | >seq258 518 | CARDQRSSGWYVDYW 519 | >seq259 520 | CARTWAAAGLGADYW 521 | >seq260 522 | CARSRGGYYSPFDYW 523 | >seq261 524 | CARESVAALDAFDIW 525 | >seq262 526 | 
CARARGGSYSYFDYW 527 | >seq263 528 | CARGYGDYVPNFDYW 529 | >seq264 530 | CAREGVTATVYFDYW 531 | >seq265 532 | CARESIVVVPAADYW 533 | >seq266 534 | CSRNYYDSSDAFDIW 535 | >seq267 536 | CARTLVLSYYGMDVW 537 | >seq268 538 | CARVLVLYYYGMDVW 539 | >seq269 540 | CATNDYYYYYYMDVW 541 | >seq270 542 | CVRDMSGSYYGPFDYW 543 | >seq271 544 | CATVHSGSYLGYFDYW 545 | >seq272 546 | CASAGSGSYRGWFDPW 547 | >seq273 548 | CPRGYSGSYFSAFDIW 549 | >seq274 550 | CARVYSGSYYSHFDYW 551 | >seq275 552 | CARTWSGSYLSWFDPW 553 | >seq276 554 | CVRSYSGSYFSWFDPW 555 | >seq277 556 | CTRYYSGSYYYGMDVW 557 | >seq278 558 | CATTRGGSYYGAFDIW 559 | >seq279 560 | CVRGVGGGYYYGMDVW 561 | >seq280 562 | CATVPIFGVVSWFDPW 563 | >seq281 564 | CERETGDSSSSYFDYW 565 | >seq282 566 | CARVVGANYYYGMDVW 567 | >seq283 568 | CASVSSGYYYGAFDYW 569 | >seq284 570 | CARSVYGDYYYYMDVW 571 | >seq285 572 | CARTYSGSYYTPFDYW 573 | >seq286 574 | CSRVSVVYYYYGMDVW 575 | >seq287 576 | CARESIVVVHDAFDIW 577 | >seq288 578 | CARTYYYDSSGYWDYW 579 | >seq289 580 | CATTYSGSYKGYFDYW 581 | >seq290 582 | CATSPAALRSNWFDPW 583 | >seq291 584 | CAKDPVGATYDALDIW 585 | >seq292 586 | CARTYSGIYLAPFDYW 587 | >seq293 588 | CARARGGYYSRPFDYW 589 | >seq294 590 | CARSGSGSYFAYFDYW 591 | >seq295 592 | CARGWSGSYRTYFDYW 593 | >seq296 594 | CATGPVVVAATWFDPW 595 | >seq297 596 | CSRPGSGSYLSWFDPW 597 | >seq298 598 | CATSAVAGTWGWFDPW 599 | >seq299 600 | CATSPAIAVAGWFDPW 601 | >seq300 602 | CARHWDNFWSGYYHYW 603 | >seq301 604 | CATGPVAGTSNWFDPW 605 | >seq302 606 | CARVLSGSYWGWFDPW 607 | >seq303 608 | CPRALWGNYYYGMDVW 609 | >seq304 610 | CARSRGGSYSNAFDIW 611 | >seq305 612 | CARTSRGSYYDAFDIW 613 | >seq306 614 | CARGSSGTYYYGMDGW 615 | >seq307 616 | CARVSYDSLTGYYDYW 617 | >seq308 618 | CARTNIAAAGTAVDYW 619 | >seq309 620 | CARTSGGSYRGWFDPW 621 | >seq310 622 | CASSNSGSYWGAFDIW 623 | >seq311 624 | CARDVGSGWYGYFDYW 625 | >seq312 626 | CATGPTGSYYDYFDYW 627 | >seq313 628 | CASPYSGSYYSHFDYW 629 | >seq314 630 | CARSSRGSYLNWFDPW 631 | >seq315 632 | CARELIAAAGYYFDYW 633 | >seq316 634 | 
CARSRSGSYSSAFDYW 635 | >seq317 636 | CATSEVAGPLNWFDPW 637 | >seq318 638 | CPRGYSGSYYEYFQHW 639 | >seq319 640 | CARVGGGSYFNWFDPW 641 | >seq320 642 | CASPRGGSYYGEFDYW 643 | >seq321 644 | CARDLSGSYYYGLDVW 645 | >seq322 646 | CARDSSGSYYGPNDYW 647 | >seq323 648 | CARVYSGSYRGFFDYW 649 | >seq324 650 | CARVTYGNYYYGMDVW 651 | >seq325 652 | CATGPPFGVVSWFDPW 653 | >seq326 654 | CARVHSGSYYGDFDYW 655 | >seq327 656 | CARPYSGSYWSYVDYW 657 | >seq328 658 | CARSYSGSYYHAFDIW 659 | >seq329 660 | CAKTLSGSYYSPFYYW 661 | >seq330 662 | CARPYSGSYFAQFDYW 663 | >seq331 664 | CANPYSGSYRDAFDIW 665 | >seq332 666 | CARAYSGSYYYGMDVW 667 | >seq333 668 | CARGRGGGYHDAFDIW 669 | >seq334 670 | CVTTTIFGVKGWFDPW 671 | >seq335 672 | CARGTRGSYYYGMDVW 673 | >seq336 674 | CARDLVGAAYYGMDVW 675 | >seq337 676 | CARVTGANYYYGMDVW 677 | >seq338 678 | CARSKGGGYYPPFDYW 679 | >seq339 680 | CARTYSGSYYSYFDDW 681 | >seq340 682 | CARVYSGSYRGHFDYW 683 | >seq341 684 | CAKGKMGGYYYGMDVW 685 | >seq342 686 | CARTNGGGYYYGMDVW 687 | >seq343 688 | CASASGGNYYYGMDVW 689 | >seq344 690 | CARGWSSGWSYYFDYW 691 | >seq345 692 | CARGLRFLEWYYFDYW 693 | >seq346 694 | CATASVFGVANWFDPW 695 | >seq347 696 | CARSRSGSYTSWFDPW 697 | >seq348 698 | CARVSSGSYRSAFDIW 699 | >seq349 700 | CARGFGGSYYYGMDVW 701 | >seq350 702 | CARSLGGSYYVAFDIW 703 | >seq351 704 | CARTSGSYYYYFGMDVW 705 | >seq352 706 | CARDTYYDILTGYYLYW 707 | >seq353 708 | CARGVDLYYYYYGMDVW 709 | >seq354 710 | CARSGYYYDSSGYSDYW 711 | >seq355 712 | CARRYYGYYYYYGMDVW 713 | >seq356 714 | CATPRYYYDSSGTFDYW 715 | >seq357 716 | CAKFADYYDSSGYYDYW 717 | >seq358 718 | CARDTNYDILTGYYSYW 719 | >seq359 720 | CAREGQIAVAGTGLDYW 721 | >seq360 722 | CATSPPMAAAGNWFDPW 723 | >seq361 724 | CARDVDASGYYYGMDVW 725 | >seq362 726 | CATGGGYYYYYYGMDVW 727 | >seq363 728 | CARDLAVAGTSEYFQHW 729 | >seq364 730 | CAREVIAVAGSNWFDPW 731 | >seq365 732 | CARESLYGDYGGAFDIW 733 | >seq366 734 | CARAGGSGSYYGWFDPW 735 | >seq367 736 | CARVGRTIFGVVTNFDYW 737 | >seq368 738 | CARRGYYDSSGYYYFDYW 739 | >seq369 740 | CAREYYYDSSGYYPRDYW 
741 | >seq370 742 | CATSPSIAAAGNNWFDPW 743 | >seq371 744 | CSKDYDSSGYYSDAFDIW 745 | >seq372 746 | CATGPYCGGDCYGAFDIW 747 | >seq373 748 | CARSNYDFWSGYLPFDYW 749 | >seq374 750 | CVRNYDILTGYSDAFDIW 751 | >seq375 752 | CAREGYSSSWSLYYFDYW 753 | >seq376 754 | CARDVFYDFWSGYYFDYW 755 | >seq377 756 | CAKNGYSYGYGGYYFDYW 757 | >seq378 758 | CAKDSNGSGSYHDAFDIW 759 | >seq379 760 | CARASYDSSGYYAYFDYW 761 | >seq380 762 | CAREVDIVVVPAAPFDYW 763 | >seq381 764 | CASGAVAGYYYYYGMDVW 765 | >seq382 766 | CARDDYDYVWGSYSTDYW 767 | >seq383 768 | CAKDRYYYDSSGYYYFDYW 769 | >seq384 770 | CARSYYDILTGYLAAFDIW 771 | >seq385 772 | CARIPYDILTGSLYGMDVW 773 | >seq386 774 | CATIAAAGTHYYYYGMDVW 775 | >seq387 776 | CASDYYDSSGYYNDWFDPW 777 | >seq388 778 | CTTDRLGATYYYYYGMDVW 779 | >seq389 780 | CSRDYYDSSGYYNNWFDPC 781 | >seq390 782 | CARTHYGGNYYYYYGMDVW 783 | >seq391 784 | CARYCSSTSCYTDDAFDIW 785 | >seq392 786 | CARGGYSYGWSYYYGMDVW 787 | >seq393 788 | CARVKRITIFGVVTHFDYW 789 | >seq394 790 | CARGYYDILTGYYREFDYW 791 | >seq395 792 | CATNFAYCGGDCYSRFDYW 793 | >seq396 794 | CARGRGIQLWNYYYGMDVW 795 | >seq397 796 | CATTSPYCSSTSCPRWFDPW 797 | >seq398 798 | CSRGRYSSSWYGVAEYFQHW 799 | >seq399 800 | CATDLYCSGGSCYSFGMDVW 801 | >seq400 802 | CATLAAAGPEWYYYYGMDVW 803 | >seq401 804 | CVRRRYSSSWPDYYYGMDVW 805 | >seq402 806 | CARSYCSSTSCYGYYYMDVW 807 | >seq403 808 | CARGGLAVAGTYYYYGVDVW 809 | >seq404 810 | CARDPDYYDSSGYYVGFDYW 811 | >seq405 812 | CARLTTVTTNYYYYYGMDVW 813 | >seq406 814 | CARGRYSSSWYGVRNWFDPW 815 | >seq407 816 | CARVVGGYCSSTSCYYFDYW 817 | >seq408 818 | CATVGVYCTNGVCSNWFDPW 819 | >seq409 820 | CARDLTYYYDSSGWGHFDYW 821 | >seq410 822 | CAREVYDSSGYYYEDWFDPW 823 | >seq411 824 | CARGYWNYVRYYYYYGMDVW 825 | >seq412 826 | CARGTTVTTLFYYYYGMDVW 827 | >seq413 828 | CVRGIVVVPAAHYYYYGMDVW 829 | >seq414 830 | CARHPDIAVAGYYYYYGMDVW 831 | >seq415 832 | CARWGWGSGSPNYYYYYMDVW 833 | >seq416 834 | CARSGAGYSYGYAYYYYMDVW 835 | >seq417 836 | CARGRWFGELLSYYYYGMDVW 837 | >seq418 838 | CARVGLGYCSSTSCYTAFDIW 839 | >seq419 840 | 
CARESGGYCSGGSCPNWFDPW 841 | >seq420 842 | CARGRWFGELLSYYYYGMDVW 843 | >seq421 844 | CARVGVLRYFDWQAYYYGMDVW 845 | >seq422 846 | CARGGVYYYDSSGYYLDAFDIW 847 | >seq423 848 | CARGRIAAAGLCNYYYYGMDVW 849 | >seq424 850 | CVRGRRITIFGVVIDSFGMDVW 851 | >seq425 852 | CARVTVLRYFDWSYYYYGMDVW 853 | >seq426 854 | CARDSRFWEWLLYSYYYYMDVW 855 | >seq427 856 | CARVTYYYDSSGYPFYWYFDLW 857 | >seq428 858 | CARNGLYYYDSSGYYLDAFDIW 859 | >seq429 860 | CARTGVLRYFDWSYYYYGMDVW 861 | >seq430 862 | CARVPLLRYFDWTYYYYGMDVW 863 | >seq431 864 | CARVGVLRYFDWLLHYYGMDVW 865 | >seq432 866 | CAKVVGPYCSGGSCYSGQLDYW 867 | >seq433 868 | CARGGYGSGSYYNYYYYYMDVW 869 | >seq434 870 | CARDSVLRYFDWLPYYYGMDVW 871 | >seq435 872 | CARDSYSSSWLGSYYYYGMDVW 873 | >seq436 874 | CARDRYCSSTSCYRDYYYGMDVW 875 | >seq437 876 | CARGWIVVVPAATYYYYYGMDVW 877 | >seq438 878 | CARDRYCSGGSCYVYYYYGMDVW 879 | >seq439 880 | CARVSYSSSWYWGDYYYYGMDVW 881 | >seq440 882 | CARDTYCSGGSCYYYYYYGMDVW 883 | >seq441 884 | CARVWGAGYSSGWYYYYYYMDVW 885 | >seq442 886 | CARDVSSSWYSSGWYYYYYMDVW 887 | >seq443 888 | CAKMGGGPYCSGGSCYSNYFDYW 889 | >seq444 890 | CARGYTMVRGVIPRYYYYYMDVW 891 | >seq445 892 | CARVGYCSSTSCYVPYYYYGMDVW 893 | >seq446 894 | CARDVDIVVVPAATNYYYYGMDVW 895 | >seq447 896 | CARDVTYYDILTGYYPQYYYYGMDVW 897 | >seq448 898 | CARHLYYYDSSGYYYPPYYYYGMDVW 899 | >seq449 900 | CARYQYYYDSSGYYYPTPYYYGMDVW 901 | >seq450 902 | CARVGGGMDVW 903 | >seq451 904 | CVKERQWLAYFDYW 905 | >seq452 906 | CARGRYYDSSGLDYW 907 | >seq453 908 | CARELVAGYYGMDVW 909 | >seq454 910 | CARDSYSSSWYFDYW 911 | >seq455 912 | CARGRGSYYGWFDPW 913 | >seq456 914 | CARGYSSGYYYYFDYW 915 | >seq457 916 | CARDPSSSWYIYFDYW 917 | >seq458 918 | CASDPYYYDSSGLLDYW 919 | >seq459 920 | CARSYDILTGYLYFDYW 921 | >seq460 922 | CARVRYYDSSGYYSPFDYW 923 | >seq461 924 | CARDLAVAATYYYYYGMDVW 925 | >seq462 926 | CARDSGVAAAGNYYYYGMDVW 927 | -------------------------------------------------------------------------------- /examples/cdr3.fa: 
-------------------------------------------------------------------------------- 1 | >seq 2 | AGGSGISTPMDV 3 | >seq 4 | ARDLVVYGMDV 5 | >seq 6 | ARDLVVYGMDV 7 | >seq 8 | ARDAMSYGMDV 9 | >seq 10 | ARDAAVYGIDV 11 | >seq 12 | ARDLISRGMDV 13 | >seq 14 | ARDRVVYGMDV 15 | >seq 16 | ARDLVSYGMDV 17 | >seq 18 | ARDLVVYGMDV 19 | >seq 20 | ARDAQNYGMDV 21 | >seq 22 | ARDLDVYGLDV 23 | >seq 24 | ARDYGDLYFDY 25 | >seq 26 | ARDFGDFYFDY 27 | >seq 28 | ARWYDSTGSIDY 29 | >seq 30 | ALRNQWDLLVY 31 | >seq 32 | ARVEWAAAGTFY 33 | >seq 34 | ARDLERAGGMDV 35 | >seq 36 | ARDLDTMGGMDV 37 | >seq 38 | ARWKYNDRFDY 39 | >seq 40 | ARHPSGLYQLLN 41 | >seq 42 | AKEIAVAGCFDY 43 | >seq 44 | ARATWLRGVMDV 45 | >seq 46 | ARSSSRGFDY 47 | >seq 48 | ARWGGGMQYLDV 49 | >seq 50 | ARDFISRPRGYR 51 | >seq 52 | ARDFISRPRGYR 53 | >seq 54 | ARDFISRPRGYR 55 | >seq 56 | ARVWYYYGPRDY 57 | >seq 58 | ARVQWLRGEFDY 59 | >seq 60 | TRDPWGTTYFDY 61 | >seq 62 | ARMEAPKLTLDP 63 | >seq 64 | ARGEGWELPYDY 65 | >seq 66 | ARDLGDYGMDV 67 | >seq 68 | ARDLYYYGMDV 69 | >seq 70 | AGGTWLRSSFDY 71 | >seq 72 | ASSSGYLFHSDY 73 | >seq 74 | ASSSGYLFHFDY 75 | >seq 76 | ATSLFGIISLDY 77 | >seq 78 | ARSFYFDAFDI 79 | >seq 80 | TRDPVPGRGDAY 81 | >seq 82 | ARDLYYYGMDV 83 | >seq 84 | ARDIAGRLDY 85 | >seq 86 | ARDLVVYGADY 87 | >seq 88 | ARDLQYYGMDV 89 | >seq 90 | ASHLMPDAFDI 91 | >seq 92 | ARGRGLPPWFDP 93 | >seq 94 | ARGVAVDWYFDL 95 | >seq 96 | ARGVAVDWYFDL 97 | >seq 98 | ARGVAVDWYFDL 99 | >seq 100 | ARGVAVDWYFDL 101 | >seq 102 | ARGVAVDWYFDL 103 | >seq 104 | ARGVAVDWYFDL 105 | >seq 106 | ARGVAVDWYFDL 107 | >seq 108 | ARGVAVDWYFDL 109 | >seq 110 | ARGVAVDWYFDL 111 | >seq 112 | ARSGPDYFDY 113 | >seq 114 | ARDLAVYGMDV 115 | >seq 116 | ARDGEGQRETDY 117 | >seq 118 | ARGIAVDWYFDL 119 | >seq 120 | ARDWGEYYFDY 121 | >seq 122 | ARDYGDYYFDY 123 | >seq 124 | ARDWGEYYFDY 125 | >seq 126 | ARDYGDYYFDY 127 | >seq 128 | VRDYGDFYFDY 129 | >seq 130 | ARGEGWDLPYDY 131 | >seq 132 | ARGEGWDLPYDY 133 | >seq 134 | ARGEGWELPYDY 135 | >seq 136 | TRWDGWSQHDY 137 | >seq 138 | 
TRWDGWSQHDY 139 | >seq 140 | AREGMGMAAAGT 141 | >seq 142 | AREGMGIAAAGT 143 | >seq 144 | ARGDGELFFDH 145 | >seq 146 | ARGDGELFFDQ 147 | >seq 148 | ARSQGWLQLNDY 149 | >seq 150 | ARGQLLPFADY 151 | >seq 152 | ARDFYHNWFDP 153 | >seq 154 | ARDLRGPGTFDI 155 | >seq 156 | ARDLSAAFDI 157 | >seq 158 | ARTMDGDYFDY 159 | >seq 160 | ARDLGTGLFDY 161 | >seq 162 | ARDYGDFYFDY 163 | >seq 164 | ARDYGDLYFDY 165 | >seq 166 | ATDLTSGRGP 167 | >seq 168 | ARDLVVWGMDV 169 | >seq 170 | AIQLWLRGGYDY 171 | >seq 172 | AVQLWLRGNFDY 173 | >seq 174 | ARLQWLRGAFDI 175 | >seq 176 | ARATTPFSGVDY 177 | >seq 178 | ARHISSGWYDY 179 | >seq 180 | ARDTLGRGGDY 181 | >seq 182 | ARDTFGRGGDY 183 | >seq 184 | ARDTLGRGGDY 185 | >seq 186 | ARGFSLTWYFDL 187 | >seq 188 | ARGFSLTWYFDL 189 | >seq 190 | ASEWEIFGFDY 191 | >seq 192 | ARDLPPRRFDI 193 | >seq 194 | ARDRDSSWSFDY 195 | >seq 196 | AKEEYYGMDV 197 | >seq 198 | ARSLWLRGSFQH 199 | >seq 200 | ARDLMAYGMDV 201 | >seq 202 | ARDGGHYGMDV 203 | >seq 204 | ARDYGDFYFDY 205 | >seq 206 | ATAPWLRGGFDY 207 | >seq 208 | AREIPSTWYFDL 209 | >seq 210 | ARDIPPTWYFDL 211 | >seq 212 | ARPVTYDWYFDL 213 | >seq 214 | ARDYGDYYFDY 215 | >seq 216 | ARDFGEFYFDY 217 | >seq 218 | ARVGYGYYFDY 219 | >seq 220 | TSQVWLRGPGDY 221 | >seq 222 | AREAEWEAFDI 223 | >seq 224 | ARVVANWFDP 225 | >seq 226 | ARAEWLRGAFDI 227 | >seq 228 | ARDRVIYGMDV 229 | >seq 230 | ARVLPFGDYFDY 231 | >seq 232 | ARDSTPGYGDY 233 | >seq 234 | ARDYGDFYFDY 235 | >seq 236 | ASAFWQRGNFDY 237 | >seq 238 | ARHSWLRGMADY 239 | >seq 240 | ARGRTWELVDY 241 | >seq 242 | ARGKWLRGSFDY 243 | >seq 244 | ARDLYSSGGTDI 245 | >seq 246 | ARDLYSSGGTDI 247 | >seq 248 | ARGYGDYYFDY 249 | >seq 250 | ARDYGDFYFDY 251 | >seq 252 | ARDWGDYYFDY 253 | >seq 254 | ARDYGDFYFDY 255 | >seq 256 | ARDLSVFGMDV 257 | >seq 258 | ARDLGERGMDV 259 | >seq 260 | ARLRWLRADFDY 261 | >seq 262 | ARFRYGDYPDY 263 | >seq 264 | ARRWDGVGFDI 265 | >seq 266 | ARSPHYYGGFDY 267 | >seq 268 | ARDGRAVAGTD 269 | >seq 270 | ARVQWLRLDY 271 | >seq 272 | ARMVVRGVMLDY 273 | >seq 274 | 
ARDLNIAGGFDI 275 | >seq 276 | ARLMYYYGNFDY 277 | >seq 278 | ALGRAGTMDY 279 | >seq 280 | ARDPMRPGMDV 281 | >seq 282 | AKMLWLRGWFDP 283 | >seq 284 | ARQESGWSFDY 285 | >seq 286 | ARDLEERGAMDV 287 | >seq 288 | ARAGWLRGRFDP 289 | >seq 290 | ARTRLGITAFDI 291 | >seq 292 | ARDYGDYYFDY 293 | >seq 294 | ARDLDYYGMDV 295 | >seq 296 | ASFGSLWDLRDY 297 | >seq 298 | TRTATVVKDY 299 | >seq 300 | ARMIPIPALDY 301 | >seq 302 | AREEAAGTKLDY 303 | >seq 304 | ARGPYPSSSWA 305 | >seq 306 | ARDLVTWGLDY 307 | >seq 308 | ARITPHLVYDY 309 | >seq 310 | ARQESGWSFDY 311 | >seq 312 | ARVQEFWLDP 313 | >seq 314 | ARDLENGGLDV 315 | >seq 316 | ARHPVLRGNIDY 317 | >seq 318 | ARDKWEGTFDY 319 | >seq 320 | AETGWDGMDV 321 | >seq 322 | ARIQRGIAADY 323 | >seq 324 | ARDLDIAGAFDI 325 | >seq 326 | AGLFWYGGYFDY 327 | >seq 328 | ARELGPVGGTDQ 329 | >seq 330 | AESSSLTGNFNY 331 | >seq 332 | ARVGPSSSWPS 333 | >seq 334 | AREVAGAVHLDY 335 | >seq 336 | ARGSSNLFDI 337 | >seq 338 | ARAGSVGVVDY 339 | >seq 340 | TTWFYYDIRDH 341 | >seq 342 | ARAGNIGAVDY 343 | >seq 344 | ARAGSIASVDY 345 | >seq 346 | AKGGWRYFDY 347 | >seq 348 | ARAGDRGAVDY 349 | >seq 350 | ARAGSRGSVDY 351 | >seq 352 | ARESYEGSSFDY 353 | >seq 354 | ARDYGRCGDY 355 | >seq 356 | AKVTWTTTGDS 357 | >seq 358 | ARAGDRGAVDY 359 | >seq 360 | ARAGDRGAVDY 361 | >seq 362 | ARAGSVGTVDY 363 | >seq 364 | VWTSRGYFDH 365 | >seq 366 | ARDLSSGWSLDS 367 | >seq 368 | ARAGGVGTVDY 369 | >seq 370 | ARCDMAGTTDY 371 | >seq 372 | AKKADPHSAFDY 373 | >seq 374 | ATLTYGYSPY 375 | >seq 376 | ASNFNRNDGY 377 | >seq 378 | ARLQGTGYLDY 379 | >seq 380 | ARQWAVNWFVP 381 | >seq 382 | ARKYGDLHFDY 383 | >seq 384 | ARGGYRNWFDP 385 | >seq 386 | ASLLGGTVVNDY 387 | >seq 388 | TSERWLDAFDI 389 | >seq 390 | ARGWGKGPPLGY 391 | >seq 392 | AKDNGDYACFDY 393 | -------------------------------------------------------------------------------- /examples/color.json: -------------------------------------------------------------------------------- 1 | {"A": "red", "T": "blue", "G": "#5dff11", "C": "#119dff"} 
-------------------------------------------------------------------------------- /examples/ectf.fa: -------------------------------------------------------------------------------- 1 | >dinD 32->52 2 | aactgtatataaatacagtt 3 | >dinG 15->35 4 | tattggctgtttatacagta 5 | >dinH 77->97 6 | tcctgttaatccatacagca 7 | >dinI 19->39 8 | acctgtataaataaccagta 9 | >lexA-1 28->48 10 | tgctgtatatactcacagca 11 | >lexA-2 7->27 12 | aactgtatatacacccaggg 13 | >polB(dinA) 53->73 14 | gactgtataaaaccacagcc 15 | >recA 59->79 16 | tactgtatgagcatacagta 17 | >recN-1 49->69 18 | tactgtatataaaaccagtt 19 | >recN-2 27->47 20 | tactgtacacaataacagta 21 | >recN-3 9-29 22 | TCCTGTATGAAAAACCATTA 23 | >ruvAB 49->69 24 | cgctggatatctatccagca 25 | >sosC 18->38 26 | tactgatgatatatacaggt 27 | >sosD 14->34 28 | cactggatagataaccagca 29 | >sulA 22->42 30 | tactgtacatccatacagta 31 | >umuDC 20->40 32 | tactgtatataaaaacagta 33 | >uvrA 83->103 34 | tactgtatattcattcaggt 35 | >uvrB 75->95 36 | aactgtttttttatccagta 37 | >uvrD 57->77 38 | atctgtatatatacccagct 39 | >hns1 40 | tAGGCTGATTT 41 | >hns2 42 | gAAAATTATTT 43 | >hns3 44 | gGGAGTTATTC 45 | >hns4 46 | aCAAATTATTT 47 | >hns5 48 | gCAACAGAGTA 49 | >hns6 50 | aCGCCTGAATA 51 | >hns7 52 | tCGAGAAAGTT 53 | >hns8 54 | tCGCCGGAATT 55 | >hns9 56 | tGGCATGAATA 57 | >hns10 58 | aTAAAGGAATC 59 | >hns11 60 | cTAATTTAATT 61 | >hns12 62 | gCAATTAAATT 63 | >hns13 64 | tGACATGAATC 65 | >hns14 66 | cTGGCTAATTT 67 | >hns15 68 | aCAACTGAATT 69 | >dnaA_1 rpoH-1 70 | aatttattcacaagc 71 | >dnaA_2 rpoH-2 72 | attttatccacaagt 73 | >dnaA_3 nrd 74 | gagttatccacaaag 75 | >dnaA_4 oriC-R1 76 | ttgttatccacaggg 77 | >dnaA_5 oriC-R2 78 | ggggttatacacaac 79 | >dnaA_6 oriC-R3 80 | ttctttggataacta 81 | >dnaA_7 oriC-R4 82 | gagttatccacagta 83 | >dnaA_10 dnaA 84 | gatttatccacagga 85 | >argA-1 32->50 86 | acagaataaaaatacact 87 | >argA-2 11->29 88 | ttcgaataatcatgcaaa 89 | >argD-1 51->69 90 | agtgattttttatgcata 91 | >argD-2 30->48 92 | tgtggttataatttcaca 93 | >argECBH-1 26->44, argC 110->128 94 | 
tatcaatattcatgcagt 95 | >argECBH-2 47->65, argC 89->107 96 | tatgaataaaaatacact 97 | >argF-1 48->66 98 | aatgaataattacacata 99 | >argF-2 27->45 100 | agtgaattttaattcaat 101 | >argG-1 73->91 102 | attaaatgaaaactcatt 103 | >argG-2 52->70 104 | tttgcataaaaattcagt 105 | >argG-3 192->210 106 | tgtgaatgaatatccagt 107 | >argI-1 46->64 108 | aatgaataatcatccata 109 | >argI-2 25->43 110 | attgaattttaattcatt 111 | >argR-1 45->63 112 | tttgcataaaaattcatc 113 | >argR-2 24->42 114 | tatgcacaataatgttgt 115 | >carAB-1 32->50 116 | tgtgaattaatatgcaaa 117 | >carAB-2 11->29 118 | agtgagtgaatattctct -------------------------------------------------------------------------------- /examples/example.fa: -------------------------------------------------------------------------------- 1 | >seq1 group@1-good 2 | CACCACACACACAGAC 3 | >seq2 group@1-good 4 | TGACTTAGACACAGAC 5 | >seq3 group@1-good 6 | GTGCATACGCACAGAC 7 | >seq3 group@1-good 8 | ATTCATACGCGCAGCC 9 | >seq3 group@1-good 10 | CTACATACGCCCAGCC 11 | >seq3 group@1-good 12 | ATGCATACGCCCAGCC 13 | >seq3 group@1-good 14 | TTACATACGCCCAGCC 15 | >seq4 group@2-bad 16 | CTGGATGCAGAC 17 | >seq5 group@2-bad 18 | GAACATACACAC 19 | >seq6 group@2-bad 20 | AACCCTCTCTAT 21 | >seq4 group@2-bad 22 | CTGCATGCAGAT 23 | >seq5 group@2-bad 24 | GTACATACACAG 25 | >seq6 group@2-bad 26 | ATACATATATCT 27 | >seq4 group@2-bad 28 | ACAGATAGCCCA 29 | >seq5 group@2-bad 30 | ACAATACAGGCA 31 | >seq6 group@2-bad 32 | AGCCGTCAACAC -------------------------------------------------------------------------------- /examples/example2.fa: -------------------------------------------------------------------------------- 1 | >seq0 group@12-tmp 2 | CGAGCAGTGTCA 3 | >seq1 group@12-tmp 4 | CATAGGAGATTT 5 | >seq2 group@12-tmp 6 | AACACATTGCAC 7 | >seq3 group@12-tmp 8 | GCTGGCGGAGAG 9 | >seq4 group@12-tmp 10 | AGTAATCCATCG 11 | >seq5 group@12-tmp 12 | GACACCGACTCA 13 | >seq6 group@12-tmp 14 | TGCCACGTCTCC 15 | >seq7 group@12-tmp 16 | TGGTAGTGTTTT 17 | >seq8 group@12-tmp 18 | 
CTGAGGGTGTTA 19 | >seq9 group@12-tmp 20 | CGCAGGAAGTTT 21 | >seq0 group@13-tmp 22 | AGCATCTTTTGCC 23 | >seq1 group@13-tmp 24 | CAAAGGCAATCAA 25 | >seq2 group@13-tmp 26 | CGTTGTATGTCTC 27 | >seq3 group@13-tmp 28 | AAAAAGATACTTG 29 | >seq4 group@13-tmp 30 | TAGAGGCAATAGC 31 | >seq5 group@13-tmp 32 | TAAAGTCTTTCCC 33 | >seq6 group@13-tmp 34 | TTCAACACATGCT 35 | >seq7 group@13-tmp 36 | GCTAACGCGTGCC 37 | >seq8 group@13-tmp 38 | CGTAATTTCCAGC 39 | >seq9 group@13-tmp 40 | CGTAGATCCTCCT 41 | >seq0 group@14-tmp 42 | TAGAAGTGAGGTGC 43 | >seq1 group@14-tmp 44 | GCTTACCAGCCTCA 45 | >seq2 group@14-tmp 46 | CTTAATTAACCACT 47 | >seq3 group@14-2-tmp 48 | GCACAAGCGTCCTC 49 | >seq4 group@14-2-tmp 50 | TATACTACTTATTC 51 | >seq5 group@14-2-tmp 52 | GTCGAACGATAGTG 53 | >seq6 group@14-2-tmp 54 | ACATACCCGTTTCC 55 | >seq7 group@14-2-tmp 56 | GCCGATCTGCTTGA 57 | >seq8 group@14-2-tmp 58 | AAGATTTAGCTGTG 59 | >seq9 group@14-tmp 60 | GGGAGTTAACGTAC 61 | >seq0 group@15-tmp 62 | GGAGTACAGCCCTAT 63 | >seq1 group@15-tmp 64 | CGATAGAAAATAAGC 65 | >seq2 group@15-tmp 66 | TCCACGCTACGTATT 67 | >seq3 group@15-tmp 68 | CGTCGATGGTATTTC 69 | >seq4 group@15-tmp 70 | TCCTAAGAAAACCCT 71 | >seq5 group@15-tmp 72 | GTAAAAAATTGCTGC 73 | >seq6 group@15-tmp 74 | TAGCATCGGTTGAGC 75 | >seq7 group@15-tmp 76 | AATACTCCCTTCCAG 77 | >seq8 group@15-tmp 78 | GATTGAAACTGGCAA 79 | >seq9 group@15-tmp 80 | GTATTCATTCAAATA 81 | >seq0 group@16-tmp 82 | TCTGCACGGTGATTCA 83 | >seq1 group@16-tmp 84 | CTAAAAAGCTGGTTCT 85 | >seq2 group@16-tmp 86 | GGTAATCTCTTGCTCA 87 | >seq3 group@16-tmp 88 | GCAACAAAGTGACCAA 89 | >seq4 group@16-tmp 90 | TCTAACCGCTAGCTCA 91 | >seq5 group@16-tmp 92 | AATACACGCGGTACAA 93 | >seq6 group@16-tmp 94 | ACCAGCTTAAACGCGA 95 | >seq7 group@16-tmp 96 | GGTTATAATCTGTTCT 97 | >seq8 group@16-tmp 98 | CCTATTCGATTCCTCA 99 | >seq9 group@16-tmp 100 | AATAGTCGCCCCATCA 101 | >seq0 group@17-tmp 102 | TTTATTGCTTACCAAAT 103 | >seq1 group@17-tmp 104 | ACCAGCAAGCGCTGGGA 105 | >seq2 group@17-tmp 106 | ATAATGACCTCAGTCAA 107 
| >seq3 group@17-tmp 108 | GGTAAATCAAGCTCCAT 109 | >seq4 group@17-tmp 110 | CCAAGTGACTACGGCTC 111 | >seq5 group@17-tmp 112 | TTCATTGGGTAGCTGTA 113 | >seq6 group@17-tmp 114 | GGCACATCTCTGAGAAT 115 | >seq7 group@17-tmp 116 | ATTCGGAACTATGAGAT 117 | >seq8 group@17-tmp 118 | CTATACAGGTGACTGAT 119 | >seq9 group@17-tmp 120 | TGGATCTCTTTCCCAAC 121 | -------------------------------------------------------------------------------- /logs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labomics/MetaLogo/918b04cbf15dd3bdd4acad5d792d20a7f6c693ce/logs/.gitkeep -------------------------------------------------------------------------------- /pngs/about.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/labomics/MetaLogo/918b04cbf15dd3bdd4acad5d792d20a7f6c693ce/pngs/about.PNG -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | biopython==1.77 2 | dash==1.21.0 3 | dash-bootstrap-components==0.12.2 4 | Flask==2.0.1 5 | gunicorn==20.1.0 6 | matplotlib==3.3.0 7 | numpy>=1.22 8 | pandas==1.3.0 9 | plotly==5.1.0 10 | scipy==1.5.2 11 | seaborn==0.11.1 12 | toml==0.10.2 13 | treecluster==1.0.3 14 | DendroPy==4.5.2 15 | supervisor==4.2.2 16 | rq==1.10.0 17 | hiredis==2.0.0 18 | ete3==3.1.1 19 | dash-bio==0.8.0 20 | -------------------------------------------------------------------------------- /server.cmd.sh: -------------------------------------------------------------------------------- 1 | if [ -f "/var/run/redis_6379.pid" ]; then 2 | /etc/init.d/redis_6379 stop 3 | /etc/init.d/redis_6379 start 4 | else 5 | /etc/init.d/redis_6379 start 6 | fi 7 | 8 | if pgrep -x "supervisord" > /dev/null 9 | then 10 | echo "supervisord running" 11 | else 12 | supervisord -c /etc/supervisord.conf 13 | fi 14 | 15 | 
GUNICORN_CMD_ARGS='--workers 10 --timeout 300 --threads 10 -b 0.0.0.0:8050 --access-logfile MetaLogo/logs/access.log --error-logfile MetaLogo/logs/err.log --capture-output --access-logformat "%({X-Forwarded-For}i)s %(l)s %(u)s %(t)s \"%(r)s\" %(s)s %(b)s \"%(f)s\" \"%(a)s\"" ' gunicorn MetaLogo.server.index:server -------------------------------------------------------------------------------- /server.dev.sh: -------------------------------------------------------------------------------- 1 | python -m MetaLogo.server.index 2 | -------------------------------------------------------------------------------- /server.docker.sh: -------------------------------------------------------------------------------- 1 | docker build -t metalogo:v2 . 2 | docker run -d --expose 8050 --name metalogo -e VIRTUAL_HOST=metalogo.omicsnet.org -v "$(pwd)"/..:/code metalogo:v2 3 | -------------------------------------------------------------------------------- /server.toml: -------------------------------------------------------------------------------- 1 | # This is a TOML document 2 | 3 | title = "MetaLogo webserver configure example" 4 | 5 | example_path = 'MetaLogo/examples' 6 | output_png_path = 'MetaLogo/figure_output' 7 | output_fa_path = 'MetaLogo/sequence_input' 8 | config_path = 'MetaLogo/configs' 9 | log_path = 'MetaLogo/logs' 10 | group_limit = 20 11 | max_seq_limit = 200000 12 | max_seq_limit_auto = 20000 13 | max_seq_len = 2000 14 | max_input_size = 5242880 15 | google_analytics_id = 'G-0EZX46EYE9' 16 | baidu_tongji_id = '53d867057aa25d8e9cf8e04221f009f8' 17 | 18 | #bins 19 | clustalo_bin = '/usr/bin/clustalo' 20 | fasttree_bin = '/usr/bin/FastTree' 21 | fasttreemp_bin = '/usr/bin/FastTreeMP' 22 | treecluster_bin = 'TreeCluster.py' 23 | 24 | sqlite3_db = 'MetaLogo/db/metalogo.db' 25 | -------------------------------------------------------------------------------- /server/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/labomics/MetaLogo/918b04cbf15dd3bdd4acad5d792d20a7f6c693ce/server/__init__.py -------------------------------------------------------------------------------- /server/app.py: -------------------------------------------------------------------------------- 1 | import dash 2 | from flask import Flask 3 | import dash_bootstrap_components as dbc 4 | from .config import GOOGLE_ANALYTICS_ID 5 | server = Flask(__name__) 6 | 7 | external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css'] 8 | external_scripts = [f'https://www.googletagmanager.com/gtag/js?id={GOOGLE_ANALYTICS_ID}'] 9 | 10 | #app = dash.Dash(__name__, external_stylesheets=external_stylesheets) 11 | app = dash.Dash( 12 | __name__, 13 | title="MetaLogo", 14 | external_stylesheets=[dbc.themes.BOOTSTRAP], 15 | external_scripts=external_scripts, 16 | server=server, 17 | update_title=None, 18 | meta_tags=[{ 19 | 'name': 'description', 20 | 'content': 'A website to plot and align multiple sequences logos on one single figure,it can integrate the logo images of sequece of different lengths, and align them through algorithms, so as to display the samples in a more macroscopic view.' 21 | }] 22 | 23 | ) 24 | app.title = "MetaLogo: a heterogeneity-aware sequence logo generator and aligner" 25 | -------------------------------------------------------------------------------- /server/apps/about.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/env python 3 | 4 | import dash 5 | import dash_core_components as dcc 6 | import dash_html_components as html 7 | import dash_bootstrap_components as dbc 8 | from dash.dependencies import Input, Output, State 9 | 10 | 11 | about_md = dcc.Markdown(''' 12 | **MetaLogo** is a tool for making sequence logos. 
It can take multiple sequences as input, automatically identify the homogeneity and heterogeneity among the sequences, cluster them into different groups at any desired resolution, and finally output multiple aligned sequence logos in one figure. Grouping can also be specified by users, such as grouping by length, grouping by sample ID, etc. Compared to conventional sequence logo generators, MetaLogo can display the total sequence population in a more detailed, dynamic and informative view. 13 | 14 | In the auto-grouping mode, MetaLogo performs multiple sequence alignment (MSA), phylogenetic tree construction and group clustering for the input sequences. Users can give MetaLogo different resolution values to guide the sequence clustering process and the building of the sequence logos, which leads to a dynamic and complete understanding of the input data. In the user-defined-grouping mode, MetaLogo will perform an adjusted MSA algorithm to align multiple logos and highlight the conserved connections among groups. MetaLogo also provides a basic analysis module to present statistics of the sequences, including sequencing characteristics distributions, conservation scores, pairwise distances, group correlations, etc. Almost all the related intermediate results are available for download. 15 | 16 | Users have plenty of options to get their custom sequence logos and basic analysis figures. Multiple styles of the output are provided. Users can customize most of the elements of drawing, including shape, title, axis, ticks, labels, font color, graphic size, etc. At the same time, it can export a variety of formats including PDF, PNG, SVG and so on. It is really convenient for users without programming experience to produce publication-ready figures. 17 | 18 | Users can also download the standalone package of MetaLogo, integrate it into their own Python projects or easily set up a local MetaLogo server using Docker.
A easy-to-use front website + a job queue organized back end could give users convenience to investigate and understand their sequences in their own computing environments. 19 | 20 | If you think this tool is easy to use, please share it with those who need it. If you have any comments, please send an email to the maintainer via the feedback button at the top. 21 | 22 | If you use MetaLogo in your work, please kindly cite our paper: 23 | 24 | - Chen, Y., He, Z., Men, Y., Dong, G., Hu, S., & Ying, X. (2022). MetaLogo: a heterogeneity-aware sequence logo generator and aligner. Briefings in Bioinformatics, 23(2), bbab591. 25 | 26 | Thank you. 27 | 28 | ''') 29 | 30 | aboutpanel = dbc.Card( 31 | [ 32 | dbc.CardHeader("About MetaLogo"), 33 | dbc.CardBody( 34 | [ 35 | dbc.Row([ 36 | dbc.Col(html.Div(html.Img(src='/assets/about.PNG',width='100%'))) 37 | ]) 38 | , 39 | dbc.Row([ 40 | dbc.Col(about_md), 41 | ]), 42 | dbc.Row( 43 | dbc.Col([ 44 | html.Div([ 45 | dbc.NavLink( 46 | [dbc.Button("> Analysis", color='info')], 47 | href='/analysis' 48 | ) 49 | ], 50 | style={'marginTop':'20px','textAlign':'right'}) 51 | ])) 52 | ] 53 | ) 54 | ],style={'marginBottom':'10px'} 55 | ) 56 | 57 | 58 | layout = dbc.Container(children=[ 59 | html.Hr(), 60 | aboutpanel 61 | 62 | ]) -------------------------------------------------------------------------------- /server/apps/msa.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #!/usr/bin/env python 3 | 4 | import dash 5 | import dash_core_components as dcc 6 | import dash_html_components as html 7 | import dash_bootstrap_components as dbc 8 | import dash_bio as dashbio 9 | from dash.dependencies import Input, Output, State 10 | from dash.exceptions import PreventUpdate 11 | from ..app import app 12 | from ..config import CONFIG_PATH, SQLITE3_DB, PNG_PATH, FA_PATH 13 | import os 14 | import re 15 | 16 | loading_spinner = html.Div( 17 | [ 18 | 
dbc.Spinner(html.Div(id="loading-output3"), fullscreen=True, 19 | fullscreen_style={"opacity": "0.8"}), 20 | dbc.Spinner(html.Div(id="loading-output4"), fullscreen=True, 21 | fullscreen_style={"opacity": "0.8"}), 22 | ] 23 | ) 24 | checklist = dbc.FormGroup( 25 | [ 26 | dbc.Label("Panels"), 27 | dbc.Checklist( 28 | options=[ 29 | {"label": "Conservation", "value": 'conservation'}, 30 | {"label": "Gaps", "value": 'gaps'}, 31 | {"label": "Overview", "value": 'overview'}, 32 | {"label": "Consensus", "value": 'consensus'}, 33 | ], 34 | value=[], 35 | id="checklist", 36 | inline=True, 37 | ), 38 | ] 39 | ) 40 | COLORSCALES_DICT = [ 41 | {'value': 'buried', 'label': 'Buried'}, 42 | {'value': 'cinema', 'label': 'Cinema'}, 43 | {'value': 'clustal2', 'label': 'Clustal2'}, 44 | {'value': 'clustal', 'label': 'Clustal'}, 45 | {'value': 'helix', 'label': 'Helix'}, 46 | {'value': 'hydro', 'label': 'Hydrophobicity'}, 47 | {'value': 'lesk', 'label': 'Lesk'}, 48 | {'value': 'mae', 'label': 'Mae'}, 49 | {'value': 'nucleotide', 'label': 'Nucleotide'}, 50 | {'value': 'purine', 'label': 'Purine'}, 51 | {'value': 'strand', 'label': 'Strand'}, 52 | {'value': 'taylor', 'label': 'Taylor'}, 53 | {'value': 'turn', 'label': 'Turn'}, 54 | {'value': 'zappo', 'label': 'Zappo'}, 55 | ] 56 | 57 | 58 | colorscale_dropdown = dbc.FormGroup( 59 | [ 60 | dbc.Label("Color scale", html_for="dropdown"), 61 | dcc.Dropdown( 62 | id="color_scale_dropdown", 63 | options=COLORSCALES_DICT, 64 | value='buried', 65 | searchable=False, 66 | clearable=False, 67 | ), 68 | ], 69 | style={'width':'200px','marginRight':'50px'} 70 | ) 71 | layout = dbc.Container([ 72 | html.H3([html.Span("MSA result for task "),html.A(id="uid")]), 73 | dbc.Col([ 74 | dbc.Row([ 75 | colorscale_dropdown, 76 | checklist 77 | ]), 78 | ] 79 | ), 80 | dbc.Col( 81 | [ 82 | dbc.Row([ 83 | dashbio.AlignmentChart( 84 | id='my-default-alignment-viewer', 85 | data='>a\nA', 86 | height=1200, 87 | width="100%", 88 | showgap=False, 89 | 
def get_values(checklist):
    """Translate the "Panels" checklist selection into alignment-viewer props.

    Args:
        checklist: iterable of selected option values ('gaps', 'conservation',
            'consensus', 'overview'). ``None`` is treated as nothing selected.

    Returns:
        list: ``[showgap, showconservation, showconsensus, overview]``; the
        first three are booleans, ``overview`` is ``'slider'`` or ``'none'``.
    """
    selected = checklist or []
    return [
        'gaps' in selected,
        'conservation' in selected,
        'consensus' in selected,
        'slider' if 'overview' in selected else 'none',
    ]


@app.callback(
    Output('my-default-alignment-viewer', 'colorscale'),
    Input('color_scale_dropdown', 'value')
)
def change_color(val):
    """Pass the color scale chosen in the dropdown straight to the viewer."""
    return val


@app.callback(
    [
        Output('my-default-alignment-viewer', 'data'),
        Output('my-default-alignment-viewer', 'height'),
        Output("loading-output3", "children"),
        Output("uid", "children"),
        Output("uid", "href"),
        Output('my-default-alignment-viewer', 'showgap'),
        Output('my-default-alignment-viewer', 'showconservation'),
        Output('my-default-alignment-viewer', 'showconsensus'),
        Output('my-default-alignment-viewer', 'overview')
    ],
    [
        Input('url', 'pathname'),
        Input('checklist', 'value')
    ],
    [
        State('my-default-alignment-viewer', 'data'),
        State('my-default-alignment-viewer', 'height'),
    ]
)
def display_page(pathname, checklist, data, height):
    """Load the MSA of the task named in ``/msa/<uid>`` or re-apply panel toggles.

    Args:
        pathname: current URL path (may be ``None`` before the URL is known).
        checklist: selected values of the "Panels" checklist.
        data: alignment text currently shown by the viewer.
        height: current viewer height.

    Returns:
        list: values for the nine outputs declared above, in order: alignment
        data, height, spinner placeholder, uid text, uid link, showgap,
        showconservation, showconsensus, overview.
    """
    # All optional panels switched off; used whenever a page is (re)loaded.
    panels_off = [False, False, False, 'none']

    arrs = (pathname or '').split('/msa/')
    if len(arrs) <= 1:
        # Not an MSA URL: return type-correct empty props rather than ''
        # for the numeric/boolean ones.
        return ['', 100, '', '', ''] + panels_off

    ctx = dash.callback_context
    trigger_id = ''
    if ctx.triggered:
        trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]

    uid = arrs[-1]
    results_href = '/results/' + uid

    if trigger_id == 'checklist':
        # Only the panel toggles changed: keep the loaded alignment as is.
        return [data, height, '', uid, results_href] + get_values(checklist)

    msa_file = f'{FA_PATH}/server.{uid}.msa.rawid.fa'
    # uid comes straight from the URL, so refuse anything that could step
    # out of FA_PATH before touching the file system.
    if '/' in uid or '\\' in uid or not os.path.exists(msa_file):
        return ['', 100, '', uid, results_href] + panels_off

    with open(msa_file, encoding='utf-8') as data_file:
        data = data_file.read()
    # Height is 20 units per pair of lines; presumably each sequence takes
    # two lines (header + residues) -- TODO confirm against the MSA writer.
    line_no = data.count('\n') / 2
    return [data, line_no * 20, '', uid, results_href] + panels_off


@app.callback(
    Output('default-alignment-viewer-output', 'children'),
    Input('my-default-alignment-viewer', 'eventDatum')
)
def update_output(value):
    """Mirror the viewer's last event into the hidden output div."""
    if value is None:
        return 'No data.'
    return str(value)
def _info_col(label, span_id, label_style, value_style, placeholder='auto'):
    """Build one "label + value" column of the task-info card."""
    return dbc.Col([
        html.Span(label, style=label_style),
        html.Span(placeholder, style=value_style, id=span_id),
    ])


def _figure_rows(caption, img_id, img_style):
    """Return the caption row and the image row of one statistics figure."""
    return [
        dbc.Row(dbc.Col(html.Span(caption))),
        dbc.Row([html.Img(id=img_id, src='', style=img_style)]),
    ]


def _download_col(label, btn_id, download_id, btn_style):
    """Build a column holding a download button and its dcc.Download target."""
    return dbc.Col([
        dbc.Button(label, id=btn_id, style=btn_style, color='info'),
        dcc.Download(id=download_id, type='text'),
    ])


def get_layout():
    """Build the component tree of the results page.

    Returns:
        dbc.Container: job-id search box, the not-found / error / running
        banners, the initially hidden result panels, a loading spinner, hidden
        helper nodes used as callback targets, and the error modal.
    """
    bold = {'fontWeight': 'bold'}
    card_style = {'marginBottom': '10px'}
    banner_style = {'fontSize': '25px', 'color': '#ff3400', 'margin': '20px'}

    uid_input = dbc.FormGroup([
        dbc.Label("Please input the job ID and click the Search button",
                  html_for='input', id='input_label'),
        dbc.Input(type="string", id="uid_input"),
    ])
    search_btn = html.Div(
        [html.A("Check", href="results", target='_self', id='search_btn')],
        style={'marginTop': '20px', 'textAlign': 'right'})

    not_found_panel = html.Div(
        [html.Span('Task id not found, please input the right id and click the check button',
                   id='not_found_span', style={'display': 'none'})],
        style=banner_style,
    )

    error_panel = html.Div(
        [
            html.Div([
                html.Div('Task failed. '),
                html.Div('', id='error_info_panel'),
            ], id='error_span', style={'display': 'none'})
        ],
        style=banner_style,
    )

    running_panel = html.Div(
        [
            html.Span(
                [
                    html.Div([
                        html.Div([
                            html.Span('The task is still running'),
                            html.Span('', id='in_queue_note'),
                            html.Span(' please check it later: '),
                        ], style={'color': 'black'}),
                        html.Div('', id='page_url'),
                    ]),
                    html.Div(
                        [
                            html.Span('The page will be refreshed every '),
                            html.Span('10', id='refresh_count', style={'color': 'red'}),
                            html.Span(' seconds'),
                        ], style={'fontSize': '5px'}),
                ],
                id='running_span', style={'display': 'none'})
        ],
        style={'fontSize': '25px', 'color': '#092eff', 'margin': '20px'}
    )

    label_style = {
        'background': '#80808021',
        'borderRadius': '5px',
        'margin': '5px',
        'padding': '5px',
        'fontSize': '10px',
    }
    # Was spelled 'marigin', which is not a style property and was ignored.
    value_style = {
        'margin': '5px',
    }

    # (label, span id, placeholder) per column; None marks a horizontal rule.
    info_rows = [
        [('ID', 'uid_span', 'xx')],
        [('Name', 'task_name_span', 'xx')],
        [('Created Time', 'create_time', '2021/2/3, 15:00')],
        None,
        [
            ('Input Format', 'input_format', 'auto'),
            ('Sequence Type', 'sequence_type', 'auto'),
            ('Group Strategy', 'group_strategy', 'auto'),
            ('Grouping Resolution ', 'group_resolution', 'auto'),
            ('Clustering Method ', 'clustering_method_value', 'auto'),
        ],
        [
            ('Min Length ', 'min_len', 'auto'),
            ('Max Length ', 'max_len', 'auto'),
            ('Display Range', 'display_left_right', 'auto'),
            ('Basic Analysis', 'basic_analysis', 'auto'),
            ('Height', 'height_algorithm', 'auto'),
        ],
        [
            ('Adjacent Alignment ', 'adjacent_alignment', 'auto'),
            ('Global Alignment ', 'global_alignment', 'auto'),
            ('Align Metric ', 'align_metric', 'auto'),
            ('Connect Threshold ', 'connect_threshold_value', 'auto'),
            ('Logo Type', 'logo_type', 'auto'),
        ],
        None,
    ]
    info_children = []
    for row in info_rows:
        if row is None:
            info_children.append(html.Hr())
            continue
        info_children.append(dbc.Row(
            [_info_col(label, span_id, label_style, value_style, placeholder)
             for label, span_id, placeholder in row],
            style={'marginTop': '10px'}))
    info_children.append(dbc.Row([
        dbc.Col([html.Span('* For more details, please download the configure file at the bottom of the result page')],
                style={'fontSize': '10px'}),
    ]))

    task_info_panel = dbc.Card(
        [
            html.Div('Please save the link of this page for future use. MetaLogo will keep it for at least 7 days.',
                     style={'fontWeight': 'bold', 'color': '#576ef2', 'background': '#568af13b'}),
            dbc.CardHeader("Task info and parameters", style=bold),
            dbc.CardBody(
                [
                    # NOTE(review): 'tableCell' is kept as found; the CSS
                    # value is normally spelled 'table-cell' -- confirm intent.
                    dbc.Col(info_children,
                            style={'display': 'tableCell', 'verticalAlign': 'middle'})
                ]
            )
        ], style=card_style, id='task_info_panel'
    )

    seqlogo_panel = dbc.Card(
        [
            dbc.CardHeader("Sequence Logo", style=bold),
            dbc.CardBody([
                html.Div([
                    html.Img(id='logo_img', src='', style={"width": "100%", "verticalAlign": "top"}),
                ]),
                html.Div('* The red dot on the tree indicates the group your target sequence (first sequence of your input) is in.',
                         style={'fontSize': '10px', 'color': '#ff6f00'}),
                html.Hr(),
                dbc.Row(
                    [
                        dbc.Col(
                            dbc.FormGroup([
                                dbc.Label("Left pos", html_for='input'),
                                dbc.Input(type="number", min=0, max=MAX_SEQ_LEN,
                                          id="fast_left_pos", step=1, value=0),
                            ])
                        ),
                        dbc.Col(
                            dbc.FormGroup([
                                dbc.Label("Right pos", html_for='input'),
                                dbc.Input(type="number", max=MAX_SEQ_LEN,
                                          id="fast_right_pos", step=1, value=10),
                            ])
                        ),
                        dbc.Col(
                            dbc.FormGroup([
                                dbc.Label("Group limit", html_for='input'),
                                dbc.Input(type="number", min=1, max=GROUP_LIMIT,
                                          id="fast_group_limit", step=1, value=1),
                            ])
                        ),
                        dbc.Col(
                            dbc.FormGroup([
                                dbc.Label("Reset resolution", html_for='input'),
                                dbc.Input(type="number", min=0, max=1,
                                          id="reset_resolution", step=0.000001, value=1),
                            ])
                        ),
                    ]),
                html.Div(
                    [
                        dbc.Button("Fast Re-Run", n_clicks=0, id='reset_resolution_btn', color='info'),
                        html.Div('* Only for auto-grouping or global alignment scenario'),
                    ], style={'textAlign': 'right', 'fontSize': '10px'}),
            ])
        ], style=card_style, id='seqlogo_panel'
    )

    # (caption, image id, image style) for each statistics figure.
    figures = [
        ('Figure 0. Sequence lengths distribution.',
         'lengths_img_res', {"margin": "auto"}),
        ('Figure 1. Sequence counts of each group.',
         'count_img_res', {"margin": "auto"}),
        ('Figure 2. Entropies of each position. ("X"s mean gaps)',
         'entropy_img_res', {"margin": "auto", "width": "100%"}),
        ('Figure 3. Entropies distribution of each group.',
         'entropy_boxplot_img_res', {"margin": "auto"}),
        ('Figure 4. Correlations among groups (only in global alignment mode and #groups>1). (Only for global alignment or auto-grouping mode)',
         'clustermap_img_res', {"margin": "auto", "width": "60%"}),
        ('Figure 5. Distribution of pairwise distances of nodes in the phylogenetic tree. (Only for auto-grouping mode)',
         'dists_img_res', {"margin": "auto", "width": "60%"}),
    ]
    stats_children = []
    for idx, (caption, img_id, img_style) in enumerate(figures):
        if idx:
            stats_children.append(html.Hr())
        stats_children.extend(_figure_rows(caption, img_id, img_style))

    statistics_panel = dbc.Card(
        [
            dbc.CardHeader("Statistics Analysis", style=bold),
            dbc.CardBody(stats_children),
        ], id='statistics_panel', style=card_style
    )

    other_panel = dbc.Card(
        [
            dbc.CardHeader("Other Results", style=bold),
            dbc.CardBody(
                [
                    html.Div([
                        html.Span('1. Please click to open '),
                        html.A("MSA visualization", href="/msa", target='_blank', id='msa_btn'),
                    ]),
                    html.Div([
                        html.Span('2. Please click to open '),
                        html.A("Phylogenetic tree visualization", href="/tree", target='_blank', id='tree_btn'),
                    ]),
                ]
            )
        ], style=card_style, id='other_panel'
    )

    # Was spelled 'maring', which is not a style property and was ignored.
    btn_style = {'margin': '10px'}
    # (button label, button id, dcc.Download id), three per row.
    download_rows = [
        [
            ("Config File", "config_download_btn", "config_download"),
            ("Sequence Input", 'seq_input_download_btn', "seq_input_download"),
            ("Sequence Logo", 'seq_logo_download_btn', "seq_logo_download"),
        ],
        [
            ("MSA result", 'msa_download_btn', "msa_download"),
            ("Phylogenetic Tree", 'phylo_download_btn', "phylo_download"),
            ("Grouping details", 'grouping_download_btn', "grouping_download"),
        ],
    ]
    download_panel = dbc.Card(
        [
            dbc.CardHeader("Download Files", style=bold),
            dbc.CardBody(
                [
                    dbc.Col([
                        dbc.Row(
                            [_download_col(label, btn_id, dl_id, btn_style)
                             for label, btn_id, dl_id in row],
                            style={'margin': '20px'})
                        for row in download_rows
                    ])
                ]
            )
        ], style=card_style, id='download_panel'
    )
    trigger_panel = html.Span('', style={'display': 'none'}, id='trigger_panel')

    loading_spinner = html.Div(
        [
            dbc.Spinner(html.Div(id="loading-output2"), fullscreen=True,
                        fullscreen_style={"opacity": "0.8"}),
        ]
    )
    modal = dbc.Modal(
        [
            dbc.ModalHeader("Error", id="result_modal_header"),
            dbc.ModalBody("Message", id="result_modal_body"),
            dbc.ModalFooter(
                html.Span('* Click outside of the modal or press ESC to hide it',
                          style={"fontSize": "10px", "color": "orange"})
            ),
        ],
        id="result_modal",
        centered=True,
        is_open=False,
    )

    layout = dbc.Container(children=[
        html.Hr(),
        uid_input,
        search_btn,
        html.Hr(),
        not_found_panel,
        error_panel,
        running_panel,
        html.Div([
            task_info_panel,
            seqlogo_panel,
            statistics_panel,
            other_panel,
            download_panel,
            trigger_panel
        ], id='result_panel', style={"display": "none"}),
        loading_spinner,
        html.Div('', id='garbage3', style={'display': 'none'}),
        html.Div('', id='garbage4', style={'display': 'none'}),
        html.Div('', id='reset_waitter', style={'display': 'none'}),
        dbc.Input(id='status', style={'display': 'none'}, type='string'),
        modal
    ])

    return layout
def _uid_from_results_path(pathname):
    """Return the task id from a ``/results/<uid>`` path, or '' if there is none."""
    if pathname and ('/results' in pathname) and pathname != '/results':
        return pathname.split('/')[-1]
    return ''


def _send_if_available(uid, n_clicks, target):
    """Return a dcc download payload for ``target``, or None when nothing to send."""
    if uid and n_clicks and n_clicks > 0 and os.path.exists(target):
        return dcc.send_file(target)
    return None


@app.callback(
    Output("config_download", "data"),
    Input("config_download_btn", "n_clicks"),
    State('url', 'pathname'),
    prevent_initial_call=True,
)
def update_config_download(n_clicks, pathname):
    """Send the task's TOML config file when the button is clicked."""
    uid = _uid_from_results_path(pathname)
    return _send_if_available(uid, n_clicks, f"{CONFIG_PATH}/{uid}.toml")


@app.callback(
    Output("seq_input_download", "data"),
    Input("seq_input_download_btn", "n_clicks"),
    State('url', 'pathname'),
    prevent_initial_call=True,
)
def update_seq_input_download(n_clicks, pathname):
    """Send the task's input sequence file when the button is clicked."""
    uid = _uid_from_results_path(pathname)
    return _send_if_available(uid, n_clicks, f"{FA_PATH}/server.{uid}.fasta")


@app.callback(
    Output("seq_logo_download", "data"),
    Input("seq_logo_download_btn", "n_clicks"),
    State('url', 'pathname'),
    prevent_initial_call=True,
)
def update_logo_input_download(n_clicks, pathname):
    """Send the rendered sequence logo in the format recorded in the task config."""
    uid = _uid_from_results_path(pathname)
    # load_config returns None for a missing file; fall back to an empty
    # dict so an unknown task yields "no download" instead of a TypeError.
    config_dict = load_config(f"{CONFIG_PATH}/{uid}.toml") or {}
    # 'png' matches the extension the results page itself loads for display.
    logo_format = config_dict.get('logo_format', 'png')
    return _send_if_available(uid, n_clicks, f"{PNG_PATH}/{uid}.{logo_format}")


@app.callback(
    Output("msa_download", "data"),
    Input("msa_download_btn", "n_clicks"),
    State('url', 'pathname'),
    prevent_initial_call=True,
)
def update_msa_download(n_clicks, pathname):
    """Send the task's multiple-sequence-alignment file when the button is clicked."""
    uid = _uid_from_results_path(pathname)
    return _send_if_available(uid, n_clicks, f"{FA_PATH}/server.{uid}.msa.rawid.fa")


@app.callback(
    Output("phylo_download", "data"),
    Input("phylo_download_btn", "n_clicks"),
    State('url', 'pathname'),
    prevent_initial_call=True,
)
def update_phylo_download(n_clicks, pathname):
    """Send the task's tree file when the button is clicked."""
    uid = _uid_from_results_path(pathname)
    return _send_if_available(uid, n_clicks, f"{FA_PATH}/server.{uid}.fasttree.rawid.tree")


@app.callback(
    Output("grouping_download", "data"),
    Input("grouping_download_btn", "n_clicks"),
    State('url', 'pathname'),
    prevent_initial_call=True,
)
def update_grouping_download(n_clicks, pathname):
    """Send the task's grouping file when the button is clicked."""
    uid = _uid_from_results_path(pathname)
    return _send_if_available(uid, n_clicks, f"{FA_PATH}/server.{uid}.grouping.fa")


@app.callback(
    [
        Output('uid_input', 'value'),
        Output('page_url', 'children')
    ],
    Input('url', 'href'),
    State('url', 'pathname')
)
def display_page(href, pathname):
    """Fill the job-id box and the "check it later" link from the current URL.

    Returns:
        tuple: ``(uid, href)`` when the URL contains ``/results/<uid>`` (any
        ``#fragment`` is stripped from the uid), otherwise ``('', '')``.
    """
    # href can be None before the browser location is known.
    uid_arr = (href or '').split('/results/')
    if len(uid_arr) > 1:
        return uid_arr[-1].split('#')[0], href
    return '', ''
@app.callback(
    [Output("search_btn", "href"),
     Output("msa_btn", "href"),
     Output("tree_btn", "href")],
    Input("uid_input", "value")
)
def change_link(uid):
    """Point the search / MSA / tree links at the task id typed in the input box."""
    return f'/results/{uid}', f'/msa/{uid}', f'/tree/{uid}'


# Set by `trigger` on every call: True once the task is in a final state.
# `trigger` itself now works on a local copy; the module attribute is only
# kept up to date in case other code reads it.
LOADED = False

# Finished results are treated as gone after this many seconds (the page
# text promises "at least 7 days").
_RESULT_TTL_SECONDS = 7 * 24 * 60 * 60


def load_config(config_file):
    """Load a task's TOML config; return None when the file does not exist."""
    if os.path.exists(config_file):
        return toml.load(config_file)
    return None


def save_config(config, config_file):
    """Write ``config`` to ``config_file`` as TOML, replacing its content."""
    with open(config_file, 'w') as f:
        toml.dump(config, f)


def _status_panels(visible, note=''):
    """Return styles for the not-found / running / error / result panels plus the queue note."""
    names = ('not_found', 'running', 'error', 'result')
    return [{} if name == visible else {'display': 'none'} for name in names] + [note]


@app.callback(
    [
        Output("loading-output2", "children"),
        Output('uid_span', 'children'),

        Output('not_found_span', 'style'),
        Output('running_span', 'style'),
        Output('error_span', 'style'),
        Output('result_panel', 'style'),
        Output('in_queue_note', 'children'),
        Output('error_info_panel', 'children'),

        Output('logo_img', 'src'),
        # info 1L
        Output('task_name_span', 'children'),
        Output('create_time', 'children'),
        Output('input_format', 'children'),
        Output('sequence_type', 'children'),
        Output('group_strategy', 'children'),
        Output('group_resolution', 'children'),
        Output('clustering_method_value', 'children'),
        # info 2L
        Output('min_len', 'children'),
        Output('max_len', 'children'),
        Output('display_left_right', 'children'),
        Output('basic_analysis', 'children'),
        Output('height_algorithm', 'children'),
        # info 3L
        Output('adjacent_alignment', 'children'),
        Output('global_alignment', 'children'),
        Output('align_metric', 'children'),
        Output('connect_threshold_value', 'children'),
        Output('logo_type', 'children'),

        Output("fast_left_pos", "value"),
        Output("fast_right_pos", "value"),
        Output("fast_group_limit", "value"),
        Output("reset_resolution", "value"),

        # statistics
        Output('lengths_img_res', 'src'),
        Output('count_img_res', 'src'),
        Output('entropy_img_res', 'src'),
        Output('entropy_boxplot_img_res', 'src'),
        Output('clustermap_img_res', 'src'),
        Output('dists_img_res', 'src'),
        # nondisplay/active
        Output('other_panel', 'style'),
        Output('msa_download_btn', 'disabled'),
        Output('phylo_download_btn', 'disabled'),
        Output('grouping_download_btn', 'disabled'),
        Output('reset_resolution_btn', 'disabled'),
        Output('reset_resolution', 'disabled'),
        # other
        Output("seq_logo_download_btn", "children"),
        # loaded count
        Output("status", "value")
    ],
    [
        Input('trigger_panel', 'children'),
    ],
    State('url', 'pathname'),
)
def trigger(nonsense, pathname):
    """Populate the whole results page for the task id found in the URL.

    Args:
        nonsense: content of the hidden trigger node; unused, it only makes
            the callback fire once the page layout exists.
        pathname: current URL path, expected to look like ``/results/<uid>``.

    Returns:
        list: one value per declared output (44 in total), in the order of
        the decorator above.

    Raises:
        PreventUpdate: when the URL is not a results URL.
    """
    global LOADED

    if not pathname or 'result' not in pathname:
        raise PreventUpdate

    if ('/results' in pathname) and (pathname != '/results'):
        uid = pathname.split('/')[-1]
    else:
        uid = ''

    if uid != '':
        status = get_status(uid)
        _rq_found, rq_failed, exc_info = check_queue_status(uid)
    else:
        status = 'not found'
        rq_failed = False
        exc_info = ''
    if exc_info is None:
        exc_info = ''

    results_arr = ['', uid]

    # Exactly one banner/panel is shown. `loaded` is True for final states.
    if rq_failed:
        loaded = True
        results_arr += _status_panels('error')
    elif status == 'running':
        loaded = False
        results_arr += _status_panels('running')
    elif status == 'in-queue':
        loaded = False
        results_arr += _status_panels('running', '(in queue)')
    elif status in ('error', 'failed'):
        loaded = True
        results_arr += _status_panels('error')
    elif status == 'finished':
        loaded = True
        # Assumes get_finished_time returns epoch seconds -- TODO confirm.
        # The age is now minus the finish time; the operands used to be
        # swapped, so the difference was never positive and nothing expired.
        finished_time = get_finished_time(uid)
        if time.time() - finished_time > _RESULT_TTL_SECONDS:
            results_arr += _status_panels('not_found')
            status = 'not found'
        else:
            results_arr += _status_panels('result')
    else:
        # 'not found' and any unrecognised status: show the not-found
        # banner so the number of returned values always matches the outputs.
        loaded = True
        results_arr += _status_panels('not_found')
    LOADED = loaded

    err_info = ''
    if status in ('error', 'failed'):
        err_info = get_status(f"{uid}-errinfo")
        if err_info is None or err_info == 'not found':
            err_info = ''
    err_info += ' (' + exc_info + ') '
    results_arr += [err_info]

    show_results = loaded and status == 'finished'

    src = ''
    logo_png = f'{PNG_PATH}/{uid}.png'
    if show_results and os.path.exists(logo_png):
        with open(logo_png, 'rb') as img_file:
            encoded_image = base64.b64encode(img_file.read())
        src = 'data:image/png;base64,{}'.format(encoded_image.decode())
    results_arr += [src]

    # A missing config file degrades to empty fields instead of failing.
    config_dict = load_config(f"{CONFIG_PATH}/{uid}.toml") or {}

    info_keys = [
        'task_name', 'create_time', 'seq_file_type', 'sequence_type',
        'group_strategy', 'group_resolution', 'clustering_method',
        'min_length', 'max_length', 'display_left_right', 'analysis',
        'height_algorithm',
        'align', 'padding_align', 'align_metric', 'connect_threshold',
        'logo_type',
    ]
    for item in info_keys:
        if item == 'create_time':
            tm = config_dict.get(item, '')
            if tm != '':
                tm = datetime.datetime.utcfromtimestamp(int(tm)).strftime('%Y-%m-%d %H:%M:%S (UTC)')
            results_arr += ['%s' % (tm)]
        elif item == 'display_left_right':
            results_arr += ['%s:%s' % (config_dict.get('display_range_left'),
                                       config_dict.get('display_range_right'))]
        else:
            results_arr += ['%s' % (config_dict.get(item, ''))]

    # fast rerun
    results_arr += [
        config_dict.get('display_range_left', ''),
        config_dict.get('display_range_right', ''),
        config_dict.get('group_limit', ''),
        config_dict.get('group_resolution', ''),
    ]

    # Statistics figures, in the order of the six image outputs.
    figure_names = ['lengths', 'counts', 'entropy', 'boxplot_entropy',
                    'clustermap', 'treedistances']
    if show_results and config_dict.get('analysis'):
        results_arr += [get_img_src(f'{PNG_PATH}/{uid}.{name}.png')
                        for name in figure_names]
    else:
        results_arr += [''] * len(figure_names)

    # MSA/tree links, downloads and the fast re-run controls only apply
    # to auto-grouping; the links panel is also shown for global alignment.
    is_auto = config_dict.get('group_strategy', '') == 'auto'
    show_other_panel = is_auto or bool(
        config_dict.get('padding_align', False) and config_dict.get('align', False))
    results_arr += [{} if show_other_panel else {'display': 'none'}]
    results_arr += [not is_auto] * 5

    logo_type = config_dict.get('logo_format', '')
    results_arr += [f'Sequence Logo ({logo_type})']

    results_arr += [status]
    return results_arr


@app.callback(
    [
        Output('result_modal_body', 'children'),
        Output('result_modal', 'is_open'),
        Output('reset_waitter', 'children'),
    ],
    [
        Input("reset_resolution_btn", "n_clicks")
    ],
    [
        State("fast_left_pos", "value"),
        State("fast_right_pos", "value"),
        State("fast_group_limit", "value"),
        State("reset_resolution", "value"),
        State("url", "pathname"),
        State("uid_span", "children")
    ],
    prevent_initial_call=True,
)
def trigger_reset_resolution(n_clicks, left_pos, right_pos, group_limit, resolution, pathname, uid):
    """Validate the fast re-run form, update the task config and re-enqueue it.

    Returns:
        tuple: ``(modal message, modal open flag, reload command)``. On a
        validation problem the modal opens with a message; on success the
        command is ``'Go'``, which the clientside callback turns into a reload.

    Raises:
        PreventUpdate: when the button has not been clicked.
    """
    if not n_clicks:
        raise PreventUpdate

    config_file = f"{CONFIG_PATH}/{uid}.toml"
    config_dict = load_config(config_file)
    if not config_dict:
        # load_config returns None for a missing file.
        return 'Task configuration not found, please reload the page', True, ''

    required = (
        (resolution, 'Please input the resolution'),
        (left_pos, 'Please input the left position'),
        (right_pos, 'Please input the right position'),
        (group_limit, 'Please input the group limit'),
    )
    for value, message in required:
        if value is None:
            return message, True, ''

    unchanged = (
        resolution == config_dict.get('group_resolution')
        and left_pos == config_dict.get('display_range_left')
        and right_pos == config_dict.get('display_range_right')
        and group_limit == config_dict.get('group_limit')
    )
    if unchanged:
        return 'Same configuration, no need to re-run', True, ''

    if resolution > 1 or resolution < 0:
        return 'Resolution value must be between 0 and 1', True, ''

    if group_limit > GROUP_LIMIT:
        return f'Group limit must be <= {GROUP_LIMIT}', True, ''
    # Also rejects negative values, which the former `== 0` test let through.
    if group_limit < 1:
        return f'Group limit must > 0 ', True, ''

    if right_pos > MAX_SEQ_LEN:
        return f'Right pos > Max sequence length {MAX_SEQ_LEN}', True, ''
    if right_pos < -1 * MAX_SEQ_LEN:
        return f'Right pos < -1 * Max sequence length {MAX_SEQ_LEN}', True, ''

    config_dict['group_resolution'] = float(resolution)
    config_dict['display_range_left'] = left_pos
    config_dict['display_range_right'] = right_pos
    config_dict['group_limit'] = group_limit
    save_config(config_dict, config_file)

    enqueue(config_file)

    return '', False, 'Go'


# Reload the page once a fast re-run has been queued.
app.clientside_callback(
    """
    function(command) {
        if (command=='Go'){
            document.location.reload()
        }
        return ''
    }
    """,
    Output('garbage3', 'children'),
    Input('reset_waitter', 'children'),
)


# While the task is running or queued, reload the page after 10 seconds.
app.clientside_callback(
    """
    function(status) {
        if((status == 'running')||(status=='in-queue')){
            setTimeout(function(){
                window.location.reload(1);
            }, 10000);
        }
    }
    """,
    Output('garbage4', 'children'),
    Input('status', 'value'),
)

layout = get_layout()
loading_spinner = html.Div(
    [
        dbc.Spinner(html.Div(id="loading-output5"), fullscreen=True,
                    fullscreen_style={"opacity": "0.8"}),
    ]
)

layout = dbc.Container([
    html.H3([html.Span("Phylogenetic tree visualization result for task "), html.A(id="tree_uid")]),
    dbc.Col(
        [
            dbc.Row([
                html.Img(id='tree_img_src', src='', style={"margin": "auto", "width": "100%"}),
            ]),
        ]
    ),
    loading_spinner
])


@app.callback(
    [
        Output("tree_uid", "children"),
        Output("tree_uid", "href"),
        Output("tree_img_src", "src"),
        Output("loading-output5", "children"),
    ],
    Input('url', 'pathname'),
)
def display_page(pathname):
    """Show the tree image of the task named in ``/tree/<uid>``.

    Args:
        pathname: current URL path (may be ``None`` before the URL is known).

    Returns:
        tuple: ``(uid, link to the results page, image src, spinner
        placeholder)``; the image src is '' when no tree image is available,
        and all four values are '' when the URL is not a tree URL.
    """
    arrs = (pathname or '').split('/tree/')
    if len(arrs) <= 1:
        return '', '', '', ''

    uid = arrs[-1]
    results_href = '/results/' + uid
    tree_file = f'{PNG_PATH}/{uid}.tree.png'
    # uid comes straight from the URL, so refuse anything that could step
    # out of PNG_PATH before touching the file system.
    if '/' in uid or '\\' in uid or not os.path.exists(tree_file):
        return uid, results_href, '', ''
    return uid, results_href, get_img_src(tree_file), ''
# -------- /server/assets/fav1.ico --------
# https://raw.githubusercontent.com/labomics/MetaLogo/918b04cbf15dd3bdd4acad5d792d20a7f6c693ce/server/assets/fav1.ico
# -------- /server/assets/favicon.ico --------
# https://raw.githubusercontent.com/labomics/MetaLogo/918b04cbf15dd3bdd4acad5d792d20a7f6c693ce/server/assets/favicon.ico
# -------- /server/assets/google.js --------
# window.dataLayer = window.dataLayer || [];
# function gtag(){dataLayer.push(arguments);}
# gtag('js', new Date());
# gtag('config', 'G-0EZX46EYE9');
# -------- /server/assets/introduction.PNG --------
# https://raw.githubusercontent.com/labomics/MetaLogo/918b04cbf15dd3bdd4acad5d792d20a7f6c693ce/server/assets/introduction.PNG
# -------- /server/config.py --------
# -*- coding: utf-8 -*-
#!/usr/bin/env python
import os
import toml

# Built-in defaults; each one can be overridden from MetaLogo/server.toml.
PNG_PATH = 'MetaLogo/figure_output'
FA_PATH = 'MetaLogo/sequence_input'
EXAMPLE_PATH = 'MetaLogo/examples'
CONFIG_PATH = 'MetaLogo/configs'
MAX_SEQ_LIMIT = 50000
MAX_SEQ_LIMIT_AUTO = 10000
MAX_INPUT_SIZE = 5242880
MAX_SEQ_LEN = 100
GROUP_LIMIT = 20
GOOGLE_ANALYTICS_ID = ''
BAIDU_TONGJI_ID = ''
SQLITE3_DB = 'MetaLogo/db/metalogo.db'
CLUSTALO_BIN = ''
FASTTREE_BIN = ''
FASTTREEMP_BIN = ''
TREECLUSTER_BIN = ''

# Read the optional server configuration; a key that is absent keeps the
# default assigned above.
if os.path.exists('MetaLogo/server.toml'):
    paras_dict = toml.load('MetaLogo/server.toml')
    EXAMPLE_PATH = paras_dict.get('example_path', EXAMPLE_PATH)
    FA_PATH = paras_dict.get('output_fa_path', FA_PATH)
    PNG_PATH = paras_dict.get('output_png_path', PNG_PATH)
    CONFIG_PATH = paras_dict.get('config_path', CONFIG_PATH)
    MAX_SEQ_LIMIT = paras_dict.get('max_seq_limit', MAX_SEQ_LIMIT)
    MAX_SEQ_LIMIT_AUTO = paras_dict.get('max_seq_limit_auto', MAX_SEQ_LIMIT_AUTO)
    MAX_INPUT_SIZE = paras_dict.get('max_input_size', MAX_INPUT_SIZE)
    MAX_SEQ_LEN = paras_dict.get('max_seq_len', MAX_SEQ_LEN)
    GOOGLE_ANALYTICS_ID = paras_dict.get('google_analytics_id', GOOGLE_ANALYTICS_ID)
    BAIDU_TONGJI_ID = paras_dict.get('baidu_tongji_id', BAIDU_TONGJI_ID)
    SQLITE3_DB = paras_dict.get('sqlite3_db', SQLITE3_DB)
    CLUSTALO_BIN = paras_dict.get('clustalo_bin', CLUSTALO_BIN)
    FASTTREE_BIN = paras_dict.get('fasttree_bin', FASTTREE_BIN)
    FASTTREEMP_BIN = paras_dict.get('fasttreemp_bin', FASTTREEMP_BIN)
    TREECLUSTER_BIN = paras_dict.get('treecluster_bin', TREECLUSTER_BIN)
    GROUP_LIMIT = paras_dict.get('group_limit', GROUP_LIMIT)

# Make sure the working directories exist.  exist_ok=True already covers the
# "directory is there" case, and the empty-string guard handles a database
# path without a directory component (os.makedirs('') raises).
for _directory in (PNG_PATH, FA_PATH, CONFIG_PATH, os.path.dirname(SQLITE3_DB)):
    if _directory:
        os.makedirs(_directory, exist_ok=True)
# -------- /server/gen_example.py --------
#!/usr/bin/env python
import random

if __name__ == '__main__':
    # Writes example2.fa: 10 random DNA sequences for each length 12..17,
    # with a biased base composition at a few fixed positions.
    n = 10
    lens = [12,13,14,15,16,17]

    # Biased positions and the base order the weights below apply to.
    targets = [3,9,14,15]
    table = {3:'ATGC',9:'TCGA',14:'CAGT',15:'AGCT'}

    seqs = []
    for ln in lens:
        for c in range(n):
            name = f'>seq{c} group@{ln}-tmp'
            if ln == 14:
                # Length-14 sequences are randomly split over two groups.
                name = random.choice([f'>seq{c} group@{ln}-tmp',f'>seq{c} group@{ln}-2-tmp'])
            bases = []
            for i in range(ln):
                if i in targets:
                    bases.append(random.choices(table[i],weights=[0.7,0.2,0.1,0.1],k=1)[0])
                else:
                    bases.append(random.choice('ATGC'))
            seqs.append([name,''.join(bases)])

    with open('example2.fa','w') as outpf:
        for name,seq in seqs:
            outpf.write(f'{name}\n')
            outpf.write(f'{seq}\n')


# -------- /server/handle_seqs.py --------
#!/usr/bin/env python
import base64
import os


def save_seqs(seqs, filename):
    """Write ``(name, sequence)`` pairs to *filename* in FASTA format.

    Each pair becomes a ``>name`` header line followed by one sequence line.
    The file is written as UTF-8 so the result does not depend on the
    platform's default encoding.
    """
    with open(filename, 'w', encoding='utf-8') as outpf:
        outpf.writelines(f'>{seqname}\n{seq}\n' for seqname, seq in seqs)


def handle_seqs_file(content,format="fasta",sequence_type="dna"):
    """Decode an uploaded ``...;base64,<payload>`` string and parse it.

    The payload is base64-decoded as UTF-8 and handed to
    ``handle_seqs_str``.  Any failure (missing marker, bad base64, bad
    encoding, parser exception) is reported as
    ``{'successful': False, 'msg': 'File processing error: ...'}`` instead
    of being raised.
    """
    try:
        data = content.split(";base64,")[1]
        # 'utf-8-sig' drops a leading byte-order mark; with plain 'utf-8'
        # the BOM stays glued to the first '>' / '@' and the upload is
        # rejected as malformed.
        data = base64.b64decode(data).decode('utf-8-sig')
        return handle_seqs_str(data,format,sequence_type)
    except Exception as e:
        return {'successful':False, 'msg':f'File processing error: {e}'}
_FASTA_ERROR = 'Fasta format error, please check!'
_FASTQ_ERROR = 'Fastq format error, please check!'

# Allowed characters per sequence type, listed in auto-detection priority.
_ALPHABETS = {
    'DNA': frozenset('ATGCN-'),
    'RNA': frozenset('AUGCN-'),
    'AA': frozenset('ARNDCQEGHILKMFPSTWYV-'),
}


def _nonblank_lines(content):
    """Yield the stripped, non-empty lines of *content*."""
    for raw in content.split('\n'):
        line = raw.strip()
        if line:
            yield line


def _parse_fasta(content):
    """Parse FASTA text into ``([[name, SEQ], ...], error_message)``."""
    seqs = []
    name = ''
    chunks = []
    for index, line in enumerate(_nonblank_lines(content)):
        if line[0] == '>':
            if name:
                if not chunks:
                    # Two headers in a row: the pending record has no sequence.
                    return seqs, _FASTA_ERROR
                seqs.append([name, ''.join(chunks).upper()])
            name = line[1:]
            chunks = []
        elif index == 0:
            # The first non-blank line must be a header.
            return seqs, _FASTA_ERROR
        else:
            chunks.append(line)
    if name:
        if not chunks:
            # Trailing header without a sequence.
            return seqs, _FASTA_ERROR
        seqs.append([name, ''.join(chunks).upper()])
    return seqs, ''


def _parse_fastq(content):
    """Parse FASTQ text (4 non-blank lines per record) the same way."""
    seqs = []
    name = ''
    for index, line in enumerate(_nonblank_lines(content)):
        position = index % 4
        if position == 0:
            if line[0] != '@':
                return seqs, _FASTQ_ERROR
            name = line[1:]
        elif position == 1:
            seqs.append([name, line.upper()])
        # Positions 2 and 3 ('+' line and qualities) are ignored.
    return seqs, ''


def handle_seqs_str(content, format="fasta", sequence_type="dna"):
    """Parse and validate sequence text.

    :param content: raw FASTA or FASTQ text.
    :param format: ``'fasta'`` or ``'fastq'`` (case-insensitive); any other
        value parses nothing.
    :param sequence_type: ``'dna'``, ``'rna'``, ``'aa'`` or ``'auto'``
        (case-insensitive).  With ``'auto'`` the type is detected from the
        characters used, trying DNA, then RNA, then protein.
    :return: ``{'successful': bool, 'msg': str, 'res': {...}}``.  ``res``
        always has ``'seqs'`` (``[name, UPPERCASE_SEQUENCE]`` pairs); the
        final return also has ``'sequence_type'`` (resolved when ``'auto'``
        was requested).

    Compared with the previous implementation, a malformed FASTA file is now
    reported as a Fasta (not Fastq) format error, and a format error that
    leaves nothing parsed is reported as such instead of the generic
    "No sequences parsed" message.
    """
    fmt = format.lower()
    if fmt == 'fasta':
        seqs, parse_error = _parse_fasta(content)
    elif fmt == 'fastq':
        seqs, parse_error = _parse_fastq(content)
    else:
        seqs, parse_error = [], ''

    base_set = set()
    for _, seq in seqs:
        base_set.update(seq)

    requested = sequence_type.upper()
    if requested in _ALPHABETS and not base_set.issubset(_ALPHABETS[requested]):
        return {'successful':False, 'msg':f'{sequence_type} sequences not valid, please check', 'res': {'seqs':seqs}}

    detected = None
    if requested == 'AUTO':
        detected = next(
            (label for label in ('DNA', 'RNA', 'AA') if base_set.issubset(_ALPHABETS[label])),
            None,
        )
        if detected is None:
            return {'successful':False, 'msg':f'Unclear sequence type (DNA or Protein), please check', 'res': {'seqs':seqs}}

    if len(seqs) == 0:
        # Prefer the specific format error over the generic message.
        return {'successful':False, 'msg':parse_error or 'No sequences parsed, please check', 'res': {'seqs':seqs}}

    if detected is not None:
        sequence_type = detected.lower()
    return {'successful':not parse_error, 'msg':parse_error, 'res': {'seqs':seqs,'sequence_type':sequence_type}}
# -------- /server/index.py --------
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc

from .app import app
from .apps import results, analysis,about,msa,tree
from .. import MetaLogo

server = app.server

# Top navigation bar; the first three links are restyled by highlight_btn.
nav = dbc.Nav(
    [
        dbc.NavItem(dbc.NavLink("About", href="/about",id='jump_about')),
        dbc.NavItem(dbc.NavLink("Analysis", href="/analysis", id='jump_analysis')),
        dbc.NavItem(dbc.NavLink("Results", href="/results", id='jump_results')),
        dbc.NavItem(dbc.NavLink("Tutorial", href="https://github.com/labomics/MetaLogo/wiki/Web-server",target='_blank')),
        dbc.NavItem(dbc.NavLink("Python package", href="https://github.com/labomics/MetaLogo",target='_blank')),
        dbc.NavItem(dbc.NavLink("Paper", target="_blank",href='https://pubmed.ncbi.nlm.nih.gov/35108357/')),
        dbc.NavItem(dbc.NavLink("Feedback", href="mailto:achenge07@163.com", target='_blank')),
    ]
)

# Page header: title, package version and the navigation bar.
toppanel = html.Div(
    [
        dbc.Row([
            dbc.Col(dbc.Row([
                html.H1(['MetaLogo']),
                html.Span(MetaLogo.__version__,style={'color':'grey'}),
            ])),
        ],
        style={'marginTop':'10px'}
        ),
        dbc.Row(nav)
    ]
)

footer_panel = html.Div([
    dbc.Row([
        dbc.Col(['© Developed by Yaowen Chen @ Beijing Institute of Basic Medical Sciences by using ',
                 html.A('Matplotlib',href='https://matplotlib.org/'),', ', html.A('Plotly Dash',href='https://dash.plotly.com/'), ' and ',
                 html.A('other great tools',href='https://github.com/labomics/MetaLogo/blob/main/requirements.txt')
                 ],
                )
    ]),
    dbc.Row(dbc.Col('July, 2021'))
],style={"textAlign":"center","marginTop":"40px","fontSize":"10px","color":"grey"})


layout = dbc.Container(children=[
    toppanel
])

# Layout actually served: header, the per-page container filled by
# display_page, and the footer.
app.layout = html.Div([
    dcc.Location(id='url', refresh=False),
    layout,
    html.Div(id='page-content'),
    footer_panel
])

# "complete" layout: lists every page so callbacks that target components
# of pages not currently rendered can still be validated.
app.validation_layout = html.Div([
    dcc.Location(id='url', refresh=False),
    layout,
    html.Div(id='page-content'),
    about.layout,
    results.layout,
    analysis.layout,
    msa.layout,
    tree.layout
])

@app.callback(
    [Output('jump_analysis','style'),
     Output('jump_about','style'),
     Output('jump_results','style')],
    Input('url','pathname'))
def highlight_btn(pathname):
    """Return the styles for the Analysis, About and Results nav links.

    The link whose name occurs in *pathname* gets the highlight style and
    the others get ``None``.  A missing pathname highlights nothing instead
    of raising ``TypeError`` on the ``in`` test.
    """
    hightlighted_style = {'color':'#e517e5','fontWeight':'bold'}
    pathname = pathname or ''
    if 'analysis' in pathname:
        return hightlighted_style,None,None
    elif 'about' in pathname:
        return None,hightlighted_style,None
    elif 'results' in pathname:
        return None,None,hightlighted_style
    else:
        return None,None,None

@app.callback(Output('page-content', 'children'),
              Input('url', 'pathname'))
def display_page(pathname):
    """Return the layout of the page addressed by *pathname*.

    Exact routes are checked first, then the ``/results/<id>``,
    ``/msa/<id>`` and ``/tree/<id>`` routes; anything else renders ``'404'``.
    A missing pathname renders nothing rather than raising ``TypeError``.
    """
    if pathname is None:
        return ''
    exact_routes = {
        '/results': results.layout,
        '/analysis': analysis.layout,
        '/about': about.layout,
        '/': about.layout,
    }
    if pathname in exact_routes:
        return exact_routes[pathname]
    for marker, page in (('/results/', results.layout),
                         ('/msa/', msa.layout),
                         ('/tree/', tree.layout)):
        if marker in pathname:
            return page
    return '404'

if __name__ == '__main__':

    app.run_server(debug=True)
# -------- /server/redis_queue.py --------
# -*- coding: utf-8 -*-
#!/usr/bin/env python

from rq import Connection, Queue
from redis import Redis
from .sqlite3 import write_status
from .run_metalogo import execute
import toml

def enqueue(config_file,queue='default'):
    """Mark the task as 'in-queue' and submit it to the Redis queue.

    The task uid is read from *config_file* (TOML) and reused as the job id.
    Jobs time out after one hour and their results are kept for seven days.
    Always returns 0.
    """
    config = toml.load(config_file)
    write_status(config['uid'],'in-queue')
    redis_conn = Redis()
    q = Queue(queue,connection=redis_conn,default_timeout=3600)
    q.enqueue(execute,args=(config_file,),job_timeout=3600,
              job_id=config['uid'],result_ttl=60*60*24*7,
              on_success=report_success,on_failure=report_failure)
    return 0

def report_failure(job,connection,type,value,traceback):
    """Failure callback: log the job and record its status as 'failed'."""
    print('failure')
    print('args: ', job.args)
    print('traceback',traceback)
    write_status(job.id,'failed')

def report_success(job,connection,result,*args,**kwargs):
    """Success callback: log the finished job (status is written by execute)."""
    print('finished: ', job.id)
    print('args: ', job.args)


def check_queue_status(job_id,queue='default'):
    """Look a job up in the queue.

    :param queue: name of the queue the job was submitted to; defaults to
        'default', the queue this function always used before.
        NOTE(review): rq appears to return a job only from the queue it was
        enqueued on — confirm against the installed rq version.
    :return: ``(found, is_failed, exc_info)``; ``(False, False, '')`` when
        the job is unknown.
    """
    redis_conn = Redis()
    q = Queue(queue,connection=redis_conn)
    job = q.fetch_job(job_id)
    if job is not None:
        return True,job.is_failed,job.exc_info
    else:
        return False,False,''


# -------- /server/run_metalogo.py --------
import os
import sys
import toml
from .sqlite3 import write_status
from ..MetaLogo.entry import run_from_config


def execute(config_file):
    """Run one MetaLogo task and record its outcome in the status database.

    The status goes 'running' -> 'finished', or -> 'error' with the message
    stored under ``<uid>-errinfo``.  Returns the exception when one was
    caught and ``None`` otherwise.
    """
    config = toml.load(config_file)
    uid = config['uid']
    # Use the task's own database for every write, including 'running'.
    db = config['sqlite3_db']
    try:
        write_status(uid,'running',db)
        result = run_from_config(config_file)

        if (result is not None) and 'error' in result:
            error = result['error']
            write_status(uid,'error',db)
            write_status(f"{uid}-errinfo",error,db)
        else:
            write_status(uid,'finished',db)

    except Exception as e:
        print('error: ', repr(e))
        write_status(uid,'error',db)
        # Exception args are not always strings (e.g. OSError carries an
        # errno); a plain ' '.join(e.args) would raise TypeError here and
        # the error text would never be stored.
        error = ' '.join(str(arg) for arg in e.args).replace('\'','').replace('\"','')
        print(error)
        write_status(f"{uid}-errinfo",error,db)
        return e
# -------- /server/sqlite3.py --------
# -*- coding: utf-8 -*-
#!/usr/bin/env python

from contextlib import closing
import sqlite3
import time
from .config import SQLITE3_DB

_CREATE_TABLE_SQL = "create table if not exists metalogo_server (uid TEXT primary key, status TEXT, created INTEGER, finished INTEGER )"


def _fetch_column(uid, column, default):
    """Return *column* of the row for *uid*, or *default* when it is absent.

    *column* is only ever one of the literal names passed by the getters
    below, so it is safe to interpolate; the uid is always bound as a
    parameter.
    """
    with closing(sqlite3.connect(SQLITE3_DB)) as connection:
        with closing(connection.cursor()) as cursor:
            cursor.execute(_CREATE_TABLE_SQL)
            rows = cursor.execute(
                f"SELECT uid, {column} FROM metalogo_server WHERE uid = ?", (uid,)
            ).fetchall()
    if len(rows) == 1:
        return rows[0][1]
    return default


def get_status(uid):
    """Return the stored status of *uid*, or 'not found'."""
    return _fetch_column(uid, 'status', 'not found')


def get_create_time(uid):
    """Return the creation time stored for *uid*, or -1."""
    return _fetch_column(uid, 'created', -1)


def get_finished_time(uid):
    """Return the finish time stored for *uid*, or -1."""
    return _fetch_column(uid, 'finished', -1)


def write_status(uid,status,db=SQLITE3_DB):
    """Insert or update the status row for *uid* in database *db*.

    A new row gets the current time as ``created`` and -1 as ``finished``;
    an existing row has its status replaced, and ``finished`` set to the
    current time when the new status is 'finished'.

    All values are bound as SQL parameters.  uids can originate from request
    URLs and statuses can be arbitrary error text, so building the
    statements by string formatting allowed SQL injection and failed on
    any value containing a quote.
    """
    now = round(time.time())
    with closing(sqlite3.connect(db)) as connection:
        with closing(connection.cursor()) as cursor:
            cursor.execute(_CREATE_TABLE_SQL)
            rows = cursor.execute(
                "SELECT uid, status FROM metalogo_server WHERE uid = ?", (uid,)
            ).fetchall()
            if len(rows) == 1:
                if status == 'finished':
                    cursor.execute(
                        "UPDATE metalogo_server SET status = ?, finished = ? WHERE uid = ?",
                        (status, now, uid),
                    )
                else:
                    cursor.execute(
                        "UPDATE metalogo_server SET status = ? WHERE uid = ?",
                        (status, uid),
                    )
            else:
                cursor.execute(
                    "INSERT INTO metalogo_server VALUES (?,?,?,-1)",
                    (uid, status, now),
                )
            connection.commit()
# -------- /server/utils.py --------
#!/usr/bin/env python
import matplotlib.pyplot as plt
import numpy as np
from io import BytesIO
import base64
import os

def fig_to_uri(in_fig, close_all=True, **save_args):
    """
    Save a figure as a PNG data URI
    :param in_fig: object with a ``savefig`` method (and ``clf`` when
                   *close_all* is true)
    :param close_all: clear *in_fig* and close all pyplot figures afterwards
    :param save_args: extra keyword arguments forwarded to ``savefig``
    :return: ``data:image/png;base64,...`` string
    """
    out_img = BytesIO()
    in_fig.savefig(out_img, format='png', **save_args)
    if close_all:
        in_fig.clf()
        plt.close('all')
    # b64encode never emits newlines, so no post-processing is needed.
    encoded = base64.b64encode(out_img.getvalue()).decode("ascii")
    return "data:image/png;base64,{}".format(encoded)

def figfile_to_uri(in_fig, close_all=True, **save_args):
    """
    Save a figure as a PNG data URI

    Kept for backward compatibility: this was a verbatim copy of
    ``fig_to_uri`` and now simply delegates to it.
    :param in_fig: see ``fig_to_uri``
    :return: see ``fig_to_uri``
    """
    return fig_to_uri(in_fig, close_all=close_all, **save_args)
def get_img_src(f):
    """Return the file at path *f* as a ``data:image/png;base64,...`` URI.

    Returns an empty string when the path does not exist.  The file is read
    inside a ``with`` block so the handle is closed promptly instead of
    being left to the garbage collector.
    """
    if not os.path.exists(f):
        return ''
    with open(f, 'rb') as image_file:
        encoded_image = base64.b64encode(image_file.read())
    src = 'data:image/png;base64,{}'.format(encoded_image.decode())
    return src
# -------- /setup.py --------
#!/usr/bin/env python

try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

# Defines __version__ by executing the package's version module without
# importing the package itself; the file is closed as soon as it is read.
with open('MetaLogo/version.py') as version_file:
    exec(version_file.read())

setup(name='MetaLogo',
      version=__version__,
      description='MetaLogo is a heterogeneity-aware sequence logo generator and aligner',
      # Adjacent literals are concatenated; each ends with a space so the
      # sentences are not fused together in the rendered description.
      long_description=(
          'MetaLogo is a tool for making sequence logos. It can take multiple sequences as input, '
          'automatically identify the homogeneity and heterogeneity among sequences and cluster them '
          'into different groups given any wanted resolution, finally output multiple aligned sequence '
          'logos in one figure. Grouping can also be specified by users, such as grouping by lengths, '
          'grouping by sample Id, etc. Compared to conventional sequence logo generator, MetaLogo can '
          'display the total sequence population in a more detailed, dynamic and informative view. '
          'To use MetaLogo, you could visit our public webserver http://metalogo.omicsnet.org. '
          'You could also install MetaLogo as a python package to use MetaLogo in your python scripts '
          'or in your OS terminal. If you want to provide MetaLogo to people in your local network, '
          'you could also setup a webserver by using docker. '
          'Please check the tutorial for detailed usage of MetaLogo package and webserver '
          '(https://github.com/labomics/MetaLogo).'
      ),
      long_description_content_type = 'text/plain',
      author='Yaowen Chen',
      author_email='achenge07@163.com',
      url='https://github.com/labomics/MetaLogo',
      packages=['MetaLogo'],
      entry_points={
          'console_scripts': ['metalogo=MetaLogo.entry:main']
      },
      python_requires='>=3.6',
      install_requires=[
          'biopython>=1.77',
          'matplotlib>=3.3.0',
          'numpy>=1.19.1',
          'pandas>=1.3.0',
          'scipy>=1.5.2',
          'seaborn>=0.11.1',
          'toml>=0.10.2',
          'treecluster>=1.0.3',
          'dendropy>=4.5.2',
          'ete3>=3.1.1',
      ],
      # Pinned versions used by the optional web server.
      extras_require={
          'webserver': [
              'dash==1.21.0',
              'dash-bio==0.8.0',
              'dash-bootstrap-components==0.12.2',
              'Flask==2.0.1',
              'gunicorn==20.1.0',
              'plotly==5.1.0',
              'supervisor==4.2.2',
              'rq==1.10.0',
              'hiredis==2.0.0',
          ]
      }
)