├── .gitignore ├── LICENSE ├── README.md ├── data ├── EBV_BMLF1_sc.csv ├── HCMV_pp65_sc.csv ├── InfluenzaA_sc.csv ├── InfluenzaM_sc.csv ├── readme.md ├── tcr_seq.csv ├── testing_data.csv └── training_data.csv ├── example_pic ├── flow_chart_simple.png ├── input_file_example.png ├── output_file_example.png └── pic1.png ├── library ├── Atchley_factors.csv ├── bg_tcr_library │ ├── TCR_10k_bg_seq.csv │ ├── TCR_output_10k.csv │ └── TCR_output_1k.csv ├── h5_file │ ├── HLA_antigen_encoder_60.h5 │ ├── TCR_encoder_30.h5 │ └── weights.h5 └── hla_library │ ├── A_prot.fasta │ ├── B_prot.fasta │ ├── C_prot.fasta │ └── E_prot.fasta ├── pMTnet.py └── test ├── code ├── ternary_train_encoding.py └── ternary_train_model_pMTnet.py ├── example_cmd.sh ├── input └── test_input.csv └── output ├── output.log └── prediction.csv /.gitignore: -------------------------------------------------------------------------------- 1 | ### VisualStudioCode ### 2 | .vscode/* 3 | !.vscode/settings.json 4 | !.vscode/tasks.json 5 | !.vscode/launch.json 6 | !.vscode/extensions.json 7 | *.code-workspace 8 | 9 | # Local History for Visual Studio Code 10 | .history/ 11 | 12 | ### VisualStudioCode Patch ### 13 | # Ignore all local history of files 14 | .history 15 | .ionide 16 | 17 | ### Python ### 18 | # Byte-compiled / optimized / DLL files 19 | __pycache__/ 20 | *.py[cod] 21 | *$py.class 22 | 23 | # C extensions 24 | *.so 25 | 26 | # Distribution / packaging 27 | .Python 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | wheels/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .nox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | *.py,cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | cover/ 70 | 71 | # Translations 72 | *.mo 73 | *.pot 74 | 75 | # Django stuff: 76 | *.log 77 | local_settings.py 78 | db.sqlite3 79 | db.sqlite3-journal 80 | 81 | # Flask stuff: 82 | instance/ 83 | .webassets-cache 84 | 85 | # Scrapy stuff: 86 | .scrapy 87 | 88 | # Sphinx documentation 89 | docs/_build/ 90 | 91 | # PyBuilder 92 | .pybuilder/ 93 | target/ 94 | 95 | # Jupyter Notebook 96 | .ipynb_checkpoints 97 | 98 | # IPython 99 | profile_default/ 100 | ipython_config.py 101 | 102 | # pyenv 103 | # For a library or package, you might want to ignore these files since the code is 104 | # intended to run in multiple environments; otherwise, check them in: 105 | # .python-version 106 | 107 | # pipenv 108 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 109 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 110 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 111 | # install all needed dependencies. 112 | #Pipfile.lock 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. 
Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 
340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pMTnet 2 | Deep learning neural network prediction tcr binding specificity to peptide and HLA based on peptide sequences. Please refer to our paper for more details: 'Deep learning-based prediction of T cell receptor-antigen binding specificity.'(https://www.nature.com/articles/s42256-021-00383-2) Lu, T., Zhang, Z., Zhu, J. et al. 2021. 3 | ![preview](https://github.com/tianshilu/pMTnet/blob/master/example_pic/pic1.png) 4 | ## Online prediction 5 | The online tool for prediction is available here : https://dbai.biohpc.swmed.edu/pmtnet/index.php 6 | ## Dependencies 7 | python(version>3.0.0) ; 8 | tensorflow (version>1.5.0) ; 9 | numpy (version=1.16.3) ; 10 | keras (version=2.2.4) ; 11 | pandas (version=0.23.4) ; 12 | scikit-learn (version=0.20.3) ; 13 | scipy (version=1.2.1) 14 | ## Guided Tutorial 15 | Command: 16 | ``` 17 | python pMTnet.py -input input.csv -library library -output output_dir -output_log test/output/output.log 18 | ``` 19 | * input.csv: input csv file with 3 columns named as "CDR3,Antigen,HLA": TCR-beta CDR3 sequence, peptide sequence, and HLA allele. \ 20 | ![Input_file_example](https://github.com/tianshilu/pMTnet/blob/master/example_pic/input_file_example.png) 21 | For more details about CDR3 encoding, please refer to https://github.com/jcao89757/TESSA. 22 | * library: diretory to the downloaded library with trained models, hla sequences, background TCR sequences, and Atchley Factors table. 
23 | * output_dir : directory where you want to save the output 24 | * output_log : local path to the log file with CDR3, Antigen, HLA information and predicted binding rank.\ 25 | 26 | 27 | ## Example 28 | The example input file is under test/input/.\ 29 | Command : 30 | ``` 31 | python pMTnet.py -input test/input/test_input.csv -library library -output test/output -output_log test/output/output.log 32 | ``` 33 | The output for test_input.csv is under test/output. 34 | 35 | ## Output file example 36 | pMTnet outputs a table with 4 columns: CDR3 sequences, antigen sequences, HLA alleles, and ranks for each pair of TCR/pMHC. The rank reflects the percentile rank of the predicted binding strength between the TCR and the pMHC with respect to the 10,000 randomly sampled TCRs against the same pMHC. A lower rank is considered a good prediction. The sequences of 10,000 background TCRs can be found under https://github.com/tianshilu/pMTnet/tree/master/library/bg_tcr_library. 37 | ![Output file example](https://github.com/tianshilu/pMTnet/blob/master/example_pic/output_file_example.png) 38 | -------------------------------------------------------------------------------- /data/EBV_BMLF1_sc.csv: -------------------------------------------------------------------------------- 1 | CDR3,rank,ratio 2 | CAITPGPYPSGYGYTF,0.049,2.54280407839979 3 | CASSESGQFYEQYF,0.098,1.08977317645705 4 | CASSILTGELFF,0.0659999999999999,1.27140203919989 5 | CASSLYLGQVRGGGEQFF,0.073,1.27140203919989 6 | CASSQDVQTTDTQYF,0.078,0.61027297881595 7 | CASSSRLAESSYEQYF,0.052,2.54280407839979 8 | CASSVYPSYEQYF,0.00800000000000001,3.39040543786638 9 | CAVGQGDGEQYF,0.00590000000000002,0.726515450971368 10 | CAWSVLAGDRETQYF,0.07,0.508560815679959 11 | CAYDRATYEQYF,0.00749999999999995,2.54280407839979 12 | CSVEDLFYGYTF,0.082,1.69520271893319 13 | CSVVGQGLGAYEQYF,0.023,1.35616217514656 14 | -------------------------------------------------------------------------------- /data/HCMV_pp65_sc.csv: 
-------------------------------------------------------------------------------- 1 | CDR3,rank,ratio 2 | CASFTGGTEAFF,0.034,1.90070841522651 3 | CASGPYSGGGTDTQYF,0.0141,0.863958370557501 4 | CASRVGGIGTEAFF,0.03,0.475177103806625 5 | CASSLARIINYGYTF,0.0570000000000001,1.9007084152265 6 | CASSNSEKLFF,0.025,1.9007084152265 7 | CASSPLVYEQFF,0.037,5.7021252456795 8 | CASSSAHYGYTF,0.00139999999999996,1.31587505669527 9 | CASSYATGGMKEQFF,0.075,1.90070841522651 10 | CASSYSTQGYNEQFF,0.024,1.23959244471293 11 | CASTSEAGSSYNSPLHF,0.02,0.997871917993913 12 | CATLGEGGLAGGPTQYF,0.099,1.1404250491359 13 | CAVRVAETQYF,0.078,3.80141683045301 14 | CAWSISDLAKNIQYF,0.00719999999999998,0.95035420761325 15 | CAWSVGDGRSGYTF,0.029,0.760283366090601 16 | CAYDRATYEQYF,0.0600000000000001,0.95035420761325 17 | CSVPTRTGTPLSYEQYF,0.0679999999999999,3.80141683045301 18 | -------------------------------------------------------------------------------- /data/InfluenzaA_sc.csv: -------------------------------------------------------------------------------- 1 | CDR3,rank,ratio 2 | CAGEGASEKLFF,0.098,1.58025201913967 3 | CASSFIPRSYQETQYF,0.084,1.58025201913967 4 | CASSLGGESYGYTF,0.043,3.16050403827935 5 | CASSPDRGLSYEQYF,0.038,3.16050403827933 6 | CASSPWTGGFSNSPLHF,0.0570000000000001,0.790126009569835 7 | CASSQEGYNEQFF,0.0244,6.32100807655868 8 | CASSSDEKLFF,0.0679999999999999,1.04152973988751 9 | CASSYGGEYTF,0.072,0.395063004784916 10 | CASSYSTQGYNEQFF,0.0580000000000001,1.51154540961186 11 | CATKGSGANTEAFF,0.00670000000000004,1.58025201913967 12 | CATSGRRPYEQYF,0.037,3.16050403827933 13 | CATSRQGFYGYTF,0.083,3.16050403827935 14 | CSVVGQGLGAYEQYF,0.0286,2.7391034998421 15 | -------------------------------------------------------------------------------- /data/InfluenzaM_sc.csv: -------------------------------------------------------------------------------- 1 | CDR3,rank,ratio 2 | CASAGLLNARTEAFF,0.032,2.45707129911937 3 | CASGPYSGGGTDTQYF,0.0629999999999999,1.34022070861057 4 | 
CASSFEITGGTEAFF,0.082,7.37121389735813 5 | CASSIRSSYEQYF,0.00680000000000003,7.37121389735813 6 | CASSLTSLRPQHF,0.034,3.68560694867908 7 | CASSPWTGGFSNSPLHF,0.049,3.68560694867908 8 | CASSSAGTGSSTSVYEQYF,0.0639999999999999,1.1793942235773 9 | CAWSLTDSNEQFF,0.088,1.22853564955969 10 | -------------------------------------------------------------------------------- /data/readme.md: -------------------------------------------------------------------------------- 1 | training_data.csv: data used for training 2 | testing_data.csv: data used for testing 3 | -------------------------------------------------------------------------------- /data/tcr_seq.csv: -------------------------------------------------------------------------------- 1 | Amino.Acid,X3.G0,X3_T1,rank 2 | CASSFRVFNQPQHF,0.07936103717327,0.000782354,0.102 3 | CASSPGTLSQETQYF,0.0085711058267507,0.00565314,0.148 4 | CASNLSGFGNQPQHF,0.000256279084520842,0.033868363,0.193 5 | CASSRRSWTEGDGYTF,0.0031038220236405,0.000290228,0.131 6 | CASKVLMNTEAFF,2.847540939119e-05,0.042499495,0.294 7 | CASTSPAGGAQYF,2.847540939119e-05,0.041893802,0.241 8 | CSASGQGGFYGYTF,0.00156614851651578,0.017994145,0.055 9 | CAVGQGDGEQYF,0.00387265927720303,0.000933778,0.00590000000000002 10 | CASRTRNWGAGEAFF,0.0134688804420368,1.26186e-05,0.112 11 | CASSLGLMNTEAFF,0.000170853056347338,0.010801534,0.274 12 | CASSEGGHAYEQYF,2.847540939119e-05,0.024353927,0.839 13 | CASSSDEKLFF,5.6950918782413e-05,0.016896325,0.201 14 | CSVVGQGLGAYEQYF,0.0101942053620489,0.000492126,0.023 15 | CASSVTGTGGYGYTF,0.012899372254213,5.04745e-05,0.053 16 | CSVEDLFYGYTF,0.0002847540939119,7.57117e-05,0.082 17 | CASSFITSRIDEQFF,2.847540939119e-05,0.013691197,0.219 18 | CASRSNREDGYTF,0.00720428537597332,2.52372e-05,0.582 19 | CASSFVEGTEAFF,5.6950918782413e-05,0.015369473,0.196 20 | CAWSVLTQETQYF,5.6950918782413e-05,0.013350495,0.22 21 | CASSGANSPLHF,0.00763141651684117,1.26186e-05,0.108 22 | CASSLLPGQAAEQYF,0.00165157554468961,0.001375429,0.00419999999999998 23 | 
CASSTGTGIYEQYF,5.6950918782413e-05,0.012088633,0.538 24 | CAIQREEGIYEQYF,5.6950918782413e-05,0.012732183,0.276 25 | CASSQNGRGDQPQHF,0.00130987043199527,0.007003331,0.45 26 | CASRPLGFGYGYTF,2.847540939119e-05,0.007482839,0.589 27 | CSARGPASGRDEQFF,0.000199328065738396,0.011066525,0.129 28 | CASSMGQKNQPQHF,0.000256279084520842,0.000189279,0.447 29 | CASSLRGADYEQYF,0.00666325219754057,1.26186e-05,0.205 30 | CASSAGLGNQPQHF,0.000113902037564892,0.003962245,0.376 31 | CASTTPVTGGREPQHF,0.00128139442260388,0.001589945,0.213 32 | CASSFNLGLGGEQYF,5.6950918782413e-05,0.009817282,0.0179 33 | CASSELSTVSYEQYF,0.00327467507998784,0.001022108,0.324 34 | CASSRTSGTYTDTQYF,0.00378723324902953,8.83303e-05,0.11 35 | CRVDQEGGYTF,0.000968164319300592,0.00082021,0.279 36 | CASSPQRDRTDTQYF,0.00145224747895122,0.001160913,0.381 37 | CASSQDVQTTDTQYF,2.847540939119e-05,0.007280941,0.078 38 | CASADSYSYEQYF,0.00153767350712472,0.000429033,0.054 39 | CASATLNQPQHF,5.6950918782413e-05,0.007205229,0.274 40 | CASSPQGAGGAVSYGYTF,5.6950918782413e-05,0.006599536,0.154 41 | CASKREDGIYEQYF,0.000170853056347338,0.005388149,0.467 42 | CASALDYNEQFF,0.0025343138358167,1.26186e-05,0.342 43 | CAVRVAETQYF,0.000768836253562196,0.000466889,0.052 44 | CASSEWGEANTGELFF,5.6950918782413e-05,0.006120028,0.233 45 | CSVDMGHLNTEAFF,0.000768836253562196,0.000227135,0.444 46 | CASSQAWGDEKLFF,5.6950918782413e-05,1.26186e-05,0.476 47 | CASRLARTGCRQPQHF,2.847540939119e-05,0.006006461,0.054 48 | CASSLAPGATNEKLFF,0.0027336419015551,6.30931e-05,0.00209999999999999 49 | CASTSEAGSSYNSPLHF,0.000768836253562196,0.002082071,0.02 50 | CASSSQGQSTDTQYF,2.847540939119e-05,2.52372e-05,0.592 51 | CASSESGQFYEQYF,2.847540939119e-05,0.004037957,0.098 52 | CASSKRTGWPGNTIYF,5.6950918782413e-05,0.000416414,0.573 53 | CAWRGKGWTEAFF,0.00179395259164556,0.000706642,0.161 54 | CAISEQGNIQYF,2.847540939119e-05,3.78558e-05,0.088 55 | CASSIDRNTEAFF,2.847540939119e-05,1.26186e-05,0.598 56 | CASSLTSGTYQETQYF,0.00187937861981907,1.26186e-05,0.454 57 | 
CASSQGLTNTEAFF,0.00014237704695595,0.002561579,0.801 58 | CASSSAGTGSSTSVYEQYF,5.6950918782413e-05,0.001753987,0.0639999999999999 59 | CASSLDLGSPLHF,0.00170852656347206,0.000113568,0.102 60 | CASGYNTGELFF,0.00219260872312236,6.30931e-05,0.0161 61 | CASSIRSSYEQYF,0.000370181122085734,0.000176661,0.00680000000000003 62 | CASRDILTGELFF,2.847540939119e-05,0.003684636,0.024 63 | CASSPRRSGNTIYF,2.847540939119e-05,3.78558e-05,0.364 64 | CASSPGTEAFF,2.847540939119e-05,0.000164042,0.116 65 | CAISGEAFNYGYTF,2.847540939119e-05,0.00373511,0.0580000000000001 66 | CASSRQGENEKLFF,2.847540939119e-05,0.00164042,0.266 67 | CASAWILAYNEQFF,2.847540939119e-05,0.000744498,0.051 68 | CASSQTAGANVLTF,0.00148072248834228,1.26186e-05,0.102 69 | CASRDRLGEKLFF,0.000597984197215188,0.000694024,0.1 70 | CSARDSGVGQPQHF,2.847540939119e-05,0.000580456,0.022 71 | CSASRPEGETGELFF,0.000768836253562196,0.000555219,0.023 72 | CASSYSTQGYNEQFF,0.000911214300518476,0.000340703,0.024 73 | CSASLAGSPLQFF,0.00014237704695595,8.83303e-05,0.106 74 | CASSFSSGTTDTQYF,0.00071188623478008,7.57117e-05,0.088 75 | CASSRGLSGNQPQHF,5.6950918782413e-05,0.000113568,0.107 76 | CASSQDGWPQHF,0.00190785462921046,0.000164042,0.815 77 | CASSLGQGNYGYTF,5.6950918782413e-05,0.001842318,0.446 78 | CASSSGPATEAFF,0.0002847540939119,0.001287099,0.142 79 | CASSNREYYGYTF,2.847540939119e-05,1.26186e-05,0.395 80 | CASSIRAEDTGELFF,0.00182242760103662,1.26186e-05,0.039 81 | CASSLGVGRYSNQPQHF,0.000455607150259238,0.000946396,0.587 82 | CASSSLDVHEKLFF,2.847540939119e-05,2.52372e-05,0.07 83 | CASSTGRTEAFF,0.000170853056347338,0.000189279,0.085 84 | CASSADGMNTEAFF,0.000683410225388692,0.000328084,0.031 85 | CAWSPTGNNYGYTF,0.000683410225388692,0.000164042,0.093 86 | CASSLGDRAYNEQFF,0.000256279084520842,0.000214516,0.2 87 | CASSLRTVDNSPLHF,0.000341705112694346,7.57117e-05,0.505 88 | CASSFYSRGDGYTF,8.5426328173603e-05,0.000126186,0.179 89 | CASSEGTANQPQHF,0.000484082159650296,0.000252372,0.479 90 | 
CASSLFDVGLGELFF,2.847540939119e-05,0.001905411,0.0649999999999999 91 | CASSRGGNQPQHF,0.000626459206606246,1.26186e-05,0.099 92 | CASSLEGATDTQYF,2.847540939119e-05,0.00209469,0.277 93 | CASSLGGYEQYF,0.00071188623478008,0.000126186,0.052 94 | CASSLDRSFYGYTF,0.000170853056347338,0.000378558,0.141 95 | CASSLTGGNTEAFF,2.847540939119e-05,0.002044216,0.349 96 | CAWSVGSGGELFF,0.000313230103303288,0.000227135,0.0115 97 | CASRDLASTGELFF,0.0002847540939119,0.000189279,0.538 98 | CASSLAVMGTGGELGGYTF,2.847540939119e-05,0.001501615,0.09 99 | CASSGDFTEAFF,2.847540939119e-05,0.000113568,0.459 100 | CAGEGASEKLFF,0.000370181122085734,8.83303e-05,0.103 101 | CASSITVSYEQYF,0.000113902037564892,2.52372e-05,0.00280000000000002 102 | CASSQWTESSHEQYF,2.847540939119e-05,0.001160913,0.147 103 | CAWSLGAASYEQYF,0.000313230103303288,8.83303e-05,0.385 104 | CASRTEVNTEAFF,5.6950918782413e-05,2.52372e-05,0.1 105 | CASSPGPGLNYGYTF,5.6950918782413e-05,0.001249243,0.464 106 | CASSLNGEVGYGYTF,5.6950918782413e-05,0.000933778,0.764 107 | CASSTGTASRSYNSPLHF,0.00014237704695595,0.001059964,0.662 108 | CSASSGASGYGYTF,0.00099664032869198,3.78558e-05,0.481 109 | CSARRYGPNSPLHF,2.847540939119e-05,0.001135675,0.055 110 | CASSPTGLEGYTF,0.000968164319300592,0.000113568,0.139 111 | CASSQGLAPYQETQYF,8.5426328173603e-05,0.001413285,0.404 112 | CASSVDRTGNQPQHF,0.000968164319300592,3.78558e-05,0.307 113 | CSAREPAGSNQPQHF,0.000256279084520842,0.000100949,0.422 114 | CATLGEGGLAGGPTQYF,2.847540939119e-05,0.000832829,0.099 115 | CASSSAHYGYTF,0.000256279084520842,0.000416414,0.00139999999999996 116 | CASSPTGAGYQPQHF,0.000113902037564892,1.26186e-05,0.196 117 | CASSLGDRGAEAFF,8.5426328173603e-05,0.000151423,0.455 118 | CASSYRVGEKLFF,0.000170853056347338,5.04745e-05,0.00890000000000002 119 | CSARDSGTDTQYF,2.847540939119e-05,0.000984252,0.128 120 | CASSNQPSGGQTQYF,5.6950918782413e-05,0.000328084,0.212 121 | CASSQEVGSDTQYF,2.847540939119e-05,0.000138805,0.039 122 | CASSPFRASLDGELFF,2.847540939119e-05,1.26186e-05,0.525 123 | 
CASSRLLGTASYNEKLFF,5.6950918782413e-05,0.001224006,0.11 124 | CASSLGRFDDEQYF,0.000199328065738396,3.78558e-05,0.211 125 | CASSVKSRSYNEQFF,5.6950918782413e-05,2.52372e-05,0.1 126 | CASAPGPGSYNEQFF,5.6950918782413e-05,0.000100949,0.123 127 | CASTTSGTGREQYF,0.000170853056347338,2.52372e-05,0.612 128 | CASSWDPNTEAFF,8.5426328173603e-05,0.000164042,0.599 129 | CASSDLGGQNSPLHF,2.847540939119e-05,0.000302847,0.217 130 | CASSLMGGGAGKQYF,0.000541033178432742,1.26186e-05,0.00700000000000001 131 | CASSQMGTGHFYGYTF,0.000797312262953584,2.52372e-05,0.241 132 | CASRDSGANVLTF,5.6950918782413e-05,2.52372e-05,0.52 133 | CASGGWGTDEQFF,0.000170853056347338,3.78558e-05,0.722 134 | CASSPDRGNSPLHF,0.000170853056347338,0.000391177,0.138 135 | CSAREAASSYEQYF,5.6950918782413e-05,1.26186e-05,0.444 136 | CASREQGWSEAFF,2.847540939119e-05,1.26186e-05,0.799 137 | CASSQGGTSGTTGELFF,2.847540939119e-05,1.26186e-05,0.241 138 | CASSLFRGPYEQYF,0.000341705112694346,0.000239754,0.642 139 | CASSQAGNGQRNYGYTF,0.000313230103303288,0.000126186,0.985 140 | CASRLQGMETQYF,8.5426328173603e-05,1.26186e-05,0.00460000000000005 141 | CAWSLADNQPQHF,2.847540939119e-05,0.000643549,0.198 142 | CASSSSTTGLKQFF,0.00014237704695595,0.000113568,0.378 143 | CASSLEGVSTIYF,2.847540939119e-05,0.000138805,0.487 144 | CASSPDGRGNSPLHF,0.000113902037564892,0.000138805,0.514 145 | CSAETGLSNQPQHF,2.847540939119e-05,0.00073188,0.578 146 | CASSLRRGRGYTF,0.000398656131476792,2.52372e-05,0.672 147 | CASSLLYGYTF,2.847540939119e-05,0.000517363,0.166 148 | CASRDRLSTEAFF,2.847540939119e-05,0.000845447,0.144 149 | CATSRQGFYGYTF,0.000512558169041684,1.26186e-05,0.087 150 | CSARAVGPNEQFF,0.000170853056347338,3.78558e-05,0.229 151 | CASGPYSGGGTDTQYF,0.000398656131476792,3.78558e-05,0.0141 152 | CASSQENGGHYEQYF,0.000199328065738396,5.04745e-05,0.171 153 | CASAGGGTDTQYF,2.847540939119e-05,0.000479507,0.195 154 | CASSEANGFEPQHF,0.000313230103303288,0.000164042,0.23 155 | CATSDLRAGVRFF,0.00014237704695595,8.83303e-05,0.154 156 | 
CASSLVGSEQFF,0.000370181122085734,0.000201898,0.04 157 | CASSHPGPAEGYTF,0.000199328065738396,8.83303e-05,0.336 158 | CSATGWGEAATGELFF,0.000170853056347338,3.78558e-05,0.157 159 | CATSRDLASSDEQYF,0.000113902037564892,5.04745e-05,0.036 160 | CASSSPAVPNYGYTF,2.847540939119e-05,0.00073188,0.646 161 | CSASGTSGRLYEQFF,5.6950918782413e-05,0.000694024,0.381 162 | CAWSGEDNEQFF,0.00014237704695595,0.000252372,0.256 163 | CASSIVAGGYEQYF,5.6950918782413e-05,0.000290228,0.514 164 | CASGFERLNEQFF,5.6950918782413e-05,6.30931e-05,0.549 165 | CAVGSSGGLNEQFF,0.000541033178432742,0.000151423,0.095 166 | CAIFNPFSGRPYEQYF,0.000170853056347338,0.000214516,0.00880000000000003 167 | CASSLGSSGSSETQYF,0.000170853056347338,0.000100949,0.156 168 | CASSAGSGQPYNEQFF,2.847540939119e-05,2.52372e-05,0.652 169 | CASSKDRGGGYTF,8.5426328173603e-05,0.000113568,0.0679999999999999 170 | CASSFGLQETQYF,2.847540939119e-05,0.000630931,0.069 171 | CSARDEMGTEAFF,5.6950918782413e-05,1.26186e-05,0.00360000000000005 172 | CASSRRTSGPTDTQYF,2.847540939119e-05,3.78558e-05,0.574 173 | CASSQDSFSLYGYTF,5.6950918782413e-05,5.04745e-05,0.458 174 | CSAAGQGLSEQFF,8.5426328173603e-05,1.26186e-05,0.029 175 | CSARGRADEQYF,0.00014237704695595,1.26186e-05,0.098 176 | CATSGPPGLASSTDTQYF,2.847540939119e-05,6.30931e-05,0.00439999999999996 177 | CASSDTVGYEQYF,0.000170853056347338,3.78558e-05,0.149 178 | CAWNFGRAGEQYF,0.000113902037564892,2.52372e-05,0.184 179 | CASSSPLTGRYEQYF,2.847540939119e-05,0.000201898,0.26 180 | CASSLGNTEAFF,0.000227803075129454,1.26186e-05,0.099 181 | CASSQAGTGYEQYF,0.000170853056347338,2.52372e-05,0.281 182 | CASSFEGTDTQYF,2.847540939119e-05,0.000151423,0.262 183 | CASSQDEYEQYF,5.6950918782413e-05,2.52372e-05,0.11 184 | CASSQTGTYEQYF,2.847540939119e-05,0.000668787,0.227 185 | CASSPGQGELPEAFF,0.000484082159650296,2.52372e-05,0.034 186 | CAWSPVAGHLEAFF,2.847540939119e-05,2.52372e-05,0.089 187 | CATAPGHRMGGYTF,0.000398656131476792,1.26186e-05,0.00660000000000005 188 | 
CASLPPAANVLTF,5.6950918782413e-05,0.000264991,0.669 189 | CASTPVEGLDEQYF,5.6950918782413e-05,6.30931e-05,0.151 190 | CASSVGDSEQYF,8.5426328173603e-05,3.78558e-05,0.307 191 | CASSLNRGPSYEQYF,2.847540939119e-05,1.26186e-05,0.312 192 | CSAPEGRTSRSRDTQYF,8.5426328173603e-05,0.000113568,0.159 193 | CASAGLLNARTEAFF,5.6950918782413e-05,6.30931e-05,0 194 | CASSLAAGTGSEQYF,0.00014237704695595,5.04745e-05,0.079 195 | CASRGLLGRNQPQHF,0.000227803075129454,2.52372e-05,0.131 196 | CATLYQVQAFF,0.00014237704695595,0.000151423,0.0129 197 | CASIPRVDGANVLTF,0.000455607150259238,1.26186e-05,0.328 198 | CASSSQGGNIQYF,0.00014237704695595,7.57117e-05,0.0570000000000001 199 | CASSFQGRTEAFF,2.847540939119e-05,0.000100949,0.028 200 | CARRGTSYNEQFF,5.6950918782413e-05,1.26186e-05,0.082 201 | CASSLGQGTVSGANVLTF,2.847540939119e-05,8.83303e-05,0.807 202 | CSARDLGSYNSPLHF,0.00014237704695595,1.26186e-05,0.14 203 | CSASNTATYEQYF,8.5426328173603e-05,2.52372e-05,0.328 204 | CATSDGASGRRYF,8.5426328173603e-05,3.78558e-05,0.106 205 | CASSPGGTSVNIQYF,8.5426328173603e-05,7.57117e-05,0.075 206 | CASRDLLGRGSATNEKLFF,2.847540939119e-05,1.26186e-05,0.456 207 | CASSREGPSYEQYF,2.847540939119e-05,0.000504745,0.255 208 | CASSVSQGRNQPQHF,0.00014237704695595,0.000126186,0.323 209 | CASSSRILRTGGVLGYTF,8.5426328173603e-05,0.000138805,0.645 210 | CSVEEGATEAFF,2.847540939119e-05,5.04745e-05,0.398 211 | CASSQRAGTEISPLHF,2.847540939119e-05,1.26186e-05,0.461 212 | CASSQDPGASYNEQFF,0.000484082159650296,1.26186e-05,0.1 213 | CASSFTLTGTAIEAFF,0.000113902037564892,7.57117e-05,0.202 214 | CASSRTGSYEQYF,2.847540939119e-05,8.83303e-05,0.045 215 | CASTWVQIPWANVLTF,0.000199328065738396,6.30931e-05,0.134 216 | CASSEEGETQYF,2.847540939119e-05,2.52372e-05,0.028 217 | CASSYSRLRGGEIGNQPQHF,2.847540939119e-05,0.000290228,0.145 218 | CASTRTDMNTEAFF,2.847540939119e-05,0.000151423,0.0679999999999999 219 | CAIGTENSPLHF,0.000113902037564892,6.30931e-05,0.112 220 | CASTPNYQSSYEQYF,2.847540939119e-05,1.26186e-05,0.171 221 | 
CASSLGMRKILSYNEQFF,2.847540939119e-05,0.000492126,0.824 222 | CASSFGGAGDTQYF,0.000370181122085734,1.26186e-05,0.385 223 | CASSQYAGTNEKLFF,2.847540939119e-05,0.000328084,0.482 224 | CASSRLQGSYGYTF,5.6950918782413e-05,1.26186e-05,0.601 225 | CSARSVEGTQETQYF,2.847540939119e-05,1.26186e-05,0.316 226 | CASSPGLAGGPSTDTQYF,0.000370181122085734,1.26186e-05,0.265 227 | CASSPGTSNTGELFF,2.847540939119e-05,8.83303e-05,0.048 228 | CASSDIQTYSGANVLTF,5.6950918782413e-05,5.04745e-05,0.625 229 | CASWDTPNQPQHF,0.000227803075129454,1.26186e-05,0.407 230 | CASSVQGLAIETQYF,0.000341705112694346,1.26186e-05,0.461 231 | CASSYSGTSSNQPQHF,5.6950918782413e-05,2.52372e-05,0.118 232 | CASSLKGKEQYF,2.847540939119e-05,2.52372e-05,0.285 233 | CASSLATRVLPYGYTF,2.847540939119e-05,1.26186e-05,0.221 234 | CASSPGYTGELFF,8.5426328173603e-05,1.26186e-05,0.101 235 | CASSNRVGIGAFF,0.000113902037564892,2.52372e-05,0.0144 236 | CSARDLSSSGTVNNQPQHF,5.6950918782413e-05,3.78558e-05,0.541 237 | CASSDGQGAYEQYF,2.847540939119e-05,5.04745e-05,0.754 238 | CASTLGIAGGPDTQYF,0.00014237704695595,3.78558e-05,0.281 239 | CSALSLRTSGATDTQYF,0.000113902037564892,3.78558e-05,0.171 240 | CASSLYWGSSTDTQYF,2.847540939119e-05,0.000391177,0.549 241 | CASSYLGQANEKLFF,2.847540939119e-05,2.52372e-05,0.418 242 | CASSDAPTVTNYGYTF,0.0002847540939119,1.26186e-05,0.127 243 | CASSLLTPGNSPLHF,8.5426328173603e-05,1.26186e-05,0.404 244 | CASSPLPGYEQYF,0.00014237704695595,3.78558e-05,0.014 245 | CSALPPPLSDGYTF,5.6950918782413e-05,0.000164042,0.341 246 | CASSGVPGTPGIYEQYF,2.847540939119e-05,0.00045427,0.176 247 | CASSQVSGVGYTF,5.6950918782413e-05,2.52372e-05,0.145 248 | CASSQGDSANQPQHF,0.000199328065738396,6.30931e-05,0.302 249 | CASSESGNYEQYF,0.000170853056347338,8.83303e-05,0.12 250 | CASSTDGDTEAFF,2.847540939119e-05,2.52372e-05,0.682 251 | CSATGGVGYEQYF,2.847540939119e-05,5.04745e-05,0.214 252 | CSASRHSNQGARNGYTF,0.0002847540939119,1.26186e-05,0.038 253 | CASSATTGTVEKLFF,5.6950918782413e-05,3.78558e-05,0.105 254 | 
CASSLTGINHIDTQYF,5.6950918782413e-05,1.26186e-05,0.716 255 | CASSIVSGGYEAFF,8.5426328173603e-05,7.57117e-05,0.248 256 | CASSLGLAPSTDTQYF,0.000170853056347338,5.04745e-05,0.153 257 | CASSTGTGGEQYF,5.6950918782413e-05,7.57117e-05,0.138 258 | CASSYPGGWNGYTF,0.000170853056347338,2.52372e-05,0.024 259 | CASSEAATLRRGELFF,5.6950918782413e-05,5.04745e-05,0.365 260 | CASSLGQAGYEQYF,2.847540939119e-05,2.52372e-05,0.203 261 | CASTPWGSGDTEAFF,8.5426328173603e-05,2.52372e-05,0.235 262 | CATKEAFNTGELFF,2.847540939119e-05,3.78558e-05,0.162 263 | CASSPGTSGNTIYF,2.847540939119e-05,7.57117e-05,0.282 264 | CASRDRYNSPLHF,2.847540939119e-05,1.26186e-05,0.388 265 | CASSLTGGGYEQYF,2.847540939119e-05,5.04745e-05,0.447 266 | CAWSVQGASEAFF,0.000227803075129454,2.52372e-05,0.535 267 | CASSPSEVKNIQYF,0.000113902037564892,0.000138805,0.17 268 | CASSYNRHNEQFF,5.6950918782413e-05,1.26186e-05,0.743 269 | CASSLSGQGPRTEAFF,5.6950918782413e-05,0.000189279,0.145 270 | CASSRRSSGAANEQFF,2.847540939119e-05,1.26186e-05,0.126 271 | CAWSVMGATEAFF,2.847540939119e-05,0.000391177,0.391 272 | CASSMGTDTQYF,2.847540939119e-05,1.26186e-05,0.183 273 | CASRYYPSTGSTGELFF,5.6950918782413e-05,2.52372e-05,0.876 274 | CSVRGQFDEQFF,0.000113902037564892,1.26186e-05,0.00109999999999999 275 | CASSFPPSGASRDEQYF,5.6950918782413e-05,8.83303e-05,0.294 276 | CASSLLTSGGDEQFF,8.5426328173603e-05,1.26186e-05,0.024 277 | CSAISTSNNEQFF,0.00014237704695595,2.52372e-05,0.042 278 | CASSQDLGVAQPQHF,5.6950918782413e-05,1.26186e-05,0.867 279 | CSASGEPKPYEQYF,2.847540939119e-05,2.52372e-05,0.099 280 | CASSLDSNQPQHF,0.000113902037564892,3.78558e-05,0.141 281 | CASSRQGLGNTIYF,2.847540939119e-05,0.00027761,0.183 282 | CASSVESSRNNEQFF,2.847540939119e-05,0.000100949,0.192 283 | CASSSEGSYNSPLHF,2.847540939119e-05,1.26186e-05,0.881 284 | CATSRGWTGYPYEQYF,5.6950918782413e-05,6.30931e-05,0.392 285 | CASSGTGNTNEKLFF,0.000227803075129454,2.52372e-05,0.426 286 | CASSLEAGVVTEAFF,2.847540939119e-05,5.04745e-05,0.289 287 | 
CAISDLPSGGGAFF,0.000113902037564892,5.04745e-05,0.253 288 | CSARSAAVWLDEQFF,2.847540939119e-05,0.000214516,0.164 289 | CASSQDRTASYEQYF,0.00014237704695595,1.26186e-05,0.107 290 | CASRGQYTGELFF,2.847540939119e-05,1.26186e-05,0.792 291 | CATSDLGQGDGYTF,5.6950918782413e-05,2.52372e-05,0.668 292 | CASTPNGRGDSPLHF,0.000113902037564892,0.000113568,0.574 293 | CASRLSGVKGGYRYNEQFF,0.000113902037564892,2.52372e-05,0.403 294 | CASSEGDRYSPLHF,8.5426328173603e-05,5.04745e-05,0.143 295 | CASSLGGLYNEQFF,8.5426328173603e-05,2.52372e-05,0.241 296 | CAWRGAGTGQPQHF,2.847540939119e-05,1.26186e-05,0.198 297 | CAWSWGSTDTQYF,2.847540939119e-05,0.000100949,0.029 298 | CASSQDAGELFF,2.847540939119e-05,1.26186e-05,0.504 299 | CASSVGGLAGGADTQYF,5.6950918782413e-05,2.52372e-05,0.1 300 | CASSVDTSSYNEQFF,2.847540939119e-05,1.26186e-05,0.0620000000000001 301 | CASSLDFRGVHSYEQYF,2.847540939119e-05,5.04745e-05,0.703 302 | CASSQEGPGEQYF,2.847540939119e-05,5.04745e-05,0.297 303 | CASSLGETQYF,5.6950918782413e-05,1.26186e-05,0.0679999999999999 304 | CASSILTGELFF,8.5426328173603e-05,0.000113568,0.0659999999999999 305 | CAYSRVGGYTF,2.847540939119e-05,3.78558e-05,0.096 306 | CASSLDFRGRWSSYNEQFF,2.847540939119e-05,3.78558e-05,0.642 307 | CASSAEGQGIRYGYTF,5.6950918782413e-05,2.52372e-05,0.089 308 | CASRVRGGMNTEAFF,0.000170853056347338,2.52372e-05,0.242 309 | CASSYWRDRGQSHNSPLHF,2.847540939119e-05,0.000164042,0.36 310 | CASSQASGGDHYGYTF,2.847540939119e-05,7.57117e-05,0.102 311 | CASSSAGLDSADTQYF,8.5426328173603e-05,1.26186e-05,0.415 312 | CASSPYHGGNTGELFF,2.847540939119e-05,2.52372e-05,0.215 313 | CATSDLRVAEQYF,2.847540939119e-05,0.000201898,0.281 314 | CSARDFDRGRIGYTF,2.847540939119e-05,0.000201898,0.478 315 | CASSQLGTLGNTIYF,2.847540939119e-05,2.52372e-05,0.114 316 | CSVQQGVREAFF,5.6950918782413e-05,1.26186e-05,0.379 317 | CSARDEGEEQYF,5.6950918782413e-05,0.000126186,0.039 318 | CASSGIGGTDTQYF,0.000113902037564892,2.52372e-05,0.391 319 | CASRGITDNYGYTF,0.000113902037564892,2.52372e-05,0.698 320 | 
CASSSGLHRSSYNEQFF,2.847540939119e-05,6.30931e-05,0.379 321 | CATSGLAGSDTQYF,5.6950918782413e-05,7.57117e-05,0.074 322 | CASSYKETQYF,2.847540939119e-05,0.00027761,0.104 323 | CAAVRERGETQYF,2.847540939119e-05,0.000189279,0.05 324 | CASSRQGQYNEQFF,2.847540939119e-05,0.000100949,0.171 325 | CASSLKETQYF,2.847540939119e-05,1.26186e-05,0.177 326 | CSARDGTANTEAFF,2.847540939119e-05,1.26186e-05,0.04 327 | CASSQEWSSGGFDIQYF,2.847540939119e-05,1.26186e-05,0.595 328 | CASSKYIMTASNQPQHF,0.000113902037564892,1.26186e-05,0.082 329 | CSASDTTNTGELFF,2.847540939119e-05,2.52372e-05,0.436 330 | CASGLSYFSAESGNTIYF,0.00014237704695595,2.52372e-05,0.405 331 | CSVPTRTGTPLSYEQYF,2.847540939119e-05,5.04745e-05,0.023 332 | CASHVGGQETQYF,0.000170853056347338,3.78558e-05,0.04 333 | CSGQGPEQFF,2.847540939119e-05,0.000264991,0.048 334 | CASSPYGGANTIYF,5.6950918782413e-05,1.26186e-05,0.4 335 | CASSFLQGTDTQYF,8.5426328173603e-05,2.52372e-05,0.223 336 | CSVYNTTYEQYF,2.847540939119e-05,3.78558e-05,0.0639999999999999 337 | CASTKSRGTYNEQFF,2.847540939119e-05,1.26186e-05,0.289 338 | CSASGSVSYEQYF,2.847540939119e-05,0.000164042,0.0106000000000001 339 | CASSYATGGMKEQFF,5.6950918782413e-05,6.30931e-05,0.032 340 | CASSKAGGGLNTEAFF,2.847540939119e-05,0.000201898,0.47 341 | CSATTPTDTQYF,2.847540939119e-05,0.000201898,0.0610000000000001 342 | CATIRDDYNEQFF,8.5426328173603e-05,0.000100949,0.191 343 | CASSPGVRENTEAFF,2.847540939119e-05,0.000113568,0.123 344 | CASSVDGQGMNEQYF,5.6950918782413e-05,3.78558e-05,0.288 345 | CASSLIAYGYTF,5.6950918782413e-05,3.78558e-05,0.16 346 | CASSRTGSGNTEAFF,2.847540939119e-05,2.52372e-05,0.417 347 | CASSLFSGINTGELFF,5.6950918782413e-05,1.26186e-05,0.043 348 | CASSDPPF,0.000170853056347338,1.26186e-05,0.116 349 | CASSDYRVLSGNTIYF,5.6950918782413e-05,3.78558e-05,0.11 350 | CASSQGFTEAFF,5.6950918782413e-05,3.78558e-05,0.328 351 | CASSKTGNQDNSPLHF,5.6950918782413e-05,3.78558e-05,0.545 352 | CASSLFWRKIAITISGANVLTF,5.6950918782413e-05,0.000100949,0.202 353 | 
CASSYPAWSGYNEQFF,8.5426328173603e-05,5.04745e-05,0.313 354 | CASSLGHLSYEQYF,0.00014237704695595,1.26186e-05,0.22 355 | CASSPSDSFYEQYF,8.5426328173603e-05,2.52372e-05,0.0570000000000001 356 | CASMRGGTTDTQYF,2.847540939119e-05,3.78558e-05,0.794 357 | CASSIGWGRNQPQHF,0.000113902037564892,6.30931e-05,0.657 358 | CASIRGVRANYGYTF,0.00014237704695595,7.57117e-05,0.563 359 | CASSLAGGLQPYNEQFF,2.847540939119e-05,0.000189279,0.465 360 | CASSSTLEQFF,5.6950918782413e-05,2.52372e-05,0.149 361 | CASSTGQFYEQYF,0.000113902037564892,1.26186e-05,0.143 362 | CASSLGGPLHF,5.6950918782413e-05,2.52372e-05,0.117 363 | CAWARGWGLSETQYF,8.5426328173603e-05,1.26186e-05,0.33 364 | CASSYGRADGYTF,2.847540939119e-05,2.52372e-05,0.405 365 | CAWSVLGTGSRTDTQYF,0.000113902037564892,2.52372e-05,0.367 366 | CASSKQGGSTDSYNEQFF,2.847540939119e-05,8.83303e-05,0.282 367 | CSVDPQGGGGEAFF,8.5426328173603e-05,2.52372e-05,0.028 368 | CASSLGQGPYNEQFF,5.6950918782413e-05,1.26186e-05,0.1 369 | CASSTSWTVYNQPQHF,5.6950918782413e-05,1.26186e-05,0.347 370 | CASSQAAEQYF,2.847540939119e-05,3.78558e-05,0.078 371 | CASSNSEKLFF,8.5426328173603e-05,2.52372e-05,0.025 372 | CASSPTRQGVKGNTIYF,8.5426328173603e-05,2.52372e-05,0.653 373 | CSARGYGNTIYF,2.847540939119e-05,3.78558e-05,0.00260000000000005 374 | CASSSTPGGGLYNEQFF,0.000227803075129454,1.26186e-05,0.026 375 | CASSVGDSGWAQYF,2.847540939119e-05,1.26186e-05,0.18 376 | CASSVAGGIGEAFF,2.847540939119e-05,1.26186e-05,0.135 377 | CASSPMRETGNYGYTF,2.847540939119e-05,1.26186e-05,0.186 378 | CASSTSRAHYEQYF,5.6950918782413e-05,5.04745e-05,0.546 379 | CSANRERTMETQYF,0.000113902037564892,1.26186e-05,0.0113 380 | CASSSRRWGRGPDTQYF,2.847540939119e-05,5.04745e-05,0.504 381 | CATSRTGSANYGYTF,8.5426328173603e-05,1.26186e-05,0.0570000000000001 382 | CSARDLGKHNEQFF,2.847540939119e-05,2.52372e-05,0.0162 383 | CASSGPGYEQYF,2.847540939119e-05,2.52372e-05,0.148 384 | CASSENAARGNTIYF,2.847540939119e-05,2.52372e-05,0.39 385 | CASSLAAGGSYGYTF,0.000170853056347338,1.26186e-05,0.096 386 | 
CASRTGGYGYTF,0.000113902037564892,2.52372e-05,0.188 387 | CASSQGGEPQHF,5.6950918782413e-05,1.26186e-05,0.223 388 | CSATEGTSGGEQYF,5.6950918782413e-05,1.26186e-05,0.318 389 | CASSDGLFLGDEQFF,2.847540939119e-05,6.30931e-05,0.938 390 | CATSDFAGRGSETQYF,2.847540939119e-05,1.26186e-05,0.432 391 | CASSTYSSNQPQHF,2.847540939119e-05,1.26186e-05,0.0629999999999999 392 | CAWSHTGQGSGANVLTF,2.847540939119e-05,1.26186e-05,0.601 393 | CASSQVASGGTNTGELFF,5.6950918782413e-05,2.52372e-05,0.3 394 | CASSLPPRADTQYF,2.847540939119e-05,1.26186e-05,0.487 395 | CASSLQGNQPQHF,2.847540939119e-05,1.26186e-05,0.207 396 | CASSLGHDSYEQYF,2.847540939119e-05,1.26186e-05,0.508 397 | CASSLEGSGELFF,2.847540939119e-05,1.26186e-05,0.131 398 | CASSYGAGEMNEQYF,2.847540939119e-05,1.26186e-05,0.615 399 | CASSELWIGYWDTQYF,2.847540939119e-05,1.26186e-05,0.426 400 | CASSEAEGSGGQPQHF,0.000113902037564892,1.26186e-05,0.595 401 | CASSVETGPYEQYF,5.6950918782413e-05,2.52372e-05,0.168 402 | CASSDPDSNQPQHF,0.000199328065738396,1.26186e-05,0.895 403 | CATSDRDRGPGLRDWTYEQYF,0.00014237704695595,2.52372e-05,0.845 404 | CATSRRDRGSNGYTF,8.5426328173603e-05,3.78558e-05,0.218 405 | CASSFRTSGTGELFF,5.6950918782413e-05,1.26186e-05,0.438 406 | CASSDALQARNEKLFF,5.6950918782413e-05,1.26186e-05,0.528 407 | CASSVGGGDTQYF,5.6950918782413e-05,1.26186e-05,0.207 408 | CASSLWTAKLYEQYF,5.6950918782413e-05,1.26186e-05,0.306 409 | CASSLTVNTEAFF,5.6950918782413e-05,1.26186e-05,0.079 410 | CASVGHSNTGELFF,8.5426328173603e-05,2.52372e-05,0.299 411 | CASSLAANNEQFF,8.5426328173603e-05,2.52372e-05,0.468 412 | CASGLVLWMGITF,2.847540939119e-05,3.78558e-05,0.502 413 | CAWKKALNTEAFF,2.847540939119e-05,1.26186e-05,0.048 414 | CASSRAQGGYEQYF,2.847540939119e-05,0.000164042,0.00209999999999999 415 | CSVEASVGEQYF,2.847540939119e-05,7.57117e-05,0.045 416 | CASSYDPRSWGGGQETQYF,5.6950918782413e-05,8.83303e-05,0.467 417 | CSARDRVGNTIYF,8.5426328173603e-05,1.26186e-05,0.00170000000000003 418 | CATSGERDSPTDTQYF,8.5426328173603e-05,1.26186e-05,0.317 419 | 
CASSLWAGTDTQYF,8.5426328173603e-05,1.26186e-05,0.395 420 | CSAPDSSSGNTIYF,8.5426328173603e-05,1.26186e-05,0.081 421 | CASSQVDQGGHHSPLHF,8.5426328173603e-05,1.26186e-05,0.213 422 | CASSLTNEQFF,8.5426328173603e-05,1.26186e-05,0.034 423 | CASTQGYEQYF,8.5426328173603e-05,1.26186e-05,0.036 424 | CASSLTGYSNQPQHF,2.847540939119e-05,2.52372e-05,0.275 425 | CASSAPTRTSNQPQHF,0.000170853056347338,1.26186e-05,0.511 426 | CASSYQGPQKGQPQHF,0.000170853056347338,1.26186e-05,0.105 427 | CASSLPLGQGDTQYF,0.000113902037564892,2.52372e-05,0.054 428 | CASSPQTGIVAEAFF,5.6950918782413e-05,1.26186e-05,0.397 429 | CSARNIYEQYF,2.847540939119e-05,6.30931e-05,0.028 430 | CASSSGQVAPGELFF,2.847540939119e-05,6.30931e-05,0 431 | CASSYKYPPRGNEQFF,2.847540939119e-05,0.000100949,0.143 432 | CASSVKGTPVDSPLHF,2.847540939119e-05,1.26186e-05,0.144 433 | CASSESLPGDYNEQFF,2.847540939119e-05,1.26186e-05,0.424 434 | CASSPDTYYGYTF,2.847540939119e-05,1.26186e-05,0.076 435 | CASIDEPGTRTTDTQYF,2.847540939119e-05,1.26186e-05,0.367 436 | CASSVATPYNEQFF,2.847540939119e-05,1.26186e-05,0.52 437 | CASSLEVARRRSSGNTIYF,2.847540939119e-05,1.26186e-05,0.022 438 | CASSIGNYGYTF,2.847540939119e-05,1.26186e-05,0.00649999999999995 439 | CASSLGLALETQYF,2.847540939119e-05,1.26186e-05,0.314 440 | CASSIADRSRGYTF,2.847540939119e-05,1.26186e-05,0.118 441 | CASSPWGQGGNGELFF,2.847540939119e-05,1.26186e-05,0.133 442 | CASSLDRGTEAFF,0.000113902037564892,1.26186e-05,0.379 443 | CAIQEADTQYF,0.000113902037564892,1.26186e-05,0.00929999999999997 444 | CASSPTMDRGRTEAFF,0.000113902037564892,1.26186e-05,0.138 445 | CASSPSTGTGGLGTEAFF,0.000113902037564892,1.26186e-05,0.033 446 | CASSVVWEENGYTF,0.000113902037564892,1.26186e-05,0.101 447 | CASSPGGALVTYEQYF,5.6950918782413e-05,2.52372e-05,0.028 448 | CASSLDQLSSYEQYF,2.847540939119e-05,0.000138805,0.148 449 | CASSLSYSSGYTF,8.5426328173603e-05,3.78558e-05,0.464 450 | CSASRQGNGYTF,2.847540939119e-05,5.04745e-05,0.043 451 | CASRDSFSPPYEQYF,2.847540939119e-05,5.04745e-05,0.747 452 | 
CSARPNKGPNYGYTF,2.847540939119e-05,2.52372e-05,0.0185999999999999 453 | CASSANIRDSSGVRQPQHF,5.6950918782413e-05,1.26186e-05,0.046 454 | CASSQPQQTSGLNNEQFF,5.6950918782413e-05,1.26186e-05,0.274 455 | CASSPTSGSSAPEAFF,5.6950918782413e-05,1.26186e-05,0.421 456 | CASSLGQTTGELFF,5.6950918782413e-05,1.26186e-05,0.115 457 | CASQLSTGVNQPQHF,5.6950918782413e-05,1.26186e-05,0.241 458 | CSATGQLHTDTQYF,5.6950918782413e-05,1.26186e-05,0.0639999999999999 459 | CASSPGTYYGYTF,5.6950918782413e-05,1.26186e-05,0.379 460 | CASSVGAGGGETQYF,5.6950918782413e-05,1.26186e-05,0.222 461 | CASSQEIHRKHGYTF,8.5426328173603e-05,2.52372e-05,0.138 462 | CSAPGTVQETQYF,2.847540939119e-05,3.78558e-05,0.00629999999999997 463 | CASSQFSGGSLNTEAFF,2.847540939119e-05,3.78558e-05,0.289 464 | CASSIAGTGGRREQYF,2.847540939119e-05,1.26186e-05,0.0231 465 | CSARDRGLGNTIYF,2.847540939119e-05,1.26186e-05,0.00170000000000003 466 | CASSSQGLNEKLFF,2.847540939119e-05,1.26186e-05,0.084 467 | CASSDQANQYF,2.847540939119e-05,1.26186e-05,0.0610000000000001 468 | CASSLGRSEAFF,5.6950918782413e-05,2.52372e-05,0.102 469 | CASSLDPYANEKLFF,8.5426328173603e-05,6.30931e-05,0.281 470 | CASSLGLIRDRWGYTF,8.5426328173603e-05,1.26186e-05,0.359 471 | CASRAGTANNQPQHF,8.5426328173603e-05,1.26186e-05,0.101 472 | CASSKHPGQGYTF,8.5426328173603e-05,1.26186e-05,0.364 473 | CASSLVLNTEAFF,8.5426328173603e-05,1.26186e-05,0.329 474 | CASTRRILAGGTEAFF,8.5426328173603e-05,1.26186e-05,0.512 475 | CASSYRDRERTDTQYF,8.5426328173603e-05,1.26186e-05,0.134 476 | CAWSVGVNQPQHF,2.847540939119e-05,2.52372e-05,0.105 477 | CAWKGQGNQPQHF,2.847540939119e-05,2.52372e-05,0.467 478 | CASSLTKGSVHEQYF,2.847540939119e-05,2.52372e-05,0.107 479 | CASSLGLYTEAFF,2.847540939119e-05,2.52372e-05,0.086 480 | CASSQDRTGNQPQHF,5.6950918782413e-05,3.78558e-05,0.341 481 | CAISESRQGGGYTF,5.6950918782413e-05,3.78558e-05,0.154 482 | CASSQDYGYTF,5.6950918782413e-05,1.26186e-05,0.069 483 | CASSVDGLRIYF,5.6950918782413e-05,1.26186e-05,0.147 484 | 
CASSQVLGQNTEAFF,5.6950918782413e-05,1.26186e-05,0.354 485 | CSVRQGPAGYGYTF,2.847540939119e-05,6.30931e-05,0.605 486 | CASTLHF,2.847540939119e-05,6.30931e-05,0.03 487 | CASIPPAERNQPQHF,2.847540939119e-05,6.30931e-05,0.00939999999999996 488 | CSASGNEAFF,2.847540939119e-05,1.26186e-05,0.226 489 | CAISAQGGTDTQYF,2.847540939119e-05,1.26186e-05,0.0264 490 | CSARREFGTSPGGSYNEQFF,2.847540939119e-05,1.26186e-05,0.101 491 | CAWRMQGNYGYTF,2.847540939119e-05,1.26186e-05,0.077 492 | CATTRSGAMNTEAFF,2.847540939119e-05,1.26186e-05,0.146 493 | CASSSTAGSREQFF,2.847540939119e-05,1.26186e-05,0.292 494 | CASSLGTSLKGAFF,2.847540939119e-05,1.26186e-05,0.214 495 | CASSYPDRVLSRNQPQHF,2.847540939119e-05,1.26186e-05,0.202 496 | CASSPDRGQETQYF,2.847540939119e-05,1.26186e-05,0.239 497 | CAWSPGINEQFF,2.847540939119e-05,1.26186e-05,0.0102 498 | CSARTGDGEKLFF,2.847540939119e-05,1.26186e-05,0.308 499 | CASSPSEGAGNTIYF,2.847540939119e-05,1.26186e-05,0.032 500 | CASSQGAGPGNEQFF,2.847540939119e-05,1.26186e-05,0.07 501 | CASSPRDRELEKLFF,2.847540939119e-05,1.26186e-05,0.877 502 | CSAPERRGGYNEQFF,2.847540939119e-05,1.26186e-05,0.658 503 | CASSYVGLPQETQYF,2.847540939119e-05,1.26186e-05,0.645 504 | CAIREGLDNQPQHF,2.847540939119e-05,1.26186e-05,0.17 505 | CAWSSMIGRGYTF,0.000113902037564892,1.26186e-05,0.128 506 | CASSSHRGSYEQYF,5.6950918782413e-05,2.52372e-05,0.345 507 | CASSESWGTGELFF,5.6950918782413e-05,2.52372e-05,0.073 508 | CASVIPNQETQYF,5.6950918782413e-05,2.52372e-05,0.099 509 | CASSLGPDRAGYGYTF,5.6950918782413e-05,2.52372e-05,0.157 510 | CASSSLLVGGELFF,5.6950918782413e-05,2.52372e-05,0.273 511 | CASSRDPEAFF,2.847540939119e-05,5.04745e-05,0.046 512 | CASSPYREINYGYTF,2.847540939119e-05,2.52372e-05,0.193 513 | CASSQTDRDTEAFF,2.847540939119e-05,2.52372e-05,0.323 514 | CASSVVGSAGELFF,5.6950918782413e-05,1.26186e-05,0.0199 515 | CASMSLIGGLNTEAFF,5.6950918782413e-05,1.26186e-05,0.048 516 | CASSQEAAANTEAFF,5.6950918782413e-05,1.26186e-05,0.098 517 | CASSPEGENEQFF,5.6950918782413e-05,1.26186e-05,0.17 518 | 
CASSMGLNNEQFF,5.6950918782413e-05,1.26186e-05,0.071 519 | CASSYTGSTEAFF,5.6950918782413e-05,1.26186e-05,0.098 520 | CASSGTLSFGAEAFF,5.6950918782413e-05,1.26186e-05,0.0649999999999999 521 | CASSPDTTVSYNEQFF,5.6950918782413e-05,1.26186e-05,0.384 522 | CASSPGGEYGYTF,5.6950918782413e-05,1.26186e-05,0.126 523 | CASSYRPGQGFRAQHF,5.6950918782413e-05,1.26186e-05,0.235 524 | CASSLLPPGLNTEAFF,5.6950918782413e-05,1.26186e-05,0.112 525 | CASSSSGNEQYF,5.6950918782413e-05,1.26186e-05,0.039 526 | CASTRWGDKEETQYF,5.6950918782413e-05,1.26186e-05,0.649 527 | CAWSVQGLGDTQYF,5.6950918782413e-05,1.26186e-05,0.297 528 | CSANGAKNIQYF,5.6950918782413e-05,1.26186e-05,0.00560000000000005 529 | CASSDGPAGVDEAFF,5.6950918782413e-05,1.26186e-05,0.775 530 | CASRSVGQETQYF,5.6950918782413e-05,1.26186e-05,0.425 531 | CASSVGGGQNNEQFF,5.6950918782413e-05,1.26186e-05,0.214 532 | CASSDGTPNAFYEQYF,5.6950918782413e-05,1.26186e-05,0.3 533 | CASSHSTGRAFF,2.847540939119e-05,3.78558e-05,0.2 534 | CASSQDRAVYEQFF,2.847540939119e-05,1.26186e-05,0.051 535 | CASSQDEGPNTEAFF,2.847540939119e-05,1.26186e-05,0.212 536 | CASRTGGIQPQHF,5.6950918782413e-05,5.04745e-05,0.137 537 | CAWRRADRDNSPLHF,5.6950918782413e-05,5.04745e-05,0.163 538 | CASSQDRRQGREKLFF,5.6950918782413e-05,5.04745e-05,0.18 539 | CASSQEGYNEQFF,2.847540939119e-05,7.57117e-05,0.17 540 | CASRPQRGLDNSPLHF,8.5426328173603e-05,1.26186e-05,0.044 541 | CASRTSGGSGANVLTF,8.5426328173603e-05,1.26186e-05,0.3 542 | CASRFSGANVLTF,8.5426328173603e-05,1.26186e-05,0.861 543 | CSARTGSKNIQYF,8.5426328173603e-05,1.26186e-05,0.09 544 | CAWNQGGIGYTF,8.5426328173603e-05,1.26186e-05,0.00890000000000002 545 | CASNSDRVVQPQHF,2.847540939119e-05,2.52372e-05,0.101 546 | CASSPSYTGELFF,2.847540939119e-05,2.52372e-05,0.265 547 | CASSLGGTANYGYTF,2.847540939119e-05,2.52372e-05,0.204 548 | CASSYRSTGSGDTQYF,2.847540939119e-05,2.52372e-05,0.393 549 | CASSYSRGGYGYTF,2.847540939119e-05,2.52372e-05,0.207 550 | RASSLGLYGYTF,2.847540939119e-05,2.52372e-05,0.069 551 | 
CSARVGGVNTEAFF,2.847540939119e-05,2.52372e-05,0.051 552 | CASSLDRGVQPQHF,2.847540939119e-05,2.52372e-05,0.242 553 | CASSYSAGSYNEQFF,2.847540939119e-05,2.52372e-05,0.265 554 | CASSSGSYEQYF,2.847540939119e-05,2.52372e-05,0.259 555 | CAWARTGLNTEAFF,2.847540939119e-05,2.52372e-05,0.253 556 | CASSQDNSVNYGYTF,5.6950918782413e-05,3.78558e-05,0.097 557 | CASRLAGGSYEQYF,2.847540939119e-05,6.30931e-05,0.608 558 | CASSQDLGVGYTF,2.847540939119e-05,1.26186e-05,0.181 559 | CASSQDDGGLTGYTF,2.847540939119e-05,1.26186e-05,0.32 560 | CASSKPTGGDGYTF,2.847540939119e-05,1.26186e-05,0.379 561 | CASSLGTGELFF,2.847540939119e-05,1.26186e-05,0.323 562 | CASTGSFGGYTF,2.847540939119e-05,1.26186e-05,0.23 563 | CAWSGSLNYGYTF,2.847540939119e-05,1.26186e-05,0.351 564 | CASSWWRTGGSTEAFF,2.847540939119e-05,1.26186e-05,0.128 565 | CASSLGGGPGDEQYF,2.847540939119e-05,1.26186e-05,0.383 566 | CASSPNTGELFF,2.847540939119e-05,1.26186e-05,0.647 567 | CASSSGTSGSAGTDTQYF,2.847540939119e-05,1.26186e-05,0.042 568 | CASSLGQHLAKNIQYF,2.847540939119e-05,1.26186e-05,0.294 569 | CASREDSFFRGQPQHF,2.847540939119e-05,1.26186e-05,0.0171 570 | CASSTDGTGWYEQFF,2.847540939119e-05,1.26186e-05,0.114 571 | CKPDPEAFF,2.847540939119e-05,1.26186e-05,0.026 572 | CASSFTGVGYGYTF,2.847540939119e-05,1.26186e-05,0.093 573 | CAWRLTFDSPLHF,2.847540939119e-05,1.26186e-05,0.047 574 | CASSYSGGSYEQYF,2.847540939119e-05,1.26186e-05,0.249 575 | CSARGGSVVEAFF,2.847540939119e-05,1.26186e-05,0.074 576 | CSARVPAASGRAGKNIQYF,2.847540939119e-05,1.26186e-05,0.022 577 | CASSTLGQHSNQPQHF,2.847540939119e-05,1.26186e-05,0.048 578 | CASSFRDRYTDTQYF,2.847540939119e-05,1.26186e-05,0.584 579 | CASSYSSGGRRGGEQFF,2.847540939119e-05,1.26186e-05,0.452 580 | CASSIAQGTTYNEQFF,2.847540939119e-05,1.26186e-05,0.104 581 | CASSLGRRTEAFF,2.847540939119e-05,1.26186e-05,0.34 582 | CAWSVGQGSEAFF,2.847540939119e-05,1.26186e-05,0.048 583 | CASSTGEVAPGEQYF,2.847540939119e-05,1.26186e-05,0.0071 584 | CASSSYSGGSLGNEQFF,2.847540939119e-05,1.26186e-05,0.134 585 | 
CASSVPTGAGGYTF,2.847540939119e-05,1.26186e-05,0.145 586 | CASSSIGSNQPQHF,2.847540939119e-05,1.26186e-05,0.155 587 | CASSLQDHRTSSSYEQYF,2.847540939119e-05,1.26186e-05,0.916 588 | CASSLGGAFEQFF,2.847540939119e-05,1.26186e-05,0.259 589 | CATMGGASYEQYF,2.847540939119e-05,1.26186e-05,0.124 590 | CASSLGRSGQETQYF,2.847540939119e-05,1.26186e-05,0.452 591 | CASSFQSGYTF,5.6950918782413e-05,2.52372e-05,0.105 592 | CASSEGPGLGGYTF,5.6950918782413e-05,2.52372e-05,0.54 593 | CASSLGQGNTEAFF,2.847540939119e-05,5.04745e-05,0.608 594 | CSVKGLDEQFF,2.847540939119e-05,5.04745e-05,0.045 595 | CASSRGARHEKLFF,2.847540939119e-05,5.04745e-05,0.593 596 | CASSFGPGLSSRGAVF,2.847540939119e-05,5.04745e-05,0.587 597 | CASSEGAGSPLHF,2.847540939119e-05,5.04745e-05,0.036 598 | CASSLAAFPDRGRNTIYF,5.6950918782413e-05,1.26186e-05,0.289 599 | CATSGSNQPQHF,5.6950918782413e-05,1.26186e-05,0.133 600 | CSASIQGSYGYTF,5.6950918782413e-05,1.26186e-05,0.0182 601 | CAIMGGQASYEQYF,5.6950918782413e-05,1.26186e-05,0.351 602 | CASPYPAGRTLTPSGYTF,5.6950918782413e-05,1.26186e-05,0.282 603 | CAISRRLPNEQFF,5.6950918782413e-05,1.26186e-05,0.102 604 | CASSLISPGNTIYF,5.6950918782413e-05,1.26186e-05,0.248 605 | CASSLGNSPLHF,5.6950918782413e-05,1.26186e-05,0.448 606 | CASSVTNLISYEQYF,5.6950918782413e-05,1.26186e-05,0.428 607 | CSARKAGGRLDTQYF,5.6950918782413e-05,1.26186e-05,0.227 608 | CASSHRQNTEAFF,5.6950918782413e-05,1.26186e-05,0.021 609 | CASSERGAEKLFF,5.6950918782413e-05,1.26186e-05,0.001 610 | CASREQETDTQYF,5.6950918782413e-05,1.26186e-05,0.248 611 | CASSLTTAGDGYTF,5.6950918782413e-05,1.26186e-05,0.244 612 | CASRRSGTSNYGYTF,5.6950918782413e-05,1.26186e-05,0.74 613 | CASSVTGGEGGELFF,5.6950918782413e-05,1.26186e-05,0.032 614 | CASRLRAHGYTF,5.6950918782413e-05,1.26186e-05,0.186 615 | CASSFYPRWQGGNTIYF,2.847540939119e-05,3.78558e-05,0.464 616 | CASSPTYEQYF,2.847540939119e-05,3.78558e-05,0.071 617 | CASLWGTEAFF,2.847540939119e-05,3.78558e-05,0.158 618 | CASSQDPGRLFNEKLFF,2.847540939119e-05,3.78558e-05,0.032 619 | 
CSAGSEGYQETQYF,2.847540939119e-05,3.78558e-05,0.125 620 | CASTTTGEEQYF,2.847540939119e-05,3.78558e-05,0.145 621 | CSAPGPRNTEAFF,2.847540939119e-05,3.78558e-05,0.00560000000000005 622 | CASSSGTTYNEQFF,2.847540939119e-05,2.52372e-05,0.33 623 | CATSDGGESGTQYF,2.847540939119e-05,2.52372e-05,0.405 624 | CASSWGALGEEQYF,2.847540939119e-05,2.52372e-05,0.0256999999999999 625 | CSARGPTSGRIETQYF,2.847540939119e-05,2.52372e-05,0.125 626 | CASSFSIEGYTF,2.847540939119e-05,2.52372e-05,0.191 627 | CASSIGDTEWEAEAFF,2.847540939119e-05,2.52372e-05,0.028 628 | CASSVGGGGPGDTQYF,2.847540939119e-05,2.52372e-05,0.682 629 | CASSLGAYEQYF,2.847540939119e-05,2.52372e-05,0.241 630 | CASRRNSGGSLSSYNSPLHF,2.847540939119e-05,2.52372e-05,0.205 631 | CASSLVGVYTF,2.847540939119e-05,2.52372e-05,0.328 632 | CARGSGQSSNYGYTF,2.847540939119e-05,2.52372e-05,0.118 633 | CSAPLGGSNYNEQFF,2.847540939119e-05,2.52372e-05,0.339 634 | CASSQPCWLLGYGYTF,2.847540939119e-05,2.52372e-05,0.075 635 | CASSIWAGGPPDTQYF,2.847540939119e-05,2.52372e-05,0.476 636 | CASSLDPNIQYF,2.847540939119e-05,2.52372e-05,0.654 637 | CAWSVLSQGTQYF,2.847540939119e-05,2.52372e-05,0.0185 638 | CASSFSGAQFYEQYF,2.847540939119e-05,2.52372e-05,0.223 639 | CASSQEAGGNEQFF,2.847540939119e-05,2.52372e-05,0.204 640 | CASSLGLAGGLEQYF,2.847540939119e-05,2.52372e-05,0.389 641 | CASSSGQGSGNEQFF,2.847540939119e-05,1.26186e-05,0.00549999999999995 642 | CASSSRAGTPYNEQFF,2.847540939119e-05,1.26186e-05,0.479 643 | CAWSLPPRGQFF,2.847540939119e-05,1.26186e-05,0.05 644 | CASSLATGSYEQYF,2.847540939119e-05,1.26186e-05,0.089 645 | CASSFRLAGESSYNEQFF,2.847540939119e-05,1.26186e-05,0.279 646 | CASSQSTSGRRYEQYF,2.847540939119e-05,1.26186e-05,0.326 647 | CASSLGTEDEKLFF,2.847540939119e-05,1.26186e-05,0.768 648 | CSVFGGQHSYEQYF,2.847540939119e-05,1.26186e-05,0.85 649 | CASSFVEGSTEAFF,2.847540939119e-05,1.26186e-05,0.392 650 | CASSSSPGLFSNQPQHF,2.847540939119e-05,1.26186e-05,0.806 651 | CASRDAWSGELFF,2.847540939119e-05,1.26186e-05,0.054 652 | 
CASSYRQGREDGYTF,2.847540939119e-05,1.26186e-05,0.608 653 | CASSLGSTGTNEQFF,2.847540939119e-05,1.26186e-05,0.186 654 | CATGLAGNYEQYF,2.847540939119e-05,1.26186e-05,0.395 655 | CASSPGQGLLVGSPLHF,2.847540939119e-05,1.26186e-05,0.349 656 | CAWRLQGAIEAFF,2.847540939119e-05,1.26186e-05,0.054 657 | CASRPRDFSRTDTQYF,2.847540939119e-05,1.26186e-05,0.029 658 | CASSRRQGGSGELFF,2.847540939119e-05,1.26186e-05,0.207 659 | CSATGGFQPQHF,2.847540939119e-05,1.26186e-05,0.102 660 | CASSDMMGGLQPQHF,2.847540939119e-05,1.26186e-05,0.889 661 | CASSLIGDTSYGYTF,2.847540939119e-05,1.26186e-05,0.355 662 | CAWTKPINFGQGSNSPLHF,2.847540939119e-05,1.26186e-05,0.159 663 | CASSFGGTGAEAFF,2.847540939119e-05,1.26186e-05,0.226 664 | CASRPPGQGLDGYTF,2.847540939119e-05,1.26186e-05,0.881 665 | CSARDPVRTDYGYTF,2.847540939119e-05,1.26186e-05,0.053 666 | CSVRPGGTEAFF,2.847540939119e-05,1.26186e-05,0.107 667 | CAISEIPIHRLYQPYNEQFF,2.847540939119e-05,1.26186e-05,0.359 668 | CSASLTVNTEAFF,2.847540939119e-05,1.26186e-05,0.077 669 | CSARVIGPNEQYF,2.847540939119e-05,1.26186e-05,0.176 670 | CASSVGDYGYTF,2.847540939119e-05,1.26186e-05,0.079 671 | CASSQTETQYF,2.847540939119e-05,1.26186e-05,0.159 672 | CASSGDTSTNEKLFF,2.847540939119e-05,1.26186e-05,0.123 673 | CASSTGQGSEQYF,2.847540939119e-05,1.26186e-05,0.634 674 | CASSQGAGDEQFF,2.847540939119e-05,1.26186e-05,0.266 675 | CASSQEGAGELFF,2.847540939119e-05,1.26186e-05,0.232 676 | CASSSTGGVGTGELFF,2.847540939119e-05,1.26186e-05,0.042 677 | CASRASRDRGRRSNQPQHF,2.847540939119e-05,1.26186e-05,0.908 678 | CSARGLEDRGFYGYTF,2.847540939119e-05,1.26186e-05,0.0183 679 | CASSLRNTEAFF,2.847540939119e-05,1.26186e-05,0.54 680 | CSASGRAVGGETQYF,2.847540939119e-05,1.26186e-05,0.077 681 | CASSLDPGKAFF,2.847540939119e-05,1.26186e-05,0.154 682 | CASSLETGTGYEQYF,2.847540939119e-05,1.26186e-05,0.076 683 | CASSLGAQGSGYTF,2.847540939119e-05,1.26186e-05,0.12 684 | CASSYSKGSYEQYF,2.847540939119e-05,1.26186e-05,0.115 685 | CASSIDLSRQGSEQYF,2.847540939119e-05,1.26186e-05,0.143 686 | 
CASSIRGGYEQYF,2.847540939119e-05,1.26186e-05,0.0191 687 | CSAVPVDYGYTF,2.847540939119e-05,1.26186e-05,0.034 688 | CASRDPHLTGNYGYTF,2.847540939119e-05,1.26186e-05,0.279 689 | CASSREGTGAWNEQFF,2.847540939119e-05,1.26186e-05,0.204 690 | CASSLTGVGNTIYF,2.847540939119e-05,1.26186e-05,0.069 691 | CATSRGREGSTEAFF,2.847540939119e-05,1.26186e-05,0.739 692 | CASSTRDRASDSGNTIYF,2.847540939119e-05,1.26186e-05,0.339 693 | CASSLGQGREKLFF,2.847540939119e-05,1.26186e-05,0.581 694 | CSARDPNLNEKLFF,2.847540939119e-05,1.26186e-05,0.107 695 | CAWSRREGDTQYF,2.847540939119e-05,1.26186e-05,0.486 696 | CASSGTGGNQPQHF,2.847540939119e-05,1.26186e-05,0.106 697 | CASSIGGTGQRRLEQYF,2.847540939119e-05,1.26186e-05,0.835 698 | CSASARGMGGRDNEQFF,2.847540939119e-05,1.26186e-05,0.075 699 | CASSIQTGTMGNTIYF,2.847540939119e-05,1.26186e-05,0.243 700 | CASRDWDTGTDTQYF,2.847540939119e-05,1.26186e-05,0.297 701 | CASIPGTSGGNTQYF,2.847540939119e-05,1.26186e-05,0.43 702 | CASSYWRANTGELFF,2.847540939119e-05,1.26186e-05,0.204 703 | CASSQAGNTIYF,2.847540939119e-05,1.26186e-05,0.194 704 | CSALNGGPREKLFF,2.847540939119e-05,1.26186e-05,0.098 705 | CASSDQVGPYEQYF,2.847540939119e-05,1.26186e-05,0.15 706 | CAWNSARGAEAFF,2.847540939119e-05,1.26186e-05,0.029 707 | CASSPWAAANTGELFF,2.847540939119e-05,1.26186e-05,0.049 708 | CSASDPREPPGGHLNYGYTF,2.847540939119e-05,1.26186e-05,0.341 709 | CSAPGQGARAGELFF,2.847540939119e-05,1.26186e-05,0.268 710 | CASSQDGYGYTF,2.847540939119e-05,1.26186e-05,0.055 711 | CASSTSTDTQYF,2.847540939119e-05,1.26186e-05,0.043 712 | CASTPLGGAGTEAFF,2.847540939119e-05,1.26186e-05,0.00570000000000004 713 | CSARAQMNTEAFF,2.847540939119e-05,1.26186e-05,0.023 714 | CSAPKGTGDEQYF,2.847540939119e-05,1.26186e-05,0.562 715 | CASSVESGEQYF,2.847540939119e-05,1.26186e-05,0.032 716 | CSAKIPGTKGGNEQFF,2.847540939119e-05,1.26186e-05,0.0649999999999999 717 | CASSLVGLSEAFF,2.847540939119e-05,1.26186e-05,0.295 718 | CASTTGAYGYTF,2.847540939119e-05,1.26186e-05,0.0153 719 | 
CAWIKEGRWRNGYTF,2.847540939119e-05,1.26186e-05,0.082 720 | CASSLGGGYGYTF,2.847540939119e-05,1.26186e-05,0.185 721 | CASSLGSTIYF,2.847540939119e-05,1.26186e-05,0.191 722 | CASNPPGQGINEQYF,2.847540939119e-05,1.26186e-05,0.026 723 | CASSFGHSSGNTIYF,2.847540939119e-05,1.26186e-05,0.668 724 | CSARDGGGIQPQHF,2.847540939119e-05,1.26186e-05,0.542 725 | CASSSWRGARVNSPLHF,2.847540939119e-05,1.26186e-05,0.128 726 | CASSVGLAGVETQYF,2.847540939119e-05,1.26186e-05,0.611 727 | CASSYVLGGASSYNEQFF,2.847540939119e-05,1.26186e-05,0.328 728 | CASSFAKDRYYEQYF,2.847540939119e-05,1.26186e-05,0.054 729 | CASSVGPGSSYEQYF,2.847540939119e-05,1.26186e-05,0.087 730 | CASSVFPGQGARGNTIYF,2.847540939119e-05,1.26186e-05,0.366 731 | CASSPPRGGAGELFF,2.847540939119e-05,1.26186e-05,0.285 732 | CASSLGNYGYTF,2.847540939119e-05,1.26186e-05,0.105 733 | CASSYGDNEQFF,2.847540939119e-05,1.26186e-05,0.149 734 | CASSLSSPGTQYF,2.847540939119e-05,1.26186e-05,0.1 735 | CASSFEGARDTQYF,2.847540939119e-05,1.26186e-05,0.443 736 | CATRTTQGTDTQYF,2.847540939119e-05,1.26186e-05,0.464 737 | CASSPVAGGYEQFF,2.847540939119e-05,1.26186e-05,0.19 738 | CAIGGDRDSGHSNGYTF,2.847540939119e-05,1.26186e-05,0.0570000000000001 739 | -------------------------------------------------------------------------------- /data/testing_data.csv: -------------------------------------------------------------------------------- 1 | CDR3,Antigen,HLA 2 | CASSLSFGTEAFF,SMLGIGIYPV,A*02:01 3 | CASSLSFGTEAFF,LMFDRGMSLL,A*02:01 4 | CASSLSFGTEAFF,ILEDRGFNQV,A*02:01 5 | CASSLSFGTEAFF,MMWDRGLGMM,A*02:01 6 | CASSLSFGTEAFF,NLSNLGILPV,A*02:01 7 | CASSLSFGTEAFF,IMEDVGWLNV,A*02:01 8 | CASSLSFGTEAFF,NMGGLGIMPV,A*02:01 9 | CASSLSFGTEAFF,MMWDRGMGLL,A*02:01 10 | CASSLSFGTEAFF,SMAGIGIVDV,A*02:01 11 | CASSLSFGTEAFF,SMLGIGIVPV,A*02:01 12 | CASSQNYEQYF,ALDSRSEHFM,A*02:01 13 | CASSQNYEQYF,AMDSRADMFV,A*02:01 14 | CASSQNYEQYF,SMNSREEVFV,A*02:01 15 | CASSQNYEQYF,IMDSKSENFL,A*02:01 16 | CASSQNYEQYF,SMNSHSGTFL,A*02:01 17 | CAWSETGLGTGELFFG,ELAGIGILTV,A*02:01 18 | 
CAWSETGLGLIGGWQFG,ELAGIGILTV,A*02:01 19 | CAWSETGLNEGGEFFG,ELAGIGILTV,A*02:01 20 | CASSLSFGTEAFF,AAGIGILTV,A*02:01 21 | CASSLSFGTEAFF,ELAGIGILTV,A*02:01 22 | CAISEVGVGQPQHF,EAAGIGILTV,A*02:01 23 | CAISEVGVGQPQHF,AAGIGILTV,A*02:01 24 | CAISEVGVGQPQHF,ELAGIGILTV,A*02:01 25 | CASSLSAVQNNEQF,SLYNTVATL,A*02:01 26 | CASSIRSSYEQ,GILGFVFTL,A*02:01 27 | CASSISSTGEL,GILGFVFTL,A*02:01 28 | CASSIGVYGY,GILGFVFTL,A*02:01 29 | CASSSRSSYEQ,GILGFVFTL,A*02:01 30 | CAWSETGLGMGGWQFG,ELAGIGILTV ,A*02:01 31 | CAWSETGLGTGELFFG,ELAGIGILTV ,A*02:01 32 | CASSFDAEAF,ALFNTVATL,A*02:01 33 | CASSFDAEAF,CLFNTVATL,A*02:01 34 | CASSFDAEAF,DLFNTVATL,A*02:01 35 | CASSFDAEAF,ELFNTVATL,A*02:01 36 | CASSFDAEAF,FLFNTVATL,A*02:01 37 | CASSFDAEAF,GLFNTVATL,A*02:01 38 | CASSFDAEAF,HLFNTVATL,A*02:01 39 | CASSFDAEAF,ILFNTVATL,A*02:01 40 | CASSFDAEAF,KLFNTVATL,A*02:01 41 | CASSFDAEAF,LLFNTVATL,A*02:01 42 | CASSFDAEAF,MLFNTVATL,A*02:01 43 | CASSFDAEAF,NLFNTVATL,A*02:01 44 | CASSFDAEAF,PLFNTVATL,A*02:01 45 | CASSFDAEAF,QLFNTVATL,A*02:01 46 | CASSFDAEAF,RLFNTVATL,A*02:01 47 | CASSFDAEAF,SLFNTVATL,A*02:01 48 | CASSFDAEAF,TLFNTVATL,A*02:01 49 | CASSFDAEAF,VLFNTVATL,A*02:01 50 | CASSFDAEAF,WLFNTVATL,A*02:01 51 | CASSFDAEAF,SIFNTVATL,A*02:01 52 | CASSFDAEAF,SMFNTVATL,A*02:01 53 | CASSFDAEAF,STFNTVATL,A*02:01 54 | CASSFDAEAF,SVFNTVATL,A*02:01 55 | CASSFDAEAF,SLCNTVATL,A*02:01 56 | CASSFDAEAF,SLWNTVATL,A*02:01 57 | CASSFDAEAF,SLYNTVATL,A*02:01 58 | CASSFDAEAF,SLFCTVATL,A*02:01 59 | CASSFDAEAF,SLFHTVATL,A*02:01 60 | CASSFDAEAF,SLFWTVATL,A*02:01 61 | CASSFDAEAF,SLFYTVATL,A*02:01 62 | CASSFDAEAF,SLFNSVATL,A*02:01 63 | CASSFDAEAF,SLFNTFATL,A*02:01 64 | CASSFDAEAF,SLFNTGATL,A*02:01 65 | CASSFDAEAF,SLFNTIATL,A*02:01 66 | CASSFDAEAF,SLFNTKATL,A*02:01 67 | CASSFDAEAF,SLFNTLATL,A*02:01 68 | CASSFDAEAF,SLFNTPATL,A*02:01 69 | CASSFDAEAF,SLFNTTATL,A*02:01 70 | CASSFDAEAF,SLFNTVCTL,A*02:01 71 | CASSFDAEAF,SLFNTVETL,A*02:01 72 | CASSFDAEAF,SLFNTVGTL,A*02:01 73 | CASSFDAEAF,SLFNTVITL,A*02:01 74 | CASSFDAEAF,SLFNTVLTL,A*02:01 75 | 
CASSFDAEAF,SLFNTVMTL,A*02:01 76 | CASSFDAEAF,SLFNTVPTL,A*02:01 77 | CASSFDAEAF,SLFNTVRTL,A*02:01 78 | CASSFDAEAF,SLFNTVSTL,A*02:01 79 | CASSFDAEAF,SLFNTVTTL,A*02:01 80 | CASSFDAEAF,SLFNTVVTL,A*02:01 81 | CASSFDAEAF,SLFNTVWTL,A*02:01 82 | CASSFDAEAF,SLFNTVAIL,A*02:01 83 | CASSFDAEAF,SLFNTVALL,A*02:01 84 | CASSFDAEAF,SLFNTVATF,A*02:01 85 | CASSFDAEAF,SLFNTVATI,A*02:01 86 | CASSFDAEAF,SLFNTVATM,A*02:01 87 | CASSFDAEAF,SLFNTVATP,A*02:01 88 | CASSFDAEAF,SLFNTVATT,A*02:01 89 | CASSFDAEAF,SLFNTVATV,A*02:01 90 | CATSSTGTGGGETQY,ALFNTVATL,A*02:01 91 | CATSSTGTGGGETQY,CLFNTVATL,A*02:01 92 | CATSSTGTGGGETQY,DLFNTVATL,A*02:01 93 | CATSSTGTGGGETQY,ELFNTVATL,A*02:01 94 | CATSSTGTGGGETQY,FLFNTVATL,A*02:01 95 | CATSSTGTGGGETQY,GLFNTVATL,A*02:01 96 | CATSSTGTGGGETQY,HLFNTVATL,A*02:01 97 | CATSSTGTGGGETQY,ILFNTVATL,A*02:01 98 | CATSSTGTGGGETQY,KLFNTVATL,A*02:01 99 | CATSSTGTGGGETQY,LLFNTVATL,A*02:01 100 | CATSSTGTGGGETQY,MLFNTVATL,A*02:01 101 | CATSSTGTGGGETQY,NLFNTVATL,A*02:01 102 | CATSSTGTGGGETQY,PLFNTVATL,A*02:01 103 | CATSSTGTGGGETQY,QLFNTVATL,A*02:01 104 | CATSSTGTGGGETQY,SLFNTVATL,A*02:01 105 | CATSSTGTGGGETQY,TLFNTVATL,A*02:01 106 | CATSSTGTGGGETQY,VLFNTVATL,A*02:01 107 | CATSSTGTGGGETQY,WLFNTVATL,A*02:01 108 | CATSSTGTGGGETQY,YLFNTVATL,A*02:01 109 | CATSSTGTGGGETQY,SAFNTVATL,A*02:01 110 | CATSSTGTGGGETQY,SIFNTVATL,A*02:01 111 | CATSSTGTGGGETQY,SKFNTVATL,A*02:01 112 | CATSSTGTGGGETQY,SMFNTVATL,A*02:01 113 | CATSSTGTGGGETQY,SQFNTVATL,A*02:01 114 | CATSSTGTGGGETQY,STFNTVATL,A*02:01 115 | CATSSTGTGGGETQY,SVFNTVATL,A*02:01 116 | CATSSTGTGGGETQY,SLHNTVATL,A*02:01 117 | CATSSTGTGGGETQY,SLINTVATL,A*02:01 118 | CATSSTGTGGGETQY,SLPNTVATL,A*02:01 119 | CATSSTGTGGGETQY,SLYNTVATL,A*02:01 120 | CATSSTGTGGGETQY,SLFCTVATL,A*02:01 121 | CATSSTGTGGGETQY,SLFDTVATL,A*02:01 122 | CATSSTGTGGGETQY,SLFNSVATL,A*02:01 123 | CATSSTGTGGGETQY,SLFNTIATL,A*02:01 124 | CATSSTGTGGGETQY,SLFNTLATL,A*02:01 125 | CATSSTGTGGGETQY,SLFNTYATL,A*02:01 126 | CATSSTGTGGGETQY,SLFNTVETL,A*02:01 127 | 
CATSSTGTGGGETQY,SLFNTVGTL,A*02:01 128 | CATSSTGTGGGETQY,SLFNTVMTL,A*02:01 129 | CATSSTGTGGGETQY,SLFNTVPTL,A*02:01 130 | CATSSTGTGGGETQY,SLFNTVAAL,A*02:01 131 | CATSSTGTGGGETQY,SLFNTVACL,A*02:01 132 | CATSSTGTGGGETQY,SLFNTVAIL,A*02:01 133 | CATSSTGTGGGETQY,SLFNTVAKL,A*02:01 134 | CATSSTGTGGGETQY,SLFNTVAML,A*02:01 135 | CATSSTGTGGGETQY,SLFNTVANL,A*02:01 136 | CATSSTGTGGGETQY,SLFNTVARL,A*02:01 137 | CATSSTGTGGGETQY,SLFNTVASL,A*02:01 138 | CATSSTGTGGGETQY,SLFNTVAVL,A*02:01 139 | CATSSTGTGGGETQY,SLFNTVATA,A*02:01 140 | CATSSTGTGGGETQY,SLFNTVATE,A*02:01 141 | CATSSTGTGGGETQY,SLFNTVATF,A*02:01 142 | CATSSTGTGGGETQY,SLFNTVATG,A*02:01 143 | CATSSTGTGGGETQY,SLFNTVATI,A*02:01 144 | CATSSTGTGGGETQY,SLFNTVATM,A*02:01 145 | CATSSTGTGGGETQY,SLFNTVATN,A*02:01 146 | CATSSTGTGGGETQY,SLFNTVATP,A*02:01 147 | CATSSTGTGGGETQY,SLFNTVATQ,A*02:01 148 | CATSSTGTGGGETQY,SLFNTVATS,A*02:01 149 | CATSSTGTGGGETQY,SLFNTVATT,A*02:01 150 | CATSSTGTGGGETQY,SLFNTVATV,A*02:01 151 | CASSSYRVQGASENIQY,SLFNTVATL,A*02:01 152 | CASSGTEDEKLF,SLFNTVATL,A*02:01 153 | CASSSYRVQGASENIQY,SLYNTVATL,A*02:01 154 | CASSSHDTGGYNSPLHFG,TPRVTGGGAM,B*07:02 155 | CASSFRDYGNYEQYFG,TPRVTGGGAM,B*07:02 156 | CASSIGPALNTEAFFG,TPRVTGGGAM,B*07:02 157 | CASSLHDRGSRTEAFFG,TPRVTGGGAM,B*07:02 158 | CASKVGAGGLYEQYFG,TPRVTGGGAM,B*07:02 159 | CASSLIGVSSYNEQFFG,TPRVTGGGAM,B*07:02 160 | CASSLIGVSSYNEQFFQ,TPRVTGGGAM,B*07:02 161 | CASSSHDRQGASSPLHFG,TPRVTGGGAM,B*07:02 162 | CASSSHDTGGYNSPLHFG,EFFWDANDIY,A*02:01 163 | CASSFRDYGNYEQYFG,EFFWDANDIY,A*02:01 164 | CASSIGPALNTEAFFG,EFFWDANDIY,A*02:01 165 | CASMGNSAGANVLTFG,NLVPMVATV,A*02:01 166 | CASSSANYGYTFG,NLVPMVATV,A*02:01 167 | CASSSVSEAFFG,NLVPMVATV,A*02:01 168 | CASSSVNEQFFG,NLVPMVATV,A*02:01 169 | CASSYQTGTGNYGYTFG,NLVPMVATV,A*02:01 170 | CASSYSGHVYEQYFG,NLVPMVATV,A*02:01 171 | CASRGQGFSYEQYFG,NLVPMVATV,A*02:01 172 | CASSFLGLNEQFFG,NLVPMVATV,A*02:01 173 | CASSLDIPSYNEQFFG,NLVPMVATV,A*02:01 174 | CASSQAALAGFGYEQYFG,TPRVTGGGAM,B*07:02 175 | CASSPARNTEAFFG,TPRVTGGGAM,B*07:02 176 | 
CASSPSRNTEAFFG,TPRVTGGGAM,B*07:02 177 | CASSPHRNTEAFFG,TPRVTGGGAM,B*07:02 178 | CASSIFGEQFF, NEGVKAAW,B*44:03 179 | CASSIFGELFF, NEGVKAAW,B*44:03 180 | CAVGNNAGNMLTF, NEGVKAAW,B*44:03 181 | CAVGANAGNMLTF, NEGVKAAW,B*44:03 182 | CASPGLAGEYEQYF,SPEPLPQGQLTAY,B*35:08 183 | CASPGLAGEYEQYF,VPEPLPQGQLTAY,B*35:08 184 | CASPGLAGEYEQYF,YPEPLPQGQLTAY,B*35:08 185 | CASPGLAGEYEQYF,LPDPLPQGQLTAY,B*35:08 186 | CASPGLAGEYEQYF,LPEPSPQGQLTAY,B*35:08 187 | CASPGLAGEYEQYF,LPEPVPQGQLTAY,B*35:08 188 | CASPGLAGEYEQYF,LPEPDPQGQLTAY,B*35:08 189 | CASPGLAGEYEQYF,LPEPLPQSQLTAY,B*35:08 190 | CASPGLAGEYEQYF,LPEPLPQDQLTAY,B*35:08 191 | CASPGLAGEYEQYF,LPEPLPQGSLTAY,B*35:08 192 | CASPGLAGEYEQYF,LPEPLPQGVLTAY,B*35:08 193 | CASPGLAGEYEQYF,LPEPLPQGKLTAY,B*35:08 194 | CASPGLAGEYEQYF,LPEPLPQGDLTAY,B*35:08 195 | CASPGLAGEYEQYF,LPEPLPQGYLTAY,B*35:08 196 | CASPGLAGEYEQYF,LPEPLPQGQSTAY,B*35:08 197 | CASPGLAGEYEQYF,LPEPLPQGQVTAY,B*35:08 198 | CASPGLAGEYEQYF,LPEPLPQGQKTAY,B*35:08 199 | CASPGLAGEYEQYF,LPEPLPQGQDTAY,B*35:08 200 | CASPGLAGEYEQYF,LPEPLPQGQYTAY,B*35:08 201 | CASPGLAGEYEQYF,LPEPLPQGQLSAY,B*35:08 202 | CASPGLAGEYEQYF,LPEPLPQGQLVAY,B*35:08 203 | CASPGLAGEYEQYF,LPEPLPQGQLKAY,B*35:08 204 | CASPGLAGEYEQYF,LPEPLPQGQLDAY,B*35:08 205 | CASPGLAGEYEQYF,LPEPLPQGQLYAY,B*35:08 206 | CASPGLAGEYEQYF,LPEPLPQGQLTSY,B*35:08 207 | CASPGLAGEYEQYF,LPEPLPQGQLTVY,B*35:08 208 | CASPGLAGEYEQYF,LPEPLPQGQLTKY,B*35:08 209 | CASPGLAGEYEQYF,LPEPLPQGQLTDY,B*35:08 210 | CASPGLAGEYEQYF,LPEPLPQGQLTYY,B*35:08 211 | CASPGETEAFF,SPEPLPQGQLTAY,B*35:08 212 | CASPGETEAFF,VPEPLPQGQLTAY,B*35:08 213 | CASPGETEAFF,YPEPLPQGQLTAY,B*35:08 214 | CASPGETEAFF,LPDPLPQGQLTAY,B*35:08 215 | CASPGETEAFF,LPEGLPQGQLTAY,B*35:08 216 | CASPGETEAFF,LPEPVPQGQLTAY,B*35:08 217 | CASPGETEAFF,LPEPLPQAQLTAY,B*35:08 218 | CASPGETEAFF,LPEPLPQSQLTAY,B*35:08 219 | CASPGETEAFF,LPEPLPQGSLTAY,B*35:08 220 | CASPGETEAFF,LPEPLPQGVLTAY,B*35:08 221 | CASPGETEAFF,LPEPLPQGKLTAY,B*35:08 222 | CASPGETEAFF,LPEPLPQGDLTAY,B*35:08 223 | CASPGETEAFF,LPEPLPQGYLTAY,B*35:08 224 | 
CASPGETEAFF,LPEPLPQGQGTAY,B*35:08 225 | CASPGETEAFF,LPEPLPQGQSTAY,B*35:08 226 | CASPGETEAFF,LPEPLPQGQVTAY,B*35:08 227 | CASPGETEAFF,LPEPLPQGQDTAY,B*35:08 228 | CASPGETEAFF,LPEPLPQGQYTAY,B*35:08 229 | CASPGETEAFF,LPEPLPQGQLGAY,B*35:08 230 | CASPGETEAFF,LPEPLPQGQLSAY,B*35:08 231 | CASPGETEAFF,LPEPLPQGQLVAY,B*35:08 232 | CASPGETEAFF,LPEPLPQGQLKAY,B*35:08 233 | CASPGETEAFF,LPEPLPQGQLDAY,B*35:08 234 | CASPGETEAFF,LPEPLPQGQLYAY,B*35:08 235 | CASPGETEAFF,LPEPLPQGQLTGY,B*35:08 236 | CASPGETEAFF,LPEPLPQGQLTSY,B*35:08 237 | CASPGETEAFF,LPEPLPQGQLTVY,B*35:08 238 | CASPGETEAFF,LPEPLPQGQLTKY,B*35:08 239 | CASPGETEAFF,LPEPLPQGQLTDY,B*35:08 240 | CASPGETEAFF,LPEPLPQGQLTYY,B*35:08 241 | CASSRTGSTYEQYF,VPEPLPQGQLTAY,B*35:08 242 | CASSRTGSTYEQYF,LPDPLPQGQLTAY,B*35:08 243 | CASSRTGSTYEQYF,LPEPLPGGQLTAY,B*35:08 244 | CASSRTGSTYEQYF,LPEPLPSGQLTAY,B*35:08 245 | CASSRTGSTYEQYF,LPEPLPVGQLTAY,B*35:08 246 | CASSRTGSTYEQYF,LPEPLPKGQLTAY,B*35:08 247 | CASSRTGSTYEQYF,LPEPLPDGQLTAY,B*35:08 248 | CASSRTGSTYEQYF,LPEPLPYGQLTAY,B*35:08 249 | CASSRTGSTYEQYF,LPEPLPQAQLTAY,B*35:08 250 | CASSRTGSTYEQYF,LPEPLPQSQLTAY,B*35:08 251 | CASSRTGSTYEQYF,LPEPLPQKQLTAY,B*35:08 252 | CASSRTGSTYEQYF,LPEPLPQDQLTAY,B*35:08 253 | CASSRTGSTYEQYF,LPEPLPQYQLTAY,B*35:08 254 | CASSRTGSTYEQYF,LPEPLPQGGLTAY,B*35:08 255 | CASSRTGSTYEQYF,LPEPLPQGSLTAY,B*35:08 256 | CASSRTGSTYEQYF,LPEPLPQGVLTAY,B*35:08 257 | CASSRTGSTYEQYF,LPEPLPQGKLTAY,B*35:08 258 | CASSRTGSTYEQYF,LPEPLPQGDLTAY,B*35:08 259 | CASSRTGSTYEQYF,LPEPLPQGYLTAY,B*35:08 260 | CASSRTGSTYEQYF,LPEPLPQGQSTAY,B*35:08 261 | CASSRTGSTYEQYF,LPEPLPQGQVTAY,B*35:08 262 | CASSRTGSTYEQYF,LPEPLPQGQDTAY,B*35:08 263 | CASSRTGSTYEQYF,LPEPLPQGQYTAY,B*35:08 264 | CASSRTGSTYEQYF,LPEPLPQGQLSAY,B*35:08 265 | CASSRTGSTYEQYF,LPEPLPQGQLVAY,B*35:08 266 | CASSRTGSTYEQYF,LPEPLPQGQLKAY,B*35:08 267 | CASSRTGSTYEQYF,LPEPLPQGQLDAY,B*35:08 268 | CASSRTGSTYEQYF,LPEPLPQGQLYAY,B*35:08 269 | CASSRTGSTYEQYF,LPEPLPQGQLTGY,B*35:08 270 | CASSRTGSTYEQYF,LPEPLPQGQLTSY,B*35:08 271 | 
CASSRTGSTYEQYF,LPEPLPQGQLTDY,B*35:08 272 | CASSFRGGKTQY,IPLTEEAEL,B*35:01 273 | CASSFRGGKTQY,IPLTEEAEL,B*53:01 274 | CASSFRGGKTQY,IPLTEEAEF,B*35:01 275 | CASSFRGGKTQY,IPLTEEAEF,B*53:01 276 | CASGPPLRGNYGY,FLRGRAYGL,B*08:01 277 | CASSPRVSGGNYEQ,FLRGRAYGL,B*08:01 278 | CASSHGTSGILETQ,FLRGRAYGL,B*08:01 279 | CASSFTWTSGGATDTQ,FLRGRAYGL,B*08:01 280 | CASSLFPTGSTAGEL,FLRGRAYGL,B*08:01 281 | CASSLGQAYEQ,FLRGRAYGL,B*08:01 282 | CASSLGLNTIY,TPQDLNTML,B*42:01 283 | CASSQEGGGQGQPQH,TPQDLNTML,B*42:01 284 | CASSLGANTIY,TPQDLNTML,B*42:01 285 | CASSVDKGGADEQF,IIKDYGKQMA,B*42:01 286 | CASSVDKGGTDTQY,IIKDYGKQMA,B*42:01 287 | CASSEDKGGGDTQY,IIKDYGKQMA,B*42:01 288 | CVSSVDKGGTDTQY,IIKDYGKQMA,B*42:01 289 | CASSVPGAGEETQY,LLFGYPVYV,A*02:01 290 | CASSSPGQGNYEQY,LLFGYPVYV,A*02:01 291 | CASSTPGQGAYEQY,LLFGYPVYV,A*02:01 292 | CASSYPGQGEHEQY,LLFGYPVYV,A*02:01 293 | CASSSPGTGVNEQF,LLFGYPVYV,A*02:01 294 | CASSSPGTGVNEQF,LGYGFVNYV,A*02:01 295 | CASSSPGTGVNEQF,MLWGYLQYV,A*02:01 296 | CASSVPGAGEETQY,LLFGYPVVV,A*02:01 297 | CASSVPGAGEETQY,LLFGYPVLV,A*02:01 298 | CASSVPGAGEETQY,LLFGYPVIV,A*02:01 299 | CASSVPGAGEETQY,LLFGYPVMV,A*02:01 300 | CASSVPGAGEETQY,LLFGYPVFV,A*02:01 301 | CASSVPGAGEETQY,LLFGYPVWV,A*02:01 302 | CASSVPGAGEETQY,LLFGYPVHV,A*02:01 303 | CASSVPGAGEETQY,LLFGYPVRV,A*02:01 304 | CASSVPGAGEETQY,LLFGYPVKV,A*02:01 305 | CASSVPGAGEETQY,LLFGYPVDV,A*02:01 306 | CASSVPGAGEETQY,LLFGYPVEV,A*02:01 307 | CASSVPGAGEETQY,LLFGYPVNV,A*02:01 308 | CASSVPGAGEETQY,LLFGYPVQV,A*02:01 309 | CASSVPGAGEETQY,LLFGYPVSV,A*02:01 310 | CASSVPGAGEETQY,LLFGYPVTV,A*02:01 311 | CASSSPGQGNYEQY,LLFGYPVVV,A*02:01 312 | CASSSPGQGNYEQY,LLFGYPVLV,A*02:01 313 | CASSSPGQGNYEQY,LLFGYPVIV,A*02:01 314 | CASSSPGQGNYEQY,LLFGYPVMV,A*02:01 315 | CASSSPGQGNYEQY,LLFGYPVFV,A*02:01 316 | CASSSPGQGNYEQY,LLFGYPVWV,A*02:01 317 | CASSSPGQGNYEQY,LLFGYPVHV,A*02:01 318 | CASSSPGQGNYEQY,LLFGYPVRV,A*02:01 319 | CASSSPGQGNYEQY,LLFGYPVKV,A*02:01 320 | CASSSPGQGNYEQY,LLFGYPVDV,A*02:01 321 | CASSSPGQGNYEQY,LLFGYPVEV,A*02:01 322 | 
CASSSPGQGNYEQY,LLFGYPVNV,A*02:01 323 | CASSSPGQGNYEQY,LLFGYPVQV,A*02:01 324 | CASSSPGQGNYEQY,LLFGYPVSV,A*02:01 325 | CASSSPGQGNYEQY,LLFGYPVTV,A*02:01 326 | CASSTPGQGAYEQY,LLFGYPVVV,A*02:01 327 | CASSTPGQGAYEQY,LLFGYPVLV,A*02:01 328 | CASSTPGQGAYEQY,LLFGYPVIV,A*02:01 329 | CASSTPGQGAYEQY,LLFGYPVMV,A*02:01 330 | CASSTPGQGAYEQY,LLFGYPVFV,A*02:01 331 | CASSTPGQGAYEQY,LLFGYPVWV,A*02:01 332 | CASSTPGQGAYEQY,LLFGYPVHV,A*02:01 333 | CASSTPGQGAYEQY,LLFGYPVRV,A*02:01 334 | CASSTPGQGAYEQY,LLFGYPVKV,A*02:01 335 | CASSTPGQGAYEQY,LLFGYPVEV,A*02:01 336 | CASSTPGQGAYEQY,LLFGYPVNV,A*02:01 337 | CASSTPGQGAYEQY,LLFGYPVQV,A*02:01 338 | CASSTPGQGAYEQY,LLFGYPVSV,A*02:01 339 | CASSTPGQGAYEQY,LLFGYPVTV,A*02:01 340 | CASSYPGQGEHEQY,LLFGYPVVV,A*02:01 341 | CASSYPGQGEHEQY,LLFGYPVLV,A*02:01 342 | CASSYPGQGEHEQY,LLFGYPVIV,A*02:01 343 | CASSYPGQGEHEQY,LLFGYPVMV,A*02:01 344 | CASSYPGQGEHEQY,LLFGYPVFV,A*02:01 345 | CASSYPGQGEHEQY,LLFGYPVWV,A*02:01 346 | CASSYPGQGEHEQY,LLFGYPVHV,A*02:01 347 | CASSYPGQGEHEQY,LLFGYPVRV,A*02:01 348 | CASSYPGQGEHEQY,LLFGYPVKV,A*02:01 349 | CASSYPGQGEHEQY,LLFGYPVDV,A*02:01 350 | CASSYPGQGEHEQY,LLFGYPVEV,A*02:01 351 | CASSYPGQGEHEQY,LLFGYPVNV,A*02:01 352 | CASSYPGQGEHEQY,LLFGYPVQV,A*02:01 353 | CASSYPGQGEHEQY,LLFGYPVSV,A*02:01 354 | CASSYPGQGEHEQY,LLFGYPVTV,A*02:01 355 | CASSSPGTGVNEQF,LLFGYPVVV,A*02:01 356 | CASSSPGTGVNEQF,LLFGYPVLV,A*02:01 357 | CASSSPGTGVNEQF,LLFGYPVIV,A*02:01 358 | CASSSPGTGVNEQF,LLFGYPVMV,A*02:01 359 | CASSSPGTGVNEQF,LLFGYPVFV,A*02:01 360 | CASSSPGTGVNEQF,LLFGYPVWV,A*02:01 361 | CASSSPGTGVNEQF,LLFGYPVHV,A*02:01 362 | CASSSPGTGVNEQF,LLFGYPVRV,A*02:01 363 | CASSSPGTGVNEQF,LLFGYPVKV,A*02:01 364 | CASSSPGTGVNEQF,LLFGYPVEV,A*02:01 365 | CASSSPGTGVNEQF,LLFGYPVNV,A*02:01 366 | CASSSPGTGVNEQF,LLFGYPVQV,A*02:01 367 | CASSSPGTGVNEQF,LLFGYPVSV,A*02:01 368 | CASSSPGTGVNEQF,LLFGYPVTV,A*02:01 369 | CASSGTSGTEQYF,GILGFVFTL,A*02:01 370 | CASSIRSGAEQYF,GILGFVFTL,A*02:01 371 | CASSLRAAYEQYF,GILGFVFTL,A*02:01 372 | CSSVRSADTQYF,GILGFVFTL,A*02:01 373 | 
CASSIFSVSNEQYF,GILGFVFTL,A*02:01 374 | CASSGTGAGEQYF,GILGFVFTL,A*02:01 375 | CASSVGQPQHF,GILGFVFTL,A*02:01 376 | CASSVRSSGEQFF,GILGFVFTL,A*02:01 377 | CASSLIFPGRAFF,GILGFVFTL,A*02:01 378 | CSARDRIGNTIYF,GLCTLVAML,A*02:01 379 | CSARPDSDYGYTF,GLCTLVAML,A*02:01 380 | CSARSGVVNTIYF,GLCTLVAML,A*02:01 381 | CSARWGVGNTIYF,GLCTLVAML,A*02:01 382 | CASSQGDVAPGTQYF,GLCTLVAML,A*02:01 383 | CASSDGQLTAGELFF,GLCTLVAML,A*02:01 384 | CASSRLGDYTF,GLCTLVAML,A*02:01 385 | CSARTEGAEAFF,CVNGSCFTV,A*02:01 386 | CASSYWTGSSAETQYF,CINGVCWTV,A*02:01 387 | CSARTSGDFGEQFF,CINGVCWTV,A*02:01 388 | CASSQESGTEAFFG,CINGVCWTV,A*02:01 389 | CASTIGSSEKLFF,CINGVCWTV,A*02:01 390 | CASSQVQTEPSGYTF,CINGVCWTV,A*02:01 391 | CASSQEVTVPTYEQYF,CINGVCWTV,A*02:01 392 | CASSSWDTGELF,RYPLTFGWCF,A*24 393 | CASSSWDTGELF,RFPLTFGWCF,A*24 394 | CASSSWDTGELF,RYPLTLGWCF,A*24 395 | CASSGASHEQY,RYPLTFGWCF,A*24 396 | CASSGASHEQY,RFPLTFGWCF,A*24 397 | CASSPTSGIYEQY,RYPLTFGWCF,A*24 398 | CASSPTSGIYEQY,RFPLTFGWCF,A*24 399 | CSVLKVGTSGFNEQF,QVPLRPMTYK,A*11:01 400 | CSVPKVGTSGFNEQF,QVPLRPMTYK,A*11:01 401 | CSVPKTGTSGFNEQF,QVPLRPMTYK,A*11:01 402 | CSVVAGGPGDTQY,QVPLRPMTYK,A*11:01 403 | CASSVSGFSTDTQY,QVPLRPMTYK,A*11:01 404 | CASRETGWGNQPQH,QVPLRPMTYK,A*03:01 405 | CASSETNRVEMEAF,QVPLRPMTYK,A*03:01 406 | CATSAGRQRDTGELF,QVPLRPMTYK,A*03:01 407 | CASSKYNEQF,QVPLRPMTYK,A*03:01 408 | CASSPPGQVGANVLT,QVPLRPMTYK,A*03:01 409 | CASSYSRGSGNTIY,QVPLRPMTYK,A*03:01 410 | CASSPYRGPNTEAF,QVPLRPMTYK,A*03:01 411 | CAISAGASFVTRSTDTQY,QVPLRPMTYK,A*03:01 412 | CAIRSTDTQY,QVPLRPMTYK,A*03:01 413 | CASRQQGFVFEAKNIQY,QVPLRPMTYK,A*03:01 414 | CSAPTSGSAAF,QVPLRPMTYK,A*03:01 415 | CASRDSIQFSSNQPQH,QVPLRPMTYK,A*03:01 416 | CASSYSMTSGSFSDLGAKNIQY,QVPLRPMTYK,A*03:01 417 | CASRPGPVKNTGELF,QVPLRPMTYK,A*03:01 418 | CASSLYHNTGELF,QVPLRPMTYK,A*03:01 419 | CASSGGAHFSKIPLAGYNEQF,QVPLRPMTYK,A*03:01 420 | CASSRTDFTAGELF,QVPLRPMTYK,A*03:01 421 | CASSLTGHPYEQY,QVPLRPMTYK,A*03:01 422 | CASSPGEKYEQY,QVPLRPMTYK,A*03:01 423 | CSARGAGGFTHYEQY,QVPLRPMTYK,A*03:01 424 | 
CASSLVGFANTGELF,QVPLRPMTYK,A*03:01 425 | CASSSSGRGLGIQY,QVPLRPMTYK,A*03:01 426 | CASGEVGELF,QVPLRPMTYK,A*03:01 427 | CASSVRTGELF,QVPLRPMTYK,A*11:01 428 | CASSERDSQYQETQY,QVPLRPMTYK,A*11:01 429 | CASSARAFPEGNQPQH,QVPLRPMTYK,A*11:01 430 | CASSYSGQGAAGELF,QVPLRPMTYK,A*11:01 431 | CASSPVLYEQY,QVPLRPMTYK,A*11:01 432 | CASSVRTGELF,QVPLRPMTYK,A*03:01 433 | CASSERDSQYQETQY,QVPLRPMTYK,A*03:01 434 | CASSARAFPEGNQPQH,QVPLRPMTYK,A*03:01 435 | CASSYSGQGAAGELF,QVPLRPMTYK,A*03:01 436 | CASSPVLYEQY,QVPLRPMTYK,A*03:01 437 | CASSYQGTEAF,ILAKFLHWL,A*02:01 438 | CGIQPQGTEAF,ILAKFLHWL,A*02:01 439 | CASSLTGGGELFF,TAFTIPSI ,B*51:01 440 | CATSDLQGVRGVNEQF,TAFTIPSI ,B*51:01 441 | CASSLTGGGKLFF,TAFTIPSI ,B*51:01 442 | CASSPPGGTADTQYF,TAFTIPSI ,B*51:01 443 | CASSSGRGGEQYF,TAFTIPSI ,B*51:01 444 | CASSLTGGNTIYF,TAFTIPSI ,B*51:01 445 | CASSSTGGNTIYF,TAFTIPSI ,B*51:01 446 | CASSLTGNTEAFF,TAFTIPSI ,B*51:01 447 | CASSTRDPRQTQYF,TAFTIPSI ,B*51:01 448 | CASSLWLPKETQYF,TAFTIPSI ,B*51:01 449 | CASRKWNGELFF,TAFTIPSI ,B*51:01 450 | CSVEGPLAGEADTQYF,TAFTIPSI ,B*51:01 451 | CASSPPGGRADTQYF,TAFTIPSI ,B*51:01 452 | CASAQGRGTEAFF,TAFTIPSI ,B*51:01 453 | CASSNPIVPQETQYF,TAFTIPSI ,B*51:01 454 | CASSRGAESYNSPLHF,TAFTIPSI ,B*51:01 455 | CASSLTGGARARLFF,TAFTIPSI ,B*51:01 456 | CASSRRGEGHNEQFF,TAFTIPSI ,B*51:01 457 | CASSLGTLNEQFF,TPQDLNTML,B*42 458 | CASSLGTLNEQFF,TPQDLNTML,B*81 459 | CASSSSLTGVTSYNEQFF,TPQDLNTML,B*42 460 | CASSFSKNTEAFF,TPQDLNTML,B*42 461 | CASSFSKNTEAFF,TPQDLNTML,B*81 462 | CASSHSKNTEAFF,TPQDLNTML,B*42 463 | CASSYSKNTEAFF,TPQDLNTML,B*42 464 | CASKQGVAEAFF,TPQDLNTML,B*42 465 | CASSLGIDAIYF,TPQDLNTML,B*42 466 | CASSHSKNTEAFF,TPQDLNTML,B*81 467 | CASSYSKNTEAFF,TPQDLNTML,B*81 468 | CASKQGVAEAFF,TPQDLNTML,B*81 469 | CASSLGIDAIYF,TPQDLNTML,B*81 470 | CASSGQGYGYA,KAFSPEVIPMF,B*57:01 471 | CASTGGGYGYT,KAFSPEVIPMF,B*57:01 472 | CASSGQDYGYT,KAFSPEVIPMF,B*57:01 473 | CASTGSGYGYT,KAFSPEVIPMF,B*57:01 474 | CASSGQEYGYT,KAFSPEVIPMF,B*57:01 475 | CASTDSYGYT,KAFSPEVIPMF,B*57:01 476 | 
CATTDTYGYT,KAFSPEVIPMF,B*57:01 477 | CATTGTYGYT,KAFSPEVIPMF,B*57:01 478 | CASSSRQNYGYT,KAFSPEVIPMF,B*57:01 479 | CASSGQLYGYT,KAFSPEVIPMF,B*57:01 480 | CASSATYGYT,KAFSPEVIPMF,B*57:01 481 | CASTGTAYGYT,KAFSPEVIPMF,B*57:01 482 | CASTGTDYGYT,KAFSPEVIPMF,B*57:01 483 | CASSGQNYGYT,KAFSPEVIPMF,B*57:01 484 | CASSGRNYGYT,KAFSPEVIPMF,B*57:01 485 | CAGTGTAYGYT,KAFSPEVIPMF,B*57:01 486 | CAWTGTNYGYT,KAFSPEVIPMF,B*57:01 487 | CASSREVYGYT,KAFSPEVIPMF,B*57:01 488 | CASSTSYGYT,KAFSPEVIPMF,B*57:01 489 | CASNSREVYGYT,KAFSPEVIPMF,B*57:01 490 | CAAGGQFYGYT,KAFSPEVIPMF,B*57:01 491 | CASSGGSYGYT,KAFSPEVIPMF,B*57:01 492 | CATTGSYGYT,KAFSPEVIPMF,B*57:01 493 | CASTGGQYGYT,KAFSPEVIPMF,B*57:01 494 | CASTGGSYGYT,KAFSPEVIPMF,B*57:01 495 | CASSGTSYGYT,KAFSPEVIPMF,B*57:01 496 | CSARLRDRGYEQY,KAFSPEVIPMF,B*57:01 497 | CASSRQGFT,KAFSPEVIPMF,B*57:03 498 | CASSPRDSKETQY,KAFSPEVIPMF,B*57:03 499 | CASSLAGGQETQY,KAFSPEVIPMF,B*57:03 500 | CASSLAGTQETQY,KAFSPEVIPMF,B*57:03 501 | CASSSAGTQETQY,KAFSPEVIPMF,B*57:03 502 | CVSSLAGTQETQY,KAFSPEVIPMF,B*57:03 503 | CASTASYGYT,KAFSPEVIPMF,B*57:03 504 | CASSLGASISYEQY,KAFSPEVIPMF,B*57:03 505 | CATSGEGQGGPYGYT,KAFSPEVIPMF,B*57:03 506 | CAGSLSNGYT,KAFSPEVIPMF,B*57:03 507 | CASSVGEGFDIQY,KAFSPEVIPMF,B*57:03 508 | CASSSLIRGLGNQPQH,KAFSPEVIPMF,B*57:03 509 | CASSESRMGSQANYGYT,KAFSPEVIPMF,B*57:03 510 | CVSSESRMGSQANYGYT,KAFSPEVIPMF,B*57:03 511 | CASSLAETSADGYT,KAFSPEVIPMF,B*57:03 512 | CASSSFLSSGETQY,KAFSPEVIPMF,B*57:03 513 | CANSSFLSSGETQY,KAFSPEVIPMF,B*57:03 514 | CASTGTYGYT,KAFSPEVIPMF,B*57:01 515 | CASTGTYGYT,KGFSPEVIPMF,B*57:01 516 | CASTGTYGYT,KNFSPEVIPMF,B*57:01 517 | CASTGTYGYT,KGFNPEVIPMF,B*57:01 518 | CASTGTYGYT,KGFKPEVIPMF,B*57:01 519 | CASTGTYGYT,KAFSPEVIPMF,B*57:03 520 | CASTGTYGYT,KGFSPEVIPMF,B*57:03 521 | CASTGTYGYT,KNFSPEVIPMF,B*57:03 522 | CATSEAANTGELF,QASQEVKNW,B*58 523 | CASSSGDETQY,QASQEVKNW,B*58 524 | CASSMIQGADTQY,TPQDLNTML,B*42 525 | CASSLGLREQF,TPQDLNTML,B*42 526 | CASSFGQGAIEQY,KRWIILGLNK,B*27 527 | CASSQDRMDRAGEQY,KRWIILGLNK,B*27 528 | 
CASSLVLAIEQY,KRWIILGLNK,B*27 529 | CASSLEGGAALWDTQY,KRWIILGLNK,B*27 530 | CASSQDQALYFMNTEAF,KRWIILGLNK,B*27 531 | CASSLSSGSSYEQY,KRWIILGLNK,B*27 532 | CASSSGETGNTEAF,QASQEVKNW,B*58 533 | CAWSVFSGAYEQY,QASQEVKNW,B*58 534 | CASSLLTLARENTEAF,QASQEVKNW,B*58 535 | CASSPVSGEDEQY,QASQEVKNW,B*58 536 | CASSSPGTGSGELF,QASQEVKNW,B*58 537 | CAISAPDIAGNTIY,QASQEVKNW,B*58 538 | CASSVGPGQTEAF,QASQEVKNW,B*58 539 | CASSVEIGQTEAF,QASQEVKNW,B*58 540 | CATSEANNTGELF,QASQEVKNW,B*58 541 | CASSYGGTGPEGSSYEQY,TPQDLNTML,B*42 542 | CASSMUQGADTQY,TPQDLNTML,B*42 543 | CASSSRDREETQY,TPQDLNTML,B*42 544 | CASSQGQLGNTIY,KRWIILGLNK,B*27 545 | CASSLGGTNHGYT,KRWIILGLNK,B*27 546 | CASSPGTGKNIQY,KRWIILGLNK,B*27 547 | CASSQEGVNTEAF,KRWIILGLNK,B*27 548 | CASSQAQGLSNSPLH,KRWIILGLNK,B*27 549 | CASSRTQGPNTDTQY,KRWIILGLNK,B*27 550 | CASSLDHLAGVNNEQF,KRWIILGLNK,B*27 551 | CASSGQLLEAF,KRWIILGLNK,B*27 552 | CASSLEGSRNTEAF,KRWIILGLNK,B*27 553 | CASSQPPDRGYGYT,KRWIILGLNK,B*27 554 | CASSLVGSYNEQF,KRWIILGLNK,B*27 555 | CASSTTSGRYEQY,KRWIILGLNK,B*27 556 | CASSLSSDEHGYT,KRWIILGLNK,B*27 557 | CASSLGQTNYGYTF,GADGVGKSAL,C*05:01 558 | CASSLGQTNYGYTF,GADGVGKSAL,C*08:02 559 | CAWSAASGGAQDTQYF,MLFSHGLVK,A*03:01 560 | CAWSAASGGAQDTQYF,EMLFSHGLVK,A*03:01 561 | CASSPVYGVAYEQYF,LLQCTQQAV,A*02:01 562 | CASSDLPGAPTDTQYF,ILCETCLIV,A*02:01 563 | CASSLAGSKLYEQYF,EHEGSGPEL,B*38:01 564 | CASSLAGSKLYEQYF,SEHEGSGPEL,B*38:01 565 | CASSPGTVKETQYF,MLFSHGLVK,A*03:01 566 | CASSPGTVKETQYF,EMLFSHGLVK,A*03:01 567 | CASSQDRGIGYGYTF,LLQCTQQAV,A*02:01 568 | CASSLGQAYEQY,FLRGRAYGL,B*08:01 569 | CASSIGVYGYT,GILGFVFTL,A*02:01 570 | CAWSETGLGTGELF,ELAGIGILTV,A*02:01 571 | CASSYVGNTGELF,SLLMWITQC,A*02:01 572 | CASSARSGELF,HPVGEADYFEY,B*35:01 573 | CASRTRGGTLIEQY,IPLTEEAEL,B*35:01 574 | CASSPVTGGIYGYT,NLVPMVATV,A*02:01 575 | CAWSETGLGTGELF,EAAGIGILTV,A*02:01 576 | CASSFNMATGQY,EVDPIGHLY,A*01:01 577 | CSARDGTGNGYT,GLCTLVAML,A*02:01 578 | CASSLGQGLLYGYT,GTSGSPIINR,A*11:01 579 | CASSLGQGLLYGYT,GTSGSPIVNR,A*11:01 580 | CASSLRGRGDQPQH,HSKKKCDEL,B*08:01 581 
| CASSLEGGYYNEQF,IPSINVHHY,B*35:01 582 | CASRRGPYEQY,KLVALGINAV,A*02:01 583 | CASREGLGGTEAF,KRWIILGLNK,B*27:05 584 | CASRPGLAGGRPEQY,LGYGFVNYI,A*02:01 585 | CASRPGLAGGRPEQY,LLFGYAVYV,A*02:01 586 | CASRPGLAGGRPEQY,LLFGYPVAV,A*02:01 587 | CASRPGLAGGRPEQY,LLFGFPVYV,A*02:01 588 | CASSYVGNTGELF,SLLMWITQV,A*02:01 589 | CASSFIGGTDTQY,YLEPGPVTV,A*02:01 590 | CASSLTGGGELF,TAFTIPSI,B*51:01 591 | CASSLWEKLAKNIQY,ALWGPDPAAA,A*02:01 592 | CASSLWEKLAKNIQY,RQWGPDPAAV,A*02:01 593 | CASSDTVSYEQY,SLFNTIAVL,A*02:01 594 | CASSDTVSYEQY,SLYNTIATL,A*02:01 595 | CASSPTSGIYEQY,RFPLTFGWCF,A*24:02 596 | CASSSWDTGELF,RYPLTLGWCF,A*24:02 597 | CASSARSGELF,HPVGQADYFEY,B*35:01 598 | CASSFNMATGQY,ESDPIVAQY,A*01:01 599 | CASSSRSSYEQY,GILGLVFTL,A*02:01 600 | CASSLWEKLAKNIQY,MVWGPDPLYV,A*02:01 601 | CASSLWEKLAKNIQY,RQFGPDFPTI,A*02:01 602 | CASSLWEKLAKNIQY,RQFGPDWIVA,A*02:01 603 | CASSLWEKLAKNIQY,YLGGPDFPTI,A*02:01 604 | CASSLWEKLAKNIQY,YQFGPDFPIA,A*02:01 605 | CASSPTSGSIYEQYF,GLCTLVAML,A*02:01 606 | CSARTGAGNTIYF,GLCTLVAML,A*02:01 607 | CSVGQDGTNEKLF,GLCTLVAML,A*02:01 608 | CSVGRDGTNEKLF,GLCTLVAML,A*02:01 609 | CSGVSGGTNEKLF,GLCTLVAML,A*02:01 610 | CASSPLRGSNQPQHF,YVLDHLIVV,A*02:01 611 | CASSIQGGNYGYTF,FLYALALLL,A*02:01 612 | CASSYQGGSDGYTF,FLYALALLL,A*02:01 613 | CASSYQGGSSGYTF,FLYALALLL,A*02:01 614 | CASSYQGGGNYGYTF,FLYALALLL,A*02:01 615 | CASSYQGGTNYGYTF,FLYALALLL,A*02:01 616 | CASSPRHTDTQYF,GILGFVFTL,A*02:01 617 | CASSTRHTDTQYF,GILGFVFTL,A*02:01 618 | CASSYQTGANYGYTF,NLVPMVATV,A*02:01 619 | CASSKGGTEAF,TLLANVTAV,A*02:01 620 | CASSLGGTEAF,TLLANVTAV,A*02:01 621 | -------------------------------------------------------------------------------- /example_pic/flow_chart_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshilu/pMTnet/f9244234be1bc80a310a3dc04c02093b58e71b5e/example_pic/flow_chart_simple.png -------------------------------------------------------------------------------- /example_pic/input_file_example.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshilu/pMTnet/f9244234be1bc80a310a3dc04c02093b58e71b5e/example_pic/input_file_example.png -------------------------------------------------------------------------------- /example_pic/output_file_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshilu/pMTnet/f9244234be1bc80a310a3dc04c02093b58e71b5e/example_pic/output_file_example.png -------------------------------------------------------------------------------- /example_pic/pic1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshilu/pMTnet/f9244234be1bc80a310a3dc04c02093b58e71b5e/example_pic/pic1.png -------------------------------------------------------------------------------- /library/Atchley_factors.csv: -------------------------------------------------------------------------------- 1 | Amino acid,Factor I,Factor II,Factor III,Factor IV,Factor V 2 | A,-0.591,-1.302,-0.733,1.57,-0.146 3 | C,-1.343,0.465,-0.862,-1.02,-0.255 4 | D,1.05,0.302,-3.656,-0.259,-3.242 5 | E,1.357,-1.453,1.477,0.113,-0.837 6 | F,-1.006,-0.59,1.891,-0.397,0.412 7 | G,-0.384,1.652,1.33,1.045,2.064 8 | H,0.336,-0.417,-1.673,-1.474,-0.078 9 | I,-1.239,-0.547,2.131,0.393,0.816 10 | K,1.831,-0.561,0.533,-0.277,1.648 11 | L,-1.019,-0.987,-1.505,1.266,-0.912 12 | M,-0.663,-1.524,2.219,-1.005,1.212 13 | N,0.945,0.828,1.299,-0.169,0.933 14 | P,0.189,2.081,-1.628,0.421,-1.392 15 | Q,0.931,-0.179,-3.005,-0.503,-1.853 16 | R,1.538,-0.055,1.502,0.44,2.897 17 | S,-0.228,1.399,-4.76,0.67,-2.647 18 | T,-0.032,0.326,2.213,0.908,1.313 19 | V,-1.337,-0.279,-0.544,1.242,-1.262 20 | W,-0.595,0.009,0.672,-2.128,-0.184 21 | Y,0.26,0.83,3.097,-0.838,1.512 22 | -------------------------------------------------------------------------------- /library/h5_file/HLA_antigen_encoder_60.h5: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshilu/pMTnet/f9244234be1bc80a310a3dc04c02093b58e71b5e/library/h5_file/HLA_antigen_encoder_60.h5 -------------------------------------------------------------------------------- /library/h5_file/TCR_encoder_30.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshilu/pMTnet/f9244234be1bc80a310a3dc04c02093b58e71b5e/library/h5_file/TCR_encoder_30.h5 -------------------------------------------------------------------------------- /library/h5_file/weights.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tianshilu/pMTnet/f9244234be1bc80a310a3dc04c02093b58e71b5e/library/h5_file/weights.h5 -------------------------------------------------------------------------------- /library/hla_library/E_prot.fasta: -------------------------------------------------------------------------------- 1 | >HLA:HLA00934 E*01:01:01:01 358 bp 2 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 3 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 4 | GCELGPDRRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 5 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 6 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 7 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 8 | >HLA:HLA02225 E*01:01:01:02 358 bp 9 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 10 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 11 | GCELGPDRRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 12 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 13 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 14 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 15 | >HLA:HLA02450 E*01:01:01:03 358 bp 16 | 
MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 17 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 18 | GCELGPDRRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 19 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 20 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 21 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 22 | >HLA:HLA14497 E*01:01:01:04 358 bp 23 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 24 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 25 | GCELGPDRRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 26 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 27 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 28 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 29 | >HLA:HLA14499 E*01:01:01:05 358 bp 30 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 31 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 32 | GCELGPDRRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 33 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 34 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 35 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 36 | >HLA:HLA15627 E*01:01:01:06 358 bp 37 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 38 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 39 | GCELGPDRRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 40 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 41 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 42 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 43 | >HLA:HLA15628 E*01:01:01:07 358 bp 44 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 45 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 46 | GCELGPDRRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 47 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 48 | 
DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 49 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 50 | >HLA:HLA16323 E*01:01:01:08 358 bp 51 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 52 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 53 | GCELGPDRRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 54 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 55 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 56 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 57 | >HLA:HLA17337 E*01:01:01:09 358 bp 58 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 59 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 60 | GCELGPDRRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 61 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 62 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 63 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 64 | >HLA:HLA18020 E*01:01:01:10 358 bp 65 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 66 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 67 | GCELGPDRRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 68 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 69 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 70 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 71 | >HLA:HLA10665 E*01:01:02 181 bp 72 | SHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDNDAASPRMVPRAPWMEQEGSEYWD 73 | RETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMHGCELGPDRRFLRGYEQFAYDGK 74 | DYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAYLEDTCVEWLHKYLEKGKETLLH 75 | L 76 | >HLA:HLA00936 E*01:03:01:01 358 bp 77 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 78 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 79 | GCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 80 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 81 | 
DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 82 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 83 | >HLA:HLA02226 E*01:03:01:02 358 bp 84 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 85 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 86 | GCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 87 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 88 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 89 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 90 | >HLA:HLA12602 E*01:03:01:03 358 bp 91 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 92 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 93 | GCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 94 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 95 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 96 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 97 | >HLA:HLA16324 E*01:03:01:04 358 bp 98 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 99 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 100 | GCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 101 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 102 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 103 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 104 | >HLA:HLA00937 E*01:03:02:01 358 bp 105 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 106 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 107 | GCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 108 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 109 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 110 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 111 | >HLA:HLA05913 E*01:03:02:02 358 bp 112 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 113 | 
AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 114 | GCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 115 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 116 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 117 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 118 | >HLA:HLA01339 E*01:03:03 358 bp 119 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 120 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 121 | GCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 122 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 123 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 124 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 125 | >HLA:HLA02224 E*01:03:04 358 bp 126 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 127 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 128 | GCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 129 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 130 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 131 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 132 | >HLA:HLA07813 E*01:03:05 295 bp 133 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 134 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 135 | GCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 136 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 137 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRW 138 | >HLA:HLA00938 E*01:04 91 bp 139 | SHTLQWMHGCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDAS 140 | EAEHQGAYLEDTCVEWLHKYLEKGKETLLHL 141 | >HLA:HLA09519 E*01:05 181 bp 142 | SHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDNDAASPRMVPRAPWMEQEGSEYWD 143 | RETRSARDTAQIFRVNLRTLRGYYNQSKAGSHTLQWMHGCELGPDGRFLRGYEQFAYDGK 144 | DYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAYLEDTCVEWLHKYLEKGKETLLH 145 | L 146 | >HLA:HLA09694 E*01:06 358 bp 147 | 
MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 148 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 149 | GCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 150 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 151 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLSEPVTLRWKPASQ 152 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 153 | >HLA:HLA10666 E*01:07 181 bp 154 | SHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDNDAASPRMVPRAPWMEQEGSEYWD 155 | RETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMHGCELGPDRRFLRGYEQFAYDGK 156 | DYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRVYLEDTCVEWLHKYLEKGKETLLH 157 | L 158 | >HLA:HLA12603 E*01:08N 105 bp 159 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 160 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGX 161 | >HLA:HLA13757 E*01:09 358 bp 162 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 163 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 164 | GCELGPDRRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 165 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 166 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEPRYTCHVQHEGLPEPVTLRWKPASQ 167 | PTIPIVGIIAGLVLLGSVVSGAVVAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 168 | >HLA:HLA14498 E*01:10 358 bp 169 | MVDGTLLLLLSEALALTQTWAGSHSLKYFHTSVSRPGRGEPRFISVGYVDDTQFVRFDND 170 | AASPRMVPRAPWMEQEGSEYWDRETRSARDTAQIFRVNLRTLRGYYNQSEAGSHTLQWMH 171 | GCELGPDGRFLRGYEQFAYDGKDYLTLNEDLRSWTAVDTAAQISEQKSNDASEAEHQRAY 172 | LEDTCVEWLHKYLEKGKETLLHLEPPKTHVTHHPISDHEATLRCWALGFYPAEITLTWQQ 173 | DGEGHTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPEPVTLRWKPASQ 174 | PTIPIVGIIAGLVLLGSVVSGAVIAAVIWRKKSSGGKGGSYSKAEWSDSAQGSESHSL 175 | -------------------------------------------------------------------------------- /pMTnet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import tensorflow as tf 4 | import sys 5 | import csv 6 | import random 7 | import os 8 | from io import 
StringIO 9 | from collections import Counter 10 | import keras 11 | from keras.layers import Input,Dense,concatenate,Dropout 12 | from keras.models import Model,load_model 13 | from keras import backend as K 14 | ##Customer Input 15 | #python pMTnet.py -input input.csv -library library_dir -output output_dir 16 | args = sys.argv 17 | file_dir=args[args.index('-input')+1] #input protein seq file 18 | library_dir=args[args.index('-library')+1] #directory to downloaded library 19 | 20 | model_dir=library_dir+'/h5_file' 21 | aa_dict_dir=library_dir+'/Atchley_factors.csv' #embedding vector for tcr encoding 22 | hla_db_dir=library_dir+'/hla_library/' #hla sequence 23 | output_dir=args[args.index('-output')+1] #diretory to hold encoding and prediction output 24 | output_log_dir=args[args.index('-output_log')+1] #standard output 25 | ################################ 26 | # Reading Encoding Matrix # 27 | ################################ 28 | ########################### Atchley's factors####################### 29 | aa_dict_atchley=dict() 30 | with open(aa_dict_dir,'r') as aa: 31 | aa_reader=csv.reader(aa) 32 | next(aa_reader, None) 33 | for rows in aa_reader: 34 | aa_name=rows[0] 35 | aa_factor=rows[1:len(rows)] 36 | aa_dict_atchley[aa_name]=np.asarray(aa_factor,dtype='float') 37 | ########################### One Hot ########################## 38 | aa_dict_one_hot = {'A': 0,'C': 1,'D': 2,'E': 3,'F': 4,'G': 5,'H': 6,'I': 7,'K': 8,'L': 9, 39 | 'M': 10,'N': 11,'P': 12,'Q': 13,'R': 14,'S': 15,'T': 16,'V': 17, 40 | 'W': 18,'Y': 19,'X': 20} # 'X' is a padding variable 41 | ########################### Blosum ########################## 42 | BLOSUM50_MATRIX = pd.read_table(StringIO(u""" 43 | A R N D C Q E G H I L K M F P S T W Y V B J Z X * 44 | A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -2 -1 -1 -3 -1 1 0 -3 -2 0 -2 -2 -1 -1 -5 45 | R -2 7 -1 -2 -4 1 0 -3 0 -4 -3 3 -2 -3 -3 -1 -1 -3 -1 -3 -1 -3 0 -1 -5 46 | N -1 -1 7 2 -2 0 0 0 1 -3 -4 0 -2 -4 -2 1 0 -4 -2 -3 5 -4 0 -1 -5 47 | D -2 -2 2 8 -4 0 2 
-1 -1 -4 -4 -1 -4 -5 -1 0 -1 -5 -3 -4 6 -4 1 -1 -5 48 | C -1 -4 -2 -4 13 -3 -3 -3 -3 -2 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -3 -2 -3 -1 -5 49 | Q -1 1 0 0 -3 7 2 -2 1 -3 -2 2 0 -4 -1 0 -1 -1 -1 -3 0 -3 4 -1 -5 50 | E -1 0 0 2 -3 2 6 -3 0 -4 -3 1 -2 -3 -1 -1 -1 -3 -2 -3 1 -3 5 -1 -5 51 | G 0 -3 0 -1 -3 -2 -3 8 -2 -4 -4 -2 -3 -4 -2 0 -2 -3 -3 -4 -1 -4 -2 -1 -5 52 | H -2 0 1 -1 -3 1 0 -2 10 -4 -3 0 -1 -1 -2 -1 -2 -3 2 -4 0 -3 0 -1 -5 53 | I -1 -4 -3 -4 -2 -3 -4 -4 -4 5 2 -3 2 0 -3 -3 -1 -3 -1 4 -4 4 -3 -1 -5 54 | L -2 -3 -4 -4 -2 -2 -3 -4 -3 2 5 -3 3 1 -4 -3 -1 -2 -1 1 -4 4 -3 -1 -5 55 | K -1 3 0 -1 -3 2 1 -2 0 -3 -3 6 -2 -4 -1 0 -1 -3 -2 -3 0 -3 1 -1 -5 56 | M -1 -2 -2 -4 -2 0 -2 -3 -1 2 3 -2 7 0 -3 -2 -1 -1 0 1 -3 2 -1 -1 -5 57 | F -3 -3 -4 -5 -2 -4 -3 -4 -1 0 1 -4 0 8 -4 -3 -2 1 4 -1 -4 1 -4 -1 -5 58 | P -1 -3 -2 -1 -4 -1 -1 -2 -2 -3 -4 -1 -3 -4 10 -1 -1 -4 -3 -3 -2 -3 -1 -1 -5 59 | S 1 -1 1 0 -1 0 -1 0 -1 -3 -3 0 -2 -3 -1 5 2 -4 -2 -2 0 -3 0 -1 -5 60 | T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 2 5 -3 -2 0 0 -1 -1 -1 -5 61 | W -3 -3 -4 -5 -5 -1 -3 -3 -3 -3 -2 -3 -1 1 -4 -4 -3 15 2 -3 -5 -2 -2 -1 -5 62 | Y -2 -1 -2 -3 -3 -1 -2 -3 2 -1 -1 -2 0 4 -3 -2 -2 2 8 -1 -3 -1 -2 -1 -5 63 | V 0 -3 -3 -4 -1 -3 -3 -4 -4 4 1 -3 1 -1 -3 -2 0 -3 -1 5 -3 2 -3 -1 -5 64 | B -2 -1 5 6 -3 0 1 -1 0 -4 -4 0 -3 -4 -2 0 0 -5 -3 -3 6 -4 1 -1 -5 65 | J -2 -3 -4 -4 -2 -3 -3 -4 -3 4 4 -3 2 1 -3 -3 -1 -2 -1 2 -4 4 -3 -1 -5 66 | Z -1 0 0 1 -3 4 5 -2 0 -3 -3 1 -1 -4 -1 0 -1 -2 -2 -3 1 -3 5 -1 -5 67 | X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -5 68 | * -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1 69 | """), sep='\s+').loc[list(aa_dict_one_hot.keys()), list(aa_dict_one_hot.keys())] 70 | assert (BLOSUM50_MATRIX == BLOSUM50_MATRIX.T).all().all() 71 | 72 | ENCODING_DATA_FRAMES = { 73 | "BLOSUM50": BLOSUM50_MATRIX, 74 | "one-hot": pd.DataFrame([ 75 | [1 if i == j else 0 for i in range(len(aa_dict_one_hot.keys()))] 76 | for j in 
range(len(aa_dict_one_hot.keys())) 77 | ], index=aa_dict_one_hot.keys(), columns=aa_dict_one_hot.keys()) 78 | } 79 | 80 | ########################### HLA pseudo-sequence ########################## 81 | #pMHCpan 82 | HLA_ABC=[hla_db_dir+'/A_prot.fasta',hla_db_dir+'/B_prot.fasta',hla_db_dir+'/C_prot.fasta',hla_db_dir+'/E_prot.fasta'] 83 | HLA_seq_lib={} 84 | for one_class in HLA_ABC: 85 | prot=open(one_class) 86 | #pseudo_seq from netMHCpan:https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0000796; minor bug 33 aa are used for pseudo seq, the performance is still good 87 | #HLA sequences are not aligned before taking pseudo-seq. but the performance is still good. will consider doing alignment before taking pseudo sequences in order to improve the performance 88 | pseudo_seq_pos=[7,9,24,45,59,62,63,66,67,79,70,73,74,76,77,80,81,84,95,97,99,114,116,118,143,147,150,152,156,158,159,163,167,171] 89 | #write HLA sequences into a library 90 | #class I alles 91 | name='' 92 | sequence='' 93 | for line in prot: 94 | if len(name)!=0: 95 | if line.startswith('>HLA'): 96 | pseudo='' 97 | for i in range(0,33): 98 | if len(sequence)>pseudo_seq_pos[i]: 99 | pseudo=pseudo+sequence[pseudo_seq_pos[i]] 100 | HLA_seq_lib[name]=pseudo 101 | name=line.split(' ')[1] 102 | sequence='' 103 | else: 104 | sequence=sequence+line.strip() 105 | else: 106 | name=line.split(' ')[1] 107 | ######################################## 108 | # Input data encoding helper functions # 109 | ######################################## 110 | #################functions for TCR encoding#################### 111 | def preprocess(filedir): 112 | #Preprocess TCR files 113 | print('Processing: '+filedir) 114 | if not os.path.exists(filedir): 115 | print('Invalid file path: ' + filedir) 116 | return 0 117 | dataset = pd.read_csv(filedir, header=0) 118 | dataset = dataset.sort_values('CDR3').reset_index(drop=True) 119 | #Preprocess HLA_antigen files 120 | #remove HLA which is not in HLA_seq_lib; if the 
def preprocess(filedir):
    """Load the CDR3/Antigen/HLA table and keep only rows that can be encoded.

    The CSV is read with its first row as header and must provide the
    columns ``CDR3``, ``Antigen`` and ``HLA``. Rows are sorted by ``CDR3``,
    then rows with any missing value, rows whose HLA is not a prefix of any
    allele name in the module-level ``HLA_seq_lib``, and rows whose antigen is
    longer than 15 residues are removed.

    Returns
    -------
    (TCR_list, antigen_list, HLA_list): three parallel lists of strings.

    Raises
    ------
    FileNotFoundError
        If ``filedir`` does not exist. The previous ``return 0`` made the
        caller fail later with an unrelated tuple-unpacking TypeError.
    """
    print('Processing: ' + filedir)
    if not os.path.exists(filedir):
        print('Invalid file path: ' + filedir)
        raise FileNotFoundError(filedir)
    dataset = pd.read_csv(filedir, header=0)
    dataset = dataset.sort_values('CDR3').reset_index(drop=True)
    dataset = dataset.dropna()
    # An input allele is accepted when at least one library allele name starts
    # with it; hla_encode() later resolves it to the first such allele.
    HLA_to_drop = []
    for hla in set(dataset['HLA']):
        prefix = str(hla)
        if not any(allele.startswith(prefix) for allele in HLA_seq_lib):
            HLA_to_drop.append(hla)
            print('drop ' + prefix)
    dataset = dataset[~dataset['HLA'].isin(HLA_to_drop)]
    # Count what the length filter itself removes. The old index arithmetic
    # also counted NaN/HLA drops and raised on an empty table.
    rows_before = dataset.shape[0]
    dataset = dataset[dataset.Antigen.str.len() < 16]
    print(str(rows_before - dataset.shape[0]) + ' antigens longer than 15aa are dropped!')
    TCR_list = dataset['CDR3'].tolist()
    antigen_list = dataset['Antigen'].tolist()
    HLA_list = dataset['HLA'].tolist()
    return TCR_list, antigen_list, HLA_list


def aamapping_TCR(peptideSeq, aa_dict):
    """Map a CDR3 amino-acid string to an (80, 5) array of Atchley factors.

    Sequences longer than 80 residues are truncated (with a printed notice).
    Residues missing from ``aa_dict`` become a zero row (with a printed
    notice). The remaining rows up to 80 are zero padding.
    """
    if len(peptideSeq) > 80:
        print('Length: ' + str(len(peptideSeq)) + ' over bound!')
        peptideSeq = peptideSeq[0:80]
    peptideArray = []
    for aa_single in peptideSeq:
        try:
            peptideArray.append(aa_dict[aa_single])
        except KeyError:
            print('Not proper aaSeqs: ' + peptideSeq)
            peptideArray.append(np.zeros(5, dtype='float32'))
    for _ in range(80 - len(peptideSeq)):
        peptideArray.append(np.zeros(5, dtype='float32'))
    return np.asarray(peptideArray)


def hla_encode(HLA_name, encoding_method):
    """Encode one HLA allele as a 34-row matrix of its pseudo-sequence.

    ``HLA_name`` is looked up in the module-level ``HLA_seq_lib``; when it is
    not an exact key, the first library allele whose name starts with it is
    used. The pseudo-sequence is converted to residue indices via
    ``aa_dict_one_hot``, padded with 'X' (index 20) up to 34 entries, and the
    matching rows of ``ENCODING_DATA_FRAMES[encoding_method]`` are returned as
    a numpy array.

    Raises
    ------
    ValueError
        If no library allele matches ``HLA_name`` (previously an opaque
        ``IndexError`` from indexing an empty list).
    """
    if HLA_name not in HLA_seq_lib:
        prefix = str(HLA_name)
        HLA_name = next(
            (allele for allele in HLA_seq_lib if allele.startswith(prefix)),
            None,
        )
        if HLA_name is None:
            raise ValueError('Not proper HLA allele:' + prefix)
    HLA_sequence = HLA_seq_lib[HLA_name]
    HLA_int = [aa_dict_one_hot[char] for char in HLA_sequence]
    # Pad with 'X'; a non-positive count leaves the list unchanged.
    HLA_int.extend([20] * (34 - len(HLA_int)))
    result = ENCODING_DATA_FRAMES[encoding_method].iloc[HLA_int]
    return np.asarray(result)


def peptide_encode_HLA(peptide, maxlen, encoding_method):
    """Encode an antigen peptide as a ``maxlen``-row matrix.

    Non-breaking spaces are removed and the peptide is upper-cased. Residues
    not present in ``aa_dict_one_hot`` are replaced by 'X' (index 20). The
    peptide is padded with 'X' in the middle (after its first ``len // 2``
    residues) up to ``maxlen`` and the matching rows of
    ``ENCODING_DATA_FRAMES[encoding_method]`` are returned as a numpy array.

    Raises
    ------
    ValueError
        If the cleaned peptide is longer than ``maxlen``.
    """
    # Clean first so the length check sees what will actually be encoded.
    peptide = peptide.replace(u'\xa0', u'').upper()
    if len(peptide) > maxlen:
        msg = 'Peptide %s has length %d > maxlen = %d.'
        raise ValueError(msg % (peptide, len(peptide), maxlen))
    o = [aa_dict_one_hot.get(aa, 20) for aa in peptide]
    k = len(o)
    o = o[:k // 2] + [20] * (int(maxlen) - k) + o[k // 2:]
    result = ENCODING_DATA_FRAMES[encoding_method].iloc[o]
    return np.asarray(result)


def TCRMap(dataset, aa_dict):
    """Encode a list of CDR3 strings into a (len(dataset), 80, 5, 1) float32 array.

    Row ``i`` of the result always encodes ``dataset[i]``. The previous
    Counter-based fill grouped equal sequences together, which silently
    misaligned TCR rows with the antigen/HLA rows unless the input was sorted.
    Each distinct sequence is still encoded only once.
    """
    TCR_array = np.zeros((len(dataset), 80, 5, 1), dtype=np.float32)
    encoded = {}
    for pos, sequence in enumerate(dataset):
        if sequence not in encoded:
            encoded[sequence] = aamapping_TCR(sequence, aa_dict).reshape(80, 5, 1)
        TCR_array[pos] = encoded[sequence]
    print('TCRMap done!')
    return TCR_array


def HLAMap(dataset, encoding_method):
    """Encode a list of HLA names into a (len(dataset), 34, 21) int8 array.

    Each distinct allele is encoded once with hla_encode() and reused.
    """
    HLA_array = np.zeros((len(dataset), 34, 21), dtype=np.int8)
    HLA_seen = {}
    for pos, HLA in enumerate(dataset):
        if HLA not in HLA_seen:
            HLA_array[pos] = hla_encode(HLA, encoding_method).reshape(1, 34, 21)
            HLA_seen[HLA] = HLA_array[pos]
        else:
            HLA_array[pos] = HLA_seen[HLA]
    print('HLAMap done!')
    return HLA_array


def antigenMap(dataset, maxlen, encoding_method):
    """Encode a list of antigens into a (len(dataset), maxlen, 21) int8 array.

    Each distinct antigen is encoded once with peptide_encode_HLA() and reused.
    """
    antigen_array = np.zeros((len(dataset), maxlen, 21), dtype=np.int8)
    antigens_seen = {}
    for pos, antigen in enumerate(dataset):
        if antigen not in antigens_seen:
            antigen_array[pos] = peptide_encode_HLA(
                antigen, maxlen, encoding_method).reshape(1, maxlen, 21)
            antigens_seen[antigen] = antigen_array[pos]
        else:
            antigen_array[pos] = antigens_seen[antigen]
    print('antigenMap done!')
    return antigen_array


def pearson_correlation_f(y_true, y_pred):
    """Pearson correlation of two tensors, built from Keras backend ops.

    Means and standard deviations are taken over all elements, so the scalar
    means broadcast when subtracted from the tensors.
    """
    fsp = y_pred - K.mean(y_pred)
    fst = y_true - K.mean(y_true)
    devP = K.std(y_pred)
    devT = K.std(y_true)
    return K.mean(fsp * fst) / (devP * devT)

233 | def pos_neg_acc(y_true,y_pred): 234 | #self-defined prediction accuracy metric 235 | positive_pred=y_pred[:,1] 236 | negative_pred=y_pred[:,0] 237 | diff=K.mean(K.cast(negative_pred=1.5.0 2 | import numpy as np 3 | import pandas as pd 4 | import tensorflow as tf 5 | import sys 6 | import csv 7 | from io import StringIO 8 | import keras 9 | from keras.layers import Input,Dense,concatenate,Flatten,BatchNormalization,LSTM 10 | from keras.models import Model,load_model 11 | import os 12 | from keras import backend as K 13 | from keras.optimizers import Adam 14 | from keras.layers.advanced_activations import LeakyReLU 15 | ##Customer Input 16 | args = sys.argv 17 | file_dir=args[args.index('-file')+1] 18 | model_dir=args[args.index('-model')+1] 19 | #embedding vector for tcr encoding 20 | aa_dict_dir=args[args.index('-embeding_vectors_tcr')+1] 21 | hla_db_dir=args[args.index('-hla_db')+1] 22 | output_dir=args[args.index('-output')+1] 23 | output_log_dir=args[args.index('-output_log')+1] 24 | paired=args[args.index('-paired')+1] 25 | #tcr encoding dimension 26 | encode_dim=int(args[args.index('-tcr_encode_dim')+1]) 27 | 28 | #example command 29 | #python ternary_train_encoding.py -file training_positive.csv -model /dir/to/encoders/ -embeding_vectors_tcr Atchley_factors.csv -hla_db /dir/to/HLA_seq -output /dir/to/output/ \ 30 | -output_log /dir/to/output/ternary.log -tcr_encode_dim 80 -paired T 31 | ################################ 32 | # Model settings and constants # 33 | ################################ 34 | ########################### Atchley's factors####################### 35 | aa_dict_atchley=dict() 36 | with open(aa_dict_dir,'r') as aa: 37 | aa_reader=csv.reader(aa) 38 | next(aa_reader, None) 39 | for rows in aa_reader: 40 | aa_name=rows[0] 41 | aa_factor=rows[1:len(rows)] 42 | aa_dict_atchley[aa_name]=np.asarray(aa_factor,dtype='float') 43 | ########################### One Hot ########################## 44 | aa_dict_one_hot = {'A': 0,'C': 1,'D': 2,'E': 
3,'F': 4,'G': 5,'H': 6,'I': 7,'K': 8,'L': 9, 45 | 'M': 10,'N': 11,'P': 12,'Q': 13,'R': 14,'S': 15,'T': 16,'V': 17, 46 | 'W': 18,'Y': 19,'X': 20} # 'X' is a padding variable 47 | ########################### Blosum ########################## 48 | BLOSUM50_MATRIX = pd.read_table(StringIO(u""" 49 | A R N D C Q E G H I L K M F P S T W Y V B J Z X * 50 | A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -2 -1 -1 -3 -1 1 0 -3 -2 0 -2 -2 -1 -1 -5 51 | R -2 7 -1 -2 -4 1 0 -3 0 -4 -3 3 -2 -3 -3 -1 -1 -3 -1 -3 -1 -3 0 -1 -5 52 | N -1 -1 7 2 -2 0 0 0 1 -3 -4 0 -2 -4 -2 1 0 -4 -2 -3 5 -4 0 -1 -5 53 | D -2 -2 2 8 -4 0 2 -1 -1 -4 -4 -1 -4 -5 -1 0 -1 -5 -3 -4 6 -4 1 -1 -5 54 | C -1 -4 -2 -4 13 -3 -3 -3 -3 -2 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -3 -2 -3 -1 -5 55 | Q -1 1 0 0 -3 7 2 -2 1 -3 -2 2 0 -4 -1 0 -1 -1 -1 -3 0 -3 4 -1 -5 56 | E -1 0 0 2 -3 2 6 -3 0 -4 -3 1 -2 -3 -1 -1 -1 -3 -2 -3 1 -3 5 -1 -5 57 | G 0 -3 0 -1 -3 -2 -3 8 -2 -4 -4 -2 -3 -4 -2 0 -2 -3 -3 -4 -1 -4 -2 -1 -5 58 | H -2 0 1 -1 -3 1 0 -2 10 -4 -3 0 -1 -1 -2 -1 -2 -3 2 -4 0 -3 0 -1 -5 59 | I -1 -4 -3 -4 -2 -3 -4 -4 -4 5 2 -3 2 0 -3 -3 -1 -3 -1 4 -4 4 -3 -1 -5 60 | L -2 -3 -4 -4 -2 -2 -3 -4 -3 2 5 -3 3 1 -4 -3 -1 -2 -1 1 -4 4 -3 -1 -5 61 | K -1 3 0 -1 -3 2 1 -2 0 -3 -3 6 -2 -4 -1 0 -1 -3 -2 -3 0 -3 1 -1 -5 62 | M -1 -2 -2 -4 -2 0 -2 -3 -1 2 3 -2 7 0 -3 -2 -1 -1 0 1 -3 2 -1 -1 -5 63 | F -3 -3 -4 -5 -2 -4 -3 -4 -1 0 1 -4 0 8 -4 -3 -2 1 4 -1 -4 1 -4 -1 -5 64 | P -1 -3 -2 -1 -4 -1 -1 -2 -2 -3 -4 -1 -3 -4 10 -1 -1 -4 -3 -3 -2 -3 -1 -1 -5 65 | S 1 -1 1 0 -1 0 -1 0 -1 -3 -3 0 -2 -3 -1 5 2 -4 -2 -2 0 -3 0 -1 -5 66 | T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 2 5 -3 -2 0 0 -1 -1 -1 -5 67 | W -3 -3 -4 -5 -5 -1 -3 -3 -3 -3 -2 -3 -1 1 -4 -4 -3 15 2 -3 -5 -2 -2 -1 -5 68 | Y -2 -1 -2 -3 -3 -1 -2 -3 2 -1 -1 -2 0 4 -3 -2 -2 2 8 -1 -3 -1 -2 -1 -5 69 | V 0 -3 -3 -4 -1 -3 -3 -4 -4 4 1 -3 1 -1 -3 -2 0 -3 -1 5 -3 2 -3 -1 -5 70 | B -2 -1 5 6 -3 0 1 -1 0 -4 -4 0 -3 -4 -2 0 0 -5 -3 -3 6 -4 1 -1 -5 71 | J -2 -3 -4 -4 -2 -3 -3 -4 -3 4 4 -3 2 1 -3 -3 -1 -2 -1 2 -4 4 
-3 -1 -5 72 | Z -1 0 0 1 -3 4 5 -2 0 -3 -3 1 -1 -4 -1 0 -1 -2 -2 -3 1 -3 5 -1 -5 73 | X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -5 74 | * -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1 75 | """), sep='\s+').loc[list(aa_dict_one_hot.keys()), list(aa_dict_one_hot.keys())] 76 | assert (BLOSUM50_MATRIX == BLOSUM50_MATRIX.T).all().all() 77 | 78 | ENCODING_DATA_FRAMES = { 79 | "BLOSUM50": BLOSUM50_MATRIX, 80 | "one-hot": pd.DataFrame([ 81 | [1 if i == j else 0 for i in range(len(aa_dict_one_hot.keys()))] 82 | for j in range(len(aa_dict_one_hot.keys())) 83 | ], index=aa_dict_one_hot.keys(), columns=aa_dict_one_hot.keys()) 84 | } 85 | 86 | ########################### HLA pseudo-sequence########################## 87 | path=hla_db_dir 88 | HLA_ABC=[path+'/A_prot.fasta',path+'/B_prot.fasta',path+'/C_prot.fasta',path+'/E_prot.fasta'] 89 | HLA_seq_lib={} 90 | for one_class in HLA_ABC: 91 | prot=open(one_class) 92 | #pseudo_seq from netMHCpan:https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0000796 93 | pseudo_seq_pos=[7,9,24,45,59,62,63,66,67,79,70,73,74,76,77,80,81,84,95,97,99,114,116,118,143,147,150,152,156,158,159,163,167,171] 94 | #write HLA sequences into a library 95 | #class I alles 96 | name='' 97 | sequence='' 98 | for line in prot: 99 | if len(name)!=0: 100 | if line.startswith('>HLA'): 101 | pseudo='' 102 | for i in range(0,33): 103 | if len(sequence)>pseudo_seq_pos[i]: 104 | pseudo=pseudo+sequence[pseudo_seq_pos[i]] 105 | HLA_seq_lib[name]=pseudo 106 | name=line.split(' ')[1] 107 | sequence='' 108 | else: 109 | sequence=sequence+line.strip() 110 | else: 111 | name=line.split(' ')[1] 112 | ######################################## 113 | # Input data encoding helper functions # 114 | ######################################## 115 | #################functions for TCR encoding#################### 116 | def preprocess(filedir): 117 | #Preprocess TCR files 118 | print('Processing: '+filedir) 119 | if 
not os.path.exists(filedir): 120 | print('Invalid file path: ' + filedir) 121 | return 0 122 | dataset = pd.read_csv(filedir, header=0) 123 | #Preprocess HLA_antigen files 124 | #remove HLA which is not in HLA_seq_lib; if HLA*01:01 not in HLA_seq_lib; then the first HLA startswith input HLA allele will be given 125 | #Remove antigen that is longer than 15aa 126 | if paired=='F': 127 | HLA_antigen=dataset[['HLA','Antigen']].dropna() 128 | HLA_list=list(HLA_antigen['HLA']) 129 | antigen_list=list(HLA_antigen['Antigen']) 130 | ind=0 131 | index_list=[] 132 | for i in HLA_list: 133 | if len([hla_allele for hla_allele in HLA_seq_lib.keys() if hla_allele.startswith(str(i))])==0: 134 | index_list.append(ind) 135 | ind=ind+1 136 | HLA_antigen=HLA_antigen.drop(HLA_antigen.iloc[index_list].index) 137 | HLA_antigen=HLA_antigen[HLA_antigen.Antigen.str.len()<16] 138 | print(str(max(HLA_antigen.index)-HLA_antigen.shape[0])+' antigens longer than 15aa are dropped!') 139 | TCR_list=list(dataset['CDR3'].dropna()) 140 | antigen_list=list(HLA_antigen['Antigen']) 141 | HLA_list=list(HLA_antigen['HLA']) 142 | else: 143 | dataset=dataset.dropna() 144 | HLA_list=list(dataset['HLA']) 145 | ind=0 146 | index_list=[] 147 | for i in HLA_list: 148 | if len([hla_allele for hla_allele in HLA_seq_lib.keys() if hla_allele.startswith(str(i))])==0: 149 | index_list.append(ind) 150 | print('drop '+i) 151 | ind=ind+1 152 | dataset=dataset.drop(dataset.iloc[index_list].index) 153 | dataset=dataset[dataset.Antigen.str.len()<16] 154 | print(str(max(dataset.index)-dataset.shape[0])+' antigens longer than 15aa are dropped!') 155 | TCR_list=dataset['CDR3'].tolist() 156 | antigen_list=dataset['Antigen'].tolist() 157 | HLA_list=dataset['HLA'].tolist() 158 | return TCR_list,antigen_list,HLA_list 159 | 160 | def aamapping_TCR(peptideSeq,aa_dict,encode_dim): 161 | #Transform aa seqs to Atchley's factors. 
def aamapping_TCR(peptideSeq, aa_dict, encode_dim):
    '''Encode one amino-acid sequence as an (encode_dim, 5) matrix.

    Parameters
    ----------
    peptideSeq : string of amino-acid letters.
    aa_dict : mapping from a single residue letter to its 5 Atchley factors.
    encode_dim : fixed number of rows of the returned matrix.

    Sequences longer than encode_dim are truncated (a message is printed),
    shorter ones are padded at the end with all-zero rows. A residue that is
    missing from aa_dict is reported and encoded as an all-zero row.
    '''
    if len(peptideSeq) > encode_dim:
        print('Length: ' + str(len(peptideSeq)) + ' over bound!')
        peptideSeq = peptideSeq[0:encode_dim]
    peptideArray = []
    for aa_single in peptideSeq:
        try:
            peptideArray.append(aa_dict[aa_single])
        except KeyError:
            print('Not proper aaSeqs: ' + peptideSeq)
            peptideArray.append(np.zeros(5, dtype='float64'))
    # Zero-pad at the end up to the fixed input length.
    peptideArray.extend(
        np.zeros(5, dtype='float64')
        for _ in range(encode_dim - len(peptideSeq))
    )
    return np.asarray(peptideArray)


def hla_encode(HLA_name, encoding_method):
    '''Encode one HLA allele as a numeric matrix with 34 rows.

    Parameters
    ----------
    HLA_name : name of the HLA allele. If the exact name is not a key of
        HLA_seq_lib, the first library allele whose name starts with
        HLA_name is used instead.
    encoding_method : key into ENCODING_DATA_FRAMES
        ('BLOSUM50' or 'one-hot').

    Returns
    -------
    numpy array with one row per pseudo-sequence position (34 rows).

    Raises
    ------
    ValueError : if no library allele matches HLA_name, or if the stored
        sequence is longer than 34 residues.
    '''
    if HLA_name not in HLA_seq_lib:
        prefix = str(HLA_name)
        # Single scan; dict order decides which allele is "first".
        resolved = next(
            (allele for allele in HLA_seq_lib if allele.startswith(prefix)),
            None,
        )
        if resolved is None:
            print('cannot find' + prefix)
            # Previously this fell through to an uninformative IndexError.
            raise ValueError(
                'HLA allele %s is not in the HLA sequence library.' % prefix)
        HLA_name = resolved
    HLA_sequence = HLA_seq_lib[HLA_name]
    HLA_int = [aa_dict_one_hot[char] for char in HLA_sequence]
    if len(HLA_int) > 34:
        # The old `while len(...) != 34` padding loop never terminated here.
        raise ValueError(
            'HLA allele %s has a sequence of length %d > 34.'
            % (HLA_name, len(HLA_int)))
    # If the pseudo sequence is shorter than 34, pad with 'X' (index 20).
    HLA_int.extend([20] * (34 - len(HLA_int)))
    result = ENCODING_DATA_FRAMES[encoding_method].iloc[HLA_int]
    return np.asarray(result)


def peptide_encode_HLA(peptide, maxlen, encoding_method):
    '''Encode a peptide as a numeric matrix with maxlen rows.

    Residues are upper-cased and mapped to indices through aa_dict_one_hot;
    any residue that is not a key of that mapping is replaced by the padding
    residue 'X' (index 20). Peptides shorter than maxlen are padded with 'X'
    in the MIDDLE: the first half of the peptide, then the padding, then the
    second half.

    e.g. 'SIINFEKL' with maxlen=12 is laid out as
         S I I N X X X X F E K L

    Parameters
    ----------
    peptide : string of peptide comprising amino acids. Non-breaking spaces
        are removed before the length is checked.
    maxlen : int, number of rows of the result.
    encoding_method : key into ENCODING_DATA_FRAMES.

    Returns
    -------
    numpy array holding the selected encoding row for each position.

    Raises
    ------
    ValueError : if the cleaned peptide is longer than maxlen.
    '''
    # Clean first so a stray non-breaking space cannot push an otherwise
    # valid peptide over the length limit.
    peptide = peptide.replace(u'\xa0', u'')
    if len(peptide) > maxlen:
        msg = 'Peptide %s has length %d > maxlen = %d.'
        raise ValueError(msg % (peptide, len(peptide), maxlen))
    # If the amino acid is not valid, replace it with padding aa 'X':20.
    o = [aa_dict_one_hot.get(aa.upper(), 20) for aa in peptide]
    k = len(o)
    # Use 'X'(20) for padding, inserted between the two halves.
    o = o[:k // 2] + [20] * (int(maxlen) - k) + o[k // 2:]
    if len(o) != maxlen:
        # Defensive: only reachable when maxlen is not a whole number.
        msg = 'Peptide %s has length %d < maxlen = %d, but pad is "none".'
        raise ValueError(msg % (peptide, len(peptide), maxlen))
    result = ENCODING_DATA_FRAMES[encoding_method].iloc[o]
    return np.asarray(result)


def TCRMap(dataset, aa_dict, encode_dim):
    '''Encode a list of sequences with aamapping_TCR.

    Returns an array of shape (len(dataset), encode_dim, 5, 1). An empty
    dataset yields an empty array of shape (0, encode_dim, 5, 1).
    '''
    # Encode everything first and stack once; np.append in a loop re-copies
    # the whole array on every iteration.
    encoded = [
        aamapping_TCR(seq, aa_dict, encode_dim).reshape(encode_dim, 5, 1)
        for seq in dataset
    ]
    if encoded:
        TCR_array = np.stack(encoded)
    else:
        TCR_array = np.zeros((0, encode_dim, 5, 1))
    print('TCRMap done!')
    return TCR_array


def HLAMap(dataset, encoding_method):
    '''Input a list of HLA names and get a three dimensional array.

    Returns an array of shape (len(dataset), 34, 21). An empty dataset
    yields an empty array of shape (0, 34, 21).
    '''
    encoded = [
        hla_encode(each_HLA, encoding_method).reshape(34, 21)
        for each_HLA in dataset
    ]
    if encoded:
        HLA_array = np.stack(encoded)
    else:
        HLA_array = np.zeros((0, 34, 21))
    print('HLAMap done!')
    return HLA_array
def antigenMap(dataset, maxlen, encoding_method):
    '''Input a list of antigens and get a three dimensional array.

    Each antigen is encoded with peptide_encode_HLA and the results are
    stacked into an array of shape (len(dataset), maxlen, 21). An empty
    dataset yields an empty array of shape (0, maxlen, 21).
    '''
    # Encode everything first and stack once; np.append in a loop re-copies
    # the whole array on every iteration.
    encoded = [
        peptide_encode_HLA(each_antigen, maxlen, encoding_method).reshape(maxlen, 21)
        for each_antigen in dataset
    ]
    if encoded:
        antigen_array = np.stack(encoded)
    else:
        antigen_array = np.zeros((0, maxlen, 21))
    print('antigenMap done!')
    return antigen_array


def pearson_correlation_f(y_true, y_pred):
    '''Pearson correlation between y_true and y_pred.

    Computed as mean((p - mean(p)) * (t - mean(t))) / (std(p) * std(t))
    using the backend `K` (presumably the Keras backend imported at the top
    of the file). It is passed as a custom object when the saved
    HLA/antigen encoder is loaded below.
    NOTE(review): the quotient is undefined when either standard deviation
    is zero; no epsilon is added.
    '''
    fsp = y_pred - K.mean(y_pred)  # K.mean is a scalar here, so it is subtracted from every element
    fst = y_true - K.mean(y_true)
    devP = K.std(y_pred)
    devT = K.std(y_true)
    return K.mean(fsp*fst)/(devP*devT)


####################################
# import training and testing data #
####################################
# Read data
# From here on everything printed goes to the log file: stdout is rebound
# to it and is not restored; the file is closed at the very end of the script.
log_file=open(output_log_dir,'w')
sys.stdout=log_file
print('Mission loading.')

# TCR / antigen / HLA preprocessing and encoding
TCR_list,antigen_list,HLA_list=preprocess(file_dir)
TCR_array=TCRMap(TCR_list,aa_dict_atchley,encode_dim)
antigen_array=antigenMap(antigen_list,15,'BLOSUM50')
HLA_array=HLAMap(HLA_list,'BLOSUM50')

# Model prediction
# Each saved model is cut at an intermediate layer so that predict() returns
# that layer's output instead of the model's final output.
TCR_encoder=load_model(model_dir+'/TCR_encoder_30.h5')
TCR_encoder=Model(TCR_encoder.input,TCR_encoder.layers[-12].output)
TCR_encoded_result=TCR_encoder.predict(TCR_array)

HLA_antigen_encoder=load_model(model_dir+'/HLA_antigen_encoder_60.h5',custom_objects={'pearson_correlation_f': pearson_correlation_f})
HLA_antigen_encoder=Model(HLA_antigen_encoder.input,HLA_antigen_encoder.layers[-2].output)
HLA_antigen_encoded_result=HLA_antigen_encoder.predict([antigen_array,HLA_array])

# Rows are labelled 1..N to match the order of the input sequences.
TCR_encoded_matrix=pd.DataFrame(data=TCR_encoded_result,index=range(1,len(TCR_list)+1))
HLA_antigen_encoded_matrix=pd.DataFrame(data=HLA_antigen_encoded_result,index=range(1,len(HLA_list)+1)) 300 | allele_matrix=pd.DataFrame({'CDR3':TCR_list,'Antigen':antigen_list,'HLA':HLA_list},index=range(1,len(TCR_list)+1)) 301 | TCR_encoded_matrix.to_csv(output_dir+'/TCR_output.csv',sep=',') 302 | HLA_antigen_encoded_matrix.to_csv(output_dir+'/MHC_antigen_output.csv',sep=',') 303 | allele_matrix.to_csv(output_dir+'/sequence_info.csv',sep=',') 304 | print('Mission Accomplished.\n') 305 | log_file.close() 306 | -------------------------------------------------------------------------------- /test/code/ternary_train_model_pMTnet.py: -------------------------------------------------------------------------------- 1 | import random 2 | import pandas as pd 3 | import csv 4 | import sys 5 | import keras 6 | import numpy as np 7 | from keras.layers import Dense,Input,concatenate,Dropout,BatchNormalization,LSTM,Reshape 8 | from keras.models import Model 9 | from keras.optimizers import Adam, Adamax, RMSprop, Adagrad, Adadelta, Nadam 10 | from keras import backend as K 11 | from keras import regularizers 12 | #random.seed(54321) 13 | ############# Define function ################ 14 | 15 | def pos_neg_acc(y_true,y_pred): 16 | positive_pred=y_pred[:,1] 17 | negative_pred=y_pred[:,0] 18 | diff=K.mean(K.cast(negative_pred