├── data ├── uniprotSeq │ ├── P15421.fasta │ ├── P13501.fasta │ ├── P0CE72.fasta │ ├── P31949.fasta │ ├── Q99525.fasta │ ├── P0CL80.fasta │ ├── P01707.fasta │ ├── Q9NV29.fasta │ ├── O95867.fasta │ ├── Q8NC38.fasta │ ├── Q9GZQ8.fasta │ ├── P15382.fasta │ ├── O14880.fasta │ ├── P54852.fasta │ ├── Q9HAE3.fasta │ ├── Q15669.fasta │ ├── Q5T7N8.fasta │ ├── Q7RTU1.fasta │ ├── P98173.fasta │ ├── Q14442.fasta │ ├── P49755.fasta │ ├── P26436.fasta │ ├── P30281.fasta │ ├── P78345.fasta │ ├── P57076.fasta │ ├── Q8NH43.fasta │ ├── A6NH11.fasta │ ├── Q8NGJ5.fasta │ ├── Q9HC38.fasta │ ├── Q6PF06.fasta │ ├── P78382.fasta │ ├── P47211.fasta │ ├── Q3MIR4.fasta │ ├── Q9BY21.fasta │ ├── Q13155.fasta │ ├── Q9BYB4.fasta │ ├── A6NFE2.fasta │ ├── Q8TDG2.fasta │ ├── Q92734.fasta │ ├── P01860.fasta │ ├── Q16690.fasta │ ├── Q96DV4.fasta │ ├── P15086.fasta │ ├── A6NKF1.fasta │ ├── Q5VV16.fasta │ ├── Q5SWX8.fasta │ ├── Q15465.fasta │ ├── Q6NT16.fasta │ ├── Q05901.fasta │ ├── Q9NWZ3.fasta │ ├── Q9BY11.fasta │ ├── Q6B0B8.fasta │ ├── Q9Y512.fasta │ ├── P47974.fasta │ ├── Q8IW19.fasta │ ├── Q8TAA9.fasta │ ├── Q9UBM8.fasta │ ├── Q13087.fasta │ ├── Q86TJ5.fasta │ ├── P40222.fasta │ ├── P07947.fasta │ ├── Q494X3.fasta │ ├── Q5T749.fasta │ ├── Q8N1G4.fasta │ ├── Q9HA90.fasta │ ├── Q8N6Q8.fasta │ ├── P31645.fasta │ ├── Q9UNN5.fasta │ ├── Q06187.fasta │ ├── Q6PI48.fasta │ ├── P05160.fasta │ ├── Q7Z340.fasta │ ├── Q9H461.fasta │ ├── O75509.fasta │ ├── Q92542.fasta │ ├── P78563.fasta │ ├── Q96NI6.fasta │ ├── Q9BXB4.fasta │ ├── Q9UFB7.fasta │ ├── Q9HCI6.fasta │ ├── C9J798.fasta │ ├── Q9UKQ2.fasta │ ├── Q9QC07.fasta │ ├── Q8TF61.fasta │ ├── Q8NB90.fasta │ ├── Q9H3R1.fasta │ ├── Q9Y5G9.fasta │ ├── Q6UXY8.fasta │ ├── Q6ZRS4.fasta │ ├── Q8IZA0.fasta │ ├── ReverseCovid.fasta │ ├── O75151.fasta │ ├── O60721.fasta │ ├── Q15147.fasta │ ├── P54098.fasta │ ├── P0DTC2.fasta │ ├── Q6VMQ6.fasta │ ├── Q5TZJ5.fasta │ ├── Q96HA7.fasta │ ├── Q5VYS8.fasta │ ├── P51805.fasta │ └── Q9NR99.fasta ├── RNA │ └── P15421_rna.txt 
└── InputFiles │ ├── miscellaneous.csv │ ├── stack.csv │ ├── stack_H.csv │ ├── sample_codon_usage.csv │ ├── mismatch1nI.csv │ ├── mismatch23I.csv │ ├── mismatchI.csv │ ├── mismatch1nI_H.csv │ ├── mismatchI_H.csv │ ├── mismatchH.csv │ ├── mismatchH_H.csv │ ├── intl11.csv │ └── intl11_H.csv ├── CMakeLists.txt ├── src ├── NussinovAlgorithm.h ├── params │ ├── constants.h │ └── intl11.h ├── Nussinov.h ├── NussinovAlgorithm.cpp ├── BeamZuker.h ├── utils.h ├── ZukerAlgorithm.h ├── default.h ├── BeamZuker.cpp ├── main.cpp └── utils.cpp ├── LICENSE └── README.md /data/uniprotSeq/P15421.fasta: -------------------------------------------------------------------------------- 1 | >sp|P15421|GLPE_HUMAN Glycophorin-E OS=Homo sapiens OX=9606 GN=GYPE PE=2 SV=2 2 | MYGKIIFVLLLSGIVSISASSTTGVAMHTSTSSSVTKSYISSQTNGITLINWWAMARVIF 3 | EVMLVVVGMIILISYCIR 4 | -------------------------------------------------------------------------------- /data/uniprotSeq/P13501.fasta: -------------------------------------------------------------------------------- 1 | >sp|P13501|CCL5_HUMAN C-C motif chemokine 5 OS=Homo sapiens OX=9606 GN=CCL5 PE=1 SV=3 2 | MKVSAAALAVILIATALCAPASASPYSSDTTPCCFAYIARPLPRAHIKEYFYTSGKCSNP 3 | AVVFVTRKNRQVCANPEKKWVREYINSLEMS 4 | -------------------------------------------------------------------------------- /data/uniprotSeq/P0CE72.fasta: -------------------------------------------------------------------------------- 1 | >sp|P0CE72|ONCO_HUMAN Oncomodulin-1 OS=Homo sapiens OX=9606 GN=OCM PE=1 SV=1 2 | MSITDVLSADDIAAALQECRDPDTFEPQKFFQTSGLSKMSANQVKDVFRFIDNDQSGYLD 3 | EEELKFFLQKFESGARELTESETKSLMAAADNDGDGKIGAEEFQEMVHS 4 | -------------------------------------------------------------------------------- /data/uniprotSeq/P31949.fasta: -------------------------------------------------------------------------------- 1 | >sp|P31949|S10AB_HUMAN Protein S100-A11 OS=Homo sapiens OX=9606 GN=S100A11 PE=1 SV=2 2 | MAKISSPTETERCIESLIAVFQKYAGKDGYNYTLSKTEFLSFMNTELAAFTKNQKDPGVL 3 | 
DRMMKKLDTNSDGQLDFSEFLNLIGGLAMACHDSFLKAVPSQKRT 4 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q99525.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q99525|H4G_HUMAN Histone H4-like protein type G OS=Homo sapiens OX=9606 GN=H4C7 PE=1 SV=1 2 | MSVRGKAGKGLGKGGAKCHRKVLSDNIQGITKCTIRRLARHGGVKRILGLIYEETRRVFK 3 | VFLENVIWYAVTNTEHAKRKTVTAMAVVYVLKRQGRTL 4 | -------------------------------------------------------------------------------- /data/uniprotSeq/P0CL80.fasta: -------------------------------------------------------------------------------- 1 | >sp|P0CL80|GG12F_HUMAN G antigen 12F OS=Homo sapiens OX=9606 GN=GAGE12F PE=3 SV=1 2 | MSWRGRSTYYWPRPRRYVQPPEMIGPMRPEQFSDEVEPATPEEGEPATQRQDPAAAQEGE 3 | DEGASAGQGPKPEAHSQEQGHPQTGCECEDGPDGQEMDPPNPEEVKTPEEGEKQSQC 4 | -------------------------------------------------------------------------------- /data/RNA/P15421_rna.txt: -------------------------------------------------------------------------------- 1 | AUGUAUGGCAAAAUCAUCUUCGUCCUGCUGCUCUCCGGGAUCGUUUCGAUCUCGGCGAGCAGCACGACGGGGGUGGCCAUGCAUACGAGUACUAGCAGUAGCGUGACUAAGAGUUAUAUAUCCUCACAGACCAACGGCAUCACCUUGAUAAAUUGGUGGGCGAUGGCCCGCGUAAUUUUCGAGGUGAUGCUGGUGGUCGUGGGGAUGAUAAUUCUUAUCAGCUACUGCAUUCGU -------------------------------------------------------------------------------- /data/uniprotSeq/P01707.fasta: -------------------------------------------------------------------------------- 1 | >sp|P01706|LV211_HUMAN Immunoglobulin lambda variable 2-11 OS=Homo sapiens OX=9606 GN=IGLV2-11 PE=1 SV=2 2 | MAWALLLLSLLTQGTGSWAQSALTQPRSVSGSPGQSVTISCTGTSSDVGGYNYVSWYQQH 3 | PGKAPKLMIYDVSKRPSGVPDRFSGSKSGNTASLTISGLQAEDEADYYCCSYAGSYTFH 4 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9NV29.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9NV29|TM100_HUMAN Transmembrane protein 100 OS=Homo sapiens OX=9606 
GN=TMEM100 PE=1 SV=2 2 | MTEEPIKEILGAPKAHMAATMEKSPKSEVVITTVPLVSEIQLMAATGGTELSCYRCIIPF 3 | AVVVFIAGIVVTAVAYSFNSHGSIISIFGLVVLSSGLFLLASSALCWKVRQRSKKAKRRE 4 | SQTALVANQRSLFA 5 | -------------------------------------------------------------------------------- /data/uniprotSeq/O95867.fasta: -------------------------------------------------------------------------------- 1 | >sp|O95867|LY66C_HUMAN Lymphocyte antigen 6 complex locus protein G6c OS=Homo sapiens OX=9606 GN=LY6G6C PE=1 SV=1 2 | MKALMLLTLSVLLCWVSADIRCHSCYKVPVLGCVDRQSCRLEPGQQCLTTHAYLGKMWVF 3 | SNLRCGTPEEPCQEAFNQTNRKLGLTYNTTCCNKDNCNSAGPRPTPALGLVFLTSLAGLG 4 | LWLLH 5 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q8NC38.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q8NC38|CA213_HUMAN Putative uncharacterized protein ZNF436-AS1 OS=Homo sapiens OX=9606 GN=ZNF436-AS1 PE=2 SV=2 2 | MLAVPVRLKVGSRKPEWGTNRLTSCPAKDPLDRRLQNLRDRERVPEPQRSLRPGVQEDSR 3 | EHGQVPEVSDPQVDLEFVDLQAKPRYRRLILKTQIPEASDSQAAQKPQAHRQIPETTEAG 4 | RETTSN 5 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9GZQ8.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9GZQ8|MLP3B_HUMAN Microtubule-associated proteins 1A/1B light chain 3B OS=Homo sapiens OX=9606 GN=MAP1LC3B PE=1 SV=3 2 | MPSEKTFKQRRTFEQRVEDVRLIREQHPTKIPVIIERYKGEKQLPVLDKTKFLVPDHVNM 3 | SELIKIIRRRLQLNANQAFFLLVNGHSMVSVSTPISEVYESEKDEDGFLYMVYASQETFG 4 | MKLSV 5 | -------------------------------------------------------------------------------- /data/InputFiles/miscellaneous.csv: -------------------------------------------------------------------------------- 1 | 0,1 2 | lxc37,107.856 3 | ML_intern37,-90.0 4 | ML_interndH,-220.0 5 | ML_closing37,930.0 6 | ML_closingdH,3000.0 7 | ML_BASE37,0.0 8 | ML_BASEdH,0.0 9 | MAX_NINIO,300.0 10 | ninio37,60.0 11 | niniodH,320.0 12 | TerminalAU37,50.0 13 | 
TerminalAUdH,370.0 14 | -------------------------------------------------------------------------------- /data/uniprotSeq/P15382.fasta: -------------------------------------------------------------------------------- 1 | >sp|P15382|KCNE1_HUMAN Potassium voltage-gated channel subfamily E member 1 OS=Homo sapiens OX=9606 GN=KCNE1 PE=1 SV=1 2 | MILSNTTAVTPFLTKLWQETVQQGGNMSGLARRSPRSSDGKLEALYVLMVLGFFGFFTLG 3 | IMLSYIRSKKLEHSNDPFNVYIESDAWQEKDKAYVQARVLESYRSCYVVENHLAIEQPNT 4 | HLPETKPSP 5 | -------------------------------------------------------------------------------- /data/uniprotSeq/O14880.fasta: -------------------------------------------------------------------------------- 1 | >sp|O14880|MGST3_HUMAN Microsomal glutathione S-transferase 3 OS=Homo sapiens OX=9606 GN=MGST3 PE=1 SV=1 2 | MAVLSKEYGFVLLTGAASFIMVAHLAINVSKARKKYKVEYPIMYSTDPENGHIFNCIQRA 3 | HQNTLEVYPPFLFFLAVGGVYHPRIASGLGLAWIVGRVLYAYGYYTGEPSKRSRGALGSI 4 | ALLGLVGTTVCSAFQHLGWVKSGLGSGPKCCH 5 | -------------------------------------------------------------------------------- /data/uniprotSeq/P54852.fasta: -------------------------------------------------------------------------------- 1 | >sp|P54852|EMP3_HUMAN Epithelial membrane protein 3 OS=Homo sapiens OX=9606 GN=EMP3 PE=1 SV=1 2 | MSLLLLVVSALHILILILLFVATLDKSWWTLPGKESLNLWYDCTWNNDTKTWACSNVSEN 3 | GWLKAVQVLMVLSLILCCLSFILFMFQLYTMRRGGLFYATGLCQLCTSVAVFTGALIYAI 4 | HAEEILEKHPRGGSFGYCFALAWVAFPLALVSGIIYIHLRKRE 5 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9HAE3.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9HAE3|CLXN_HUMAN Calaxin OS=Homo sapiens OX=9606 GN=CLXN PE=2 SV=1 2 | MNRKKLQKLTDTLTKNCKHFNKFEVNCLIKLFYDLVGGVERQGLVVGLDRNAFRNILHVT 3 | FGMTDDMIMDRVFRGFDKDNDGCVNVLEWIHGLSLFLRGSLEEKMKYCFEVFDLNGDGFI 4 | SKEEMFHMLKNSLLKQPSEEDPDEGIKDLVEITLKKMDHDHDGKLSFADYELAVREETLL 5 | LEAFGPCLPDPKSQMEFEAQVFKDPNEFNDM 6 | 
-------------------------------------------------------------------------------- /data/uniprotSeq/Q15669.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q15669|RHOH_HUMAN Rho-related GTP-binding protein RhoH OS=Homo sapiens OX=9606 GN=RHOH PE=1 SV=1 2 | MLSSIKCVLVGDSAVGKTSLLVRFTSETFPEAYKPTVYENTGVDVFMDGIQISLGLWDTA 3 | GNDAFRSIRPLSYQQADVVLMCYSVANHNSFLNLKNKWIGEIRSNLPCTPVLVVATQTDQ 4 | REMGPHRASCVNAMEGKKLAQDVRAKGYLECSALSNRGVQQVFECAVRTAVNQARRRNRR 5 | RLFSINECKIF 6 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q5T7N8.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q5T7N8|F27D1_HUMAN Protein FAM27D1 OS=Homo sapiens OX=9606 GN=FAM27D1 PE=3 SV=2 2 | MLEKRLLRMGMRLQLLRDRRISSRGPGLHRAKADPQQQKRLTTGLMTQAETQKEAQQRQA 3 | AMRKTALWHTGHLQPKTHTHTGMHTQTHRERERNTQRLRDRERRENGRHTHRHTHTLTHT 4 | HTHRDTHTASYRRGIETHTTRQPLRLRGSAHDENDPRVREQPRGTQADLSSRSRMAARLL 5 | GRLTPTNTVRAGLRLGSRAASPDPAWGFLIVVGPL 6 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q7RTU1.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q7RTU1|TCF23_HUMAN Transcription factor 23 OS=Homo sapiens OX=9606 GN=TCF23 PE=1 SV=1 2 | MSQRKARGPPAMPGVGHSQTQAKARLLPGADRKRSRLSRTRQDPWEERSWSNQRWSRATP 3 | GPRGTRAGGLALGRSEASPENAARERSRVRTLRQAFLALQAALPAVPPDTKLSKLDVLVL 4 | AASYIAHLTRTLGHELPGPAWPPFLRGLRYLHPLKKWPMRSRLYAGGLGYSDLDSTTAST 5 | PSQRTRDAEVGSQVPGEADALLSTTPLSPALGDK 6 | -------------------------------------------------------------------------------- /data/uniprotSeq/P98173.fasta: -------------------------------------------------------------------------------- 1 | >sp|P98173|FAM3A_HUMAN Protein FAM3A OS=Homo sapiens OX=9606 GN=FAM3A PE=1 SV=2 2 | MRLAGPLRIVVLVVSVGVTWIVVSILLGGPGSGFPRIQQLFTSPESSVTAAPRARKYKCG 3 | LPQPCPEEHLAFRVVSGAANVIGPKICLEDKMLMSSVKDNVGRGLNIALVNGVSGELIEA 4 | 
RAFDMWAGDVNDLLKFIRPLHEGTLVFVASYDDPATKMNEETRKLFSELGSRNAKELAFR 5 | DSWVFVGAKGVQNKSPFEQHVKNSKHSNKYEGWPEALEMEGCIPRRSTAS 6 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q14442.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q14442|PIGH_HUMAN Phosphatidylinositol N-acetylglucosaminyltransferase subunit H OS=Homo sapiens OX=9606 GN=PIGH PE=1 SV=1 2 | MEDERSFSDICGGRLALQRRYYSPSCREFCLSCPRLSLRSLTAVTCTVWLAAYGLFTLCE 3 | NSMILSAAIFITLLGLLGYLHFVKIDQETLLIIDSLGIQMTSSYASGKESTTFIEMGKVK 4 | DIVINEAIYMQKVIYYLCILLKDPVEPHGISQVVPVFQSAKPRLDCLIEVYRSCQEILAH 5 | QKATSTSP 6 | -------------------------------------------------------------------------------- /data/uniprotSeq/P49755.fasta: -------------------------------------------------------------------------------- 1 | >sp|P49755|TMEDA_HUMAN Transmembrane emp24 domain-containing protein 10 OS=Homo sapiens OX=9606 GN=TMED10 PE=1 SV=2 2 | MSGLSGPPARRGPFPLALLLLFLLGPRLVLAISFHLPINSRKCLREEIHKDLLVTGAYEI 3 | SDQSGGAGGLRSHLKITDSAGHILYSKEDATKGKFAFTTEDYDMFEVCFESKGTGRIPDQ 4 | LVILDMKHGVEAKNYEEIAKVEKLKPLEVELRRLEDLSESIVNDFAYMKKREEEMRDTNE 5 | STNTRVLYFSIFSMFCLIGLATWQVFYLRRFFKAKKLIE 6 | -------------------------------------------------------------------------------- /data/uniprotSeq/P26436.fasta: -------------------------------------------------------------------------------- 1 | >sp|P26436|ASPX_HUMAN Acrosomal protein SP-10 OS=Homo sapiens OX=9606 GN=ACRV1 PE=1 SV=2 2 | MNRFLLLMSLYLLGSARGTSSQPNELSGSIDHQTSVQQLPGEFFSLENPSDAEALYETSS 3 | GLNTLSEHGSSEHGSSKHTVAEHTSGEHAESEHASGEPAATEHAEGEHTVGEQPSGEQPS 4 | GEHLSGEQPLSELESGEQPSDEQPSGEHGSGEQPSGEQASGEQPSGEHASGEQASGAPIS 5 | STSTGTILNCYTCAYMNDQGKCLRGEGTCITQNSQQCMLKKIFEGGKLQFMVQGCENMCP 6 | SMNLFSHGTRMQIICCRNQSFCNKI 7 | -------------------------------------------------------------------------------- /data/uniprotSeq/P30281.fasta: -------------------------------------------------------------------------------- 1 | 
>sp|P30281|CCND3_HUMAN G1/S-specific cyclin-D3 OS=Homo sapiens OX=9606 GN=CCND3 PE=1 SV=2 2 | MELLCCEGTRHAPRAGPDPRLLGDQRVLQSLLRLEERYVPRASYFQCVQREIKPHMRKML 3 | AYWMLEVCEEQRCEEEVFPLAMNYLDRYLSCVPTRKAQLQLLGAVCMLLASKLRETTPLT 4 | IEKLCIYTDHAVSPRQLRDWEVLVLGKLKWDLAAVIAHDFLAFILHRLSLPRDRQALVKK 5 | HAQTFLALCATDYTFAMYPPSMIATGSIGAAVQGLGACSMSGDELTELLAGITGTEVDCL 6 | RACQEQIEAALRESLREASQTSSSPAPKAPRGSSSQGPSQTSTPTDVTAIHL 7 | -------------------------------------------------------------------------------- /data/uniprotSeq/P78345.fasta: -------------------------------------------------------------------------------- 1 | >sp|P78345|RPP38_HUMAN Ribonuclease P protein subunit p38 OS=Homo sapiens OX=9606 GN=RPP38 PE=1 SV=2 2 | MAAAPQAPGRGSLRKTRPLVVKTSLNNPYIIRWSALESEDMHFILQTLEDRLKAIGLQKI 3 | EDKKKKNKTPFLKKESREKCSIAVDISENLKEKKTDAKQQVSGWTPAHVRKQLAIGVNEV 4 | TRALERRELLLVLVCKSVKPAMITSHLIQLSLSRSVPACQVPRLSERIAPVIGLKCVLAL 5 | AFKKNTTDFVDEVRAIIPRVPSLSVPWLQDRIEDSGENLETEPLESQDRELLDTSFEDLS 6 | KPKRKLADGRQASVTLQPLKIKKLIPNPNKIRKPPKSKKATPK 7 | -------------------------------------------------------------------------------- /data/InputFiles/stack.csv: -------------------------------------------------------------------------------- 1 | Pairs,NP,CG,GC,GU,UG,AU,UA,NN 2 | NP,100000,100000,100000,100000,100000,100000,100000,100000 3 | CG,100000,-240,-330,-210,-140,-210,-210,-140 4 | GC,100000,-330,-340,-250,-150,-220,-240,-150 5 | GU,100000,-210,-250,130,-50,-140,-130,130 6 | UG,100000,-140,-150,-50,30,-60,-100,30 7 | AU,100000,-210,-220,-140,-60,-110,-90,-60 8 | UA,100000,-210,-240,-130,-100,-90,-130,-90 9 | NN,100000,-140,-150,130,30,-60,-90,130 10 | -------------------------------------------------------------------------------- /data/uniprotSeq/P57076.fasta: -------------------------------------------------------------------------------- 1 | >sp|P57076|CF298_HUMAN Cilia- and flagella-associated protein 298 OS=Homo sapiens OX=9606 GN=CFAP298 PE=1 SV=1 2 | 
MVLLHVKRGDESQFLLQAPGSTELEELTVQVARVYNGRLKVQRLCSEMEELAEHGIFLPP 3 | NMQGLTDDQIEELKLKDEWGEKCVPSGGAVFKKDDIGRRNGQAPNEKMKQVLKKTIEEAK 4 | AIISKKQVEAGVCVTMEMVKDALDQLRGAVMIVYPMGLPPYDPIRMEFENKEDLSGTQAG 5 | LNVIKEAEAQLWWAAKELRRTKKLSDYVGKNEKTKIIAKIQQRGQGAPAREPIISSEEQK 6 | QLMLYYHRRQEELKRLEENDDDAYLNSPWADNTALKRHFHGVKDIKWRPR 7 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q8NH43.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q8NH43|OR4L1_HUMAN Olfactory receptor 4L1 OS=Homo sapiens OX=9606 GN=OR4L1 PE=3 SV=1 2 | MDLKNGSLVTEFILLGFFGRWELQIFFFVTFSLIYGATVMGNILIMVTVTCRSTLHSPLY 3 | FLLGNLSFLDMCLSTATTPKMIIDLLTDHKTISVWGCVTQMFFMHFFGGAEMTLLIIMAF 4 | DRYVAICKPLHYRTIMSHKLLKGFAILSWIIGFLHSISQIVLTMNLPFCGHNVINNIFCD 5 | LPLVIKLACIETYTLELFVIADSGLLSFTCFILLLVSYIVILVSVPKKSSHGLSKALSTL 6 | SAHIIVVTLFFGPCIFIYVWPFSSLASNKTLAVFYTVITPLLNPSIYTLRNKKMQEAIRK 7 | LRFQYVSSAQNF 8 | -------------------------------------------------------------------------------- /data/uniprotSeq/A6NH11.fasta: -------------------------------------------------------------------------------- 1 | >sp|A6NH11|GLTD2_HUMAN Glycolipid transfer protein domain-containing protein 2 OS=Homo sapiens OX=9606 GN=GLTPD2 PE=1 SV=2 2 | MGVAARPPALRHWFSHSIPLAIFALLLLYLSVRSLGARSGCGPRAQPCVPGETAPFQVRQ 3 | ESGTLEAPERKQPPCLGPRGMLGRMMRRFHASLKPEGDVGLSPYLAGWRALVEFLTPLGS 4 | VFAFATREAFTKVTDLEARVHGPDAEHYWSLVAMAAWERRAGLLEQPGAAPRDPTRSSGS 5 | RTLLLLHRALRWSQLCLHRVATGALGGPDAGVQCSDAYRAALGPHHPWLVRQTARLAFLA 6 | FPGRRRLLELACPGATEAEARAALVRAAGTLEDVYNRTQSLLAERGLLQLA 7 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q8NGJ5.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q8NGJ5|O51L1_HUMAN Olfactory receptor 51L1 OS=Homo sapiens OX=9606 GN=OR51L1 PE=3 SV=1 2 | MGDWNNSDAVEPIFILRGFPGLEYVHSWLSILFCLAYLVAFMGNVTILSVIWIESSLHQP 3 | 
MYYFISILAVNDLGMSLSTLPTMLAVLWLDAPEIQASACYAQLFFIHTFTFLESSVLLAM 4 | AFDRFVAICHPLHYPTILTNSVIGKIGLACLLRSLGVVLPTPLLLRHYHYCHGNALSHAF 5 | CLHQDVLRLSCTDARTNSIYGLCVVIATLGVDSIFILLSYVLILNTVLDIASREEQLKAL 6 | NTCVSHICVVLIFFVPVIGVSMVHRFGKHLSPIVHILMADIYLLLPPVLNPIVYSVRTKQ 7 | IRLGILHKFVLRRRF 8 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9HC38.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9HC38|GLOD4_HUMAN Glyoxalase domain-containing protein 4 OS=Homo sapiens OX=9606 GN=GLOD4 PE=1 SV=1 2 | MAARRALHFVFKVGNRFQTARFYRDVLGMKVESCSVARLECSGAISAHCSDYTRITEDSF 3 | SKPYDGKWSKTMVGFGPEDDHFVAELTYNYGVGDYKLGNDFMGITLASSQAVSNARKLEW 4 | PLTEVAEGVFETEAPGGYKFYLQNRSLPQSDPVLKVTLAVSDLQKSLNYWCNLLGMKIYE 5 | KDEEKQRALLGYADNQCKLELQGVKGGVDHAAAFGRIAFSCPQKELPDLEDLMKRENQKI 6 | LTPLVSLDTPGKATVQVVILADPDGHEICFVGDEAFRELSKMDPEGSKLLDDAMAADKSD 7 | EWFAKHNKPKASG 8 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q6PF06.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q6PF06|TM10B_HUMAN tRNA methyltransferase 10 homolog B OS=Homo sapiens OX=9606 GN=TRMT10B PE=1 SV=1 2 | MDWKLEGSTQKVESPVLQGQEGILEETGEDGLPEGFQLLQIDAEGECQEGEILATGSTAW 3 | CSKNVQRKQRHWEKIVAAKKSKRKQEKERRKANRAENPGICPQHSKRFLRALTKDKLLEA 4 | KHSGPRLCIDLSMTHYMSKKELSRLAGQIRRLYGSNKKADRPFWICLTGFTTDSPLYEEC 5 | VRMNDGFSSYLLDITEEDCFSLFPLETLVYLTPDSEHALEDVDLNKVYILGGLVDESIQK 6 | KVTFQKAREYSVKTARLPIQEYMVRNQNGKNYHSEILAINQVFDILSTYLETHNWPEALK 7 | KGVSSGKGYILRNSVE 8 | -------------------------------------------------------------------------------- /data/InputFiles/stack_H.csv: -------------------------------------------------------------------------------- 1 | Pairs,NP,CG,GC,GU,UG,AU,UA,NN 2 | NP,100000,100000,100000,100000,100000,100000,100000,100000 3 | CG,100000,-1060,-1340,-1210,-560,-1050,-1040,-560 4 | GC,100000,-1340,-1490,-1260,-830,-1140,-1240,-830 5 | 
GU,100000,-1210,-1260,-1460,-1350,-880,-1280,-880 6 | UG,100000,-560,-830,-1350,-930,-320,-700,-320 7 | AU,100000,-1050,-1140,-880,-320,-940,-680,-320 8 | UA,100000,-1040,-1240,-1280,-700,-680,-770,-680 9 | NN,100000,-560,-830,-880,-320,-320,-680,-320 10 | -------------------------------------------------------------------------------- /data/uniprotSeq/P78382.fasta: -------------------------------------------------------------------------------- 1 | >sp|P78382|S35A1_HUMAN CMP-sialic acid transporter OS=Homo sapiens OX=9606 GN=SLC35A1 PE=1 SV=1 2 | MAAPRDNVTLLFKLYCLAVMTLMAAVYTIALRYTRTSDKELYFSTTAVCITEVIKLLLSV 3 | GILAKETGSLGRFKASLRENVLGSPKELLKLSVPSLVYAVQNNMAFLALSNLDAAVYQVT 4 | YQLKIPCTALCTVLMLNRTLSKLQWVSVFMLCAGVTLVQWKPAQATKVVVEQNPLLGFGA 5 | IAIAVLCSGFAGVYFEKVLKSSDTSLWVRNIQMYLSGIIVTLAGVYLSDGAEIKEKGFFY 6 | GYTYYVWFVIFLASVGGLYTSVVVKYTDNIMKGFSAAAAIVLSTIASVMLFGLQITLTFA 7 | LGTLLVCVSIYLYGLPRQDTTSIQQGETASKERVIGV 8 | -------------------------------------------------------------------------------- /data/uniprotSeq/P47211.fasta: -------------------------------------------------------------------------------- 1 | >sp|P47211|GALR1_HUMAN Galanin receptor type 1 OS=Homo sapiens OX=9606 GN=GALR1 PE=1 SV=3 2 | MELAVGNLSEGNASWPEPPAPEPGPLFGIGVENFVTLVVFGLIFALGVLGNSLVITVLAR 3 | SKPGKPRSTTNLFILNLSIADLAYLLFCIPFQATVYALPTWVLGAFICKFIHYFFTVSML 4 | VSIFTLAAMSVDRYVAIVHSRRSSSLRVSRNALLGVGCIWALSIAMASPVAYHQGLFHPR 5 | ASNQTFCWEQWPDPRHKKAYVVCTFVFGYLLPLLLICFCYAKVLNHLHKKLKNMSKKSEA 6 | SKKKTAQTVLVVVVVFGISWLPHHIIHLWAEFGVFPLTPASFLFRITAHCLAYSNSSVNP 7 | IIYAFLSENFRKAYKQVFKCHIRKDSHLSDTKESKSRIDTPPSTNCTHV 8 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q3MIR4.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q3MIR4|CC50B_HUMAN Cell cycle control protein 50B OS=Homo sapiens OX=9606 GN=TMEM30B PE=1 SV=1 2 | MTWSATARGAHQPDNTAFTQQRLPAWQPLLSASIALPLFFCAGLAFIGLGLGLYYSSNGI 3 | 
KELEYDYTGDPGTGNCSVCAAAGQGRALPPPCSCAWYFSLPELFQGPVYLYYELTNFYQN 4 | NRRYGVSRDDAQLSGLPSALRHPVNECAPYQRSAAGLPIAPCGAIANSLFNDSFSLWHQR 5 | QPGGPYVEVPLDRSGIAWWTDYHVKFRNPPLVNGSLALAFQGTAPPPNWRRPVYELSPDP 6 | NNTGFINQDFVVWMRTAALPTFRKLYARIRQGNYSAGLPRGAYRVNITYNYPVRAFGGHK 7 | LLIFSSISWMGGKNPFLGIAYLVVGSLCILTGFVMLVVYIRYQDQDDDDEE 8 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9BY21.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9BY21|GPR87_HUMAN G-protein coupled receptor 87 OS=Homo sapiens OX=9606 GN=GPR87 PE=2 SV=1 2 | MGFNLTLAKLPNNELHGQESHNSGNRSDGPGKNTTLHNEFDTIVLPVLYLIIFVASILLN 3 | GLAVWIFFHIRNKTSFIFYLKNIVVADLIMTLTFPFRIVHDAGFGPWYFKFILCRYTSVL 4 | FYANMYTSIVFLGLISIDRYLKVVKPFGDSRMYSITFTKVLSVCVWVIMAVLSLPNIILT 5 | NGQPTEDNIHDCSKLKSPLGVKWHTAVTYVNSCLFVAVLVILIGCYIAISRYIHKSSRQF 6 | ISQSSRKRKHNQSIRVVVAVFFTCFLPYHLCRIPFTFSHLDRLLDESAQKILYYCKEITL 7 | FLSACNVCLDPIIYFFMCRSFSRRLFKKSNIRTRSESIRSLQSVRRSEVRIYYDYTDV 8 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q13155.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q13155|AIMP2_HUMAN Aminoacyl tRNA synthase complex-interacting multifunctional protein 2 OS=Homo sapiens OX=9606 GN=AIMP2 PE=1 SV=2 2 | MPMYQVKPYHGGGAPLRVELPTCMYRLPNVHGRSYGPAPGAGHVQEESNLSLQALESRQD 3 | DILKRLYELKAAVDGLSKMIQTPDADLDVTNIIQADEPTTLTTNALDLNSVLGKDYGALK 4 | DIVINANPASPPLSLLVLHRLLCEHFRVLSTVHTHSSVKSVPENLLKCFGEQNKKQPRQD 5 | YQLGFTLIWKNVPKTQMKFSIQTMCPIEGEGNIARFLFSLFGQKHNAVNATLIDSWVDIA 6 | IFQLKEGSSKEKAAVFRSMNSALGKSPWLAGNELTVADVVLWSVLQQIGGCSVTVPANVQ 7 | RWMRSCENLAPFNTALKLLK 8 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9BYB4.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9BYB4|GNB1L_HUMAN Guanine nucleotide-binding protein subunit beta-like protein 1 OS=Homo sapiens OX=9606 
GN=GNB1L PE=1 SV=2 2 | MTAPCPPPPPDPQFVLRGTQSPVHALHFCEGAQAQGRPLLFSGSQSGLVHIWSLQTRRAV 3 | TTLDGHGGQCVTWLQTLPQGRQLLSQGRDLKLCLWDLAEGRSAVVDSVCLESVGFCRSSI 4 | LAGGQPRWTLAVPGRGSDEVQILEMPSKTSVCALKPKADAKLGMPMCLRLWQADCSSRPL 5 | LLAGYEDGSVVLWDVSEQKVCSRIACHEEPVMDLDFDSQKARGISGSAGKALAVWSLDWQ 6 | QALQVRGTHELTNPGIAEVTIRPDRKILATAGWDHRIRVFHWRTMQPLAVLAFHSAAVQC 7 | VAFTADGLLAAGSKDQRISLWSLYPRA 8 | -------------------------------------------------------------------------------- /data/uniprotSeq/A6NFE2.fasta: -------------------------------------------------------------------------------- 1 | >sp|A6NFE2|SMCO2_HUMAN Single-pass membrane and coiled-coil domain-containing protein 2 OS=Homo sapiens OX=9606 GN=SMCO2 PE=2 SV=2 2 | MALTPTNLNNKMSLQMKMDCQEQQLTKKNNGFFQKLNVTEGAMQDLLKEIIKVDHILDRS 3 | DDEDDISSENPQTDFLHKGMLELEAEHDQDLSKQDKQETDVDEDPQASTSLQFSKKNLLE 4 | LCLKGMFLKLNYWNTKIGLQVKELGADYIDGTEKIDNIIKKINVTENTVKSLLKDMLTLK 5 | GQIEKLEDRGLDLDQGTSTEVNTCNEVYELKKKVIERLEDLCKNVELLSAKLRMYQMEAE 6 | DTDSHSSEEIDTEEMEALLPQAPASFLVQKSPPRNTAWKRALRIFIMFDVLTVTGLLCYI 7 | LFFGATFLFERVLLRMLGCRTTWDLREMREPFLNLEVEALLPS 8 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q8TDG2.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q8TDG2|ACTT1_HUMAN Actin-related protein T1 OS=Homo sapiens OX=9606 GN=ACTRT1 PE=2 SV=2 2 | MFNPHALDVPAVIFDNGSGLCKAGLSGEIGPRHVISSVLGHCKFNVPLARLNQKYFVGQE 3 | ALYKYEALHLHYPIERGLVTGWDDMEKLWKHLFERELGVKPSQQPVLMTEPSLNPREIRE 4 | KLAEMMFETFSVPGFYLSNHAVAALYASACVTGLVVDSGDGVTCTVPIFEGYSLPHAVTK 5 | LCMAGRDITEHLTRLLFASGFNFPCILNKAVVNNIKEKLCYIALEPEKELRKSRGEVLGA 6 | YRLPDGHVIHFGDELYQVPEVLFAPDQLGIHSPGLSKMVSSSIMKCDTDIQNKLYADIVL 7 | SGGTTLLPGLEERLMKEVEQLASKGTPIKITASPDRCFSAWIGASIMTSMSSFKQMWVTS 8 | ADFKEYGTSVVQRRCF 9 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q92734.fasta: -------------------------------------------------------------------------------- 1 | 
>sp|Q92734|TFG_HUMAN Protein TFG OS=Homo sapiens OX=9606 GN=TFG PE=1 SV=2 2 | MNGQLDLSGKLIIKAQLGEDIRRIPIHNEDITYDELVLMMQRVFRGKLLSNDEVTIKYKD 3 | EDGDLITIFDSSDLSFAIQCSRILKLTLFVNGQPRPLESSQVKYLRRELIELRNKVNRLL 4 | DSLEPPGEPGPSTNIPENDTVDGREEKSASDSSGKQSTQVMAASMSAFDPLKNQDEINKN 5 | VMSAFGLTDDQVSGPPSAPAEDRSGTPDSIASSSSAAHPPGVQPQQPPYTGAQTQAGQIE 6 | GQMYQQYQQQAGYGAQQPQAPPQQPQQYGIQYSASYSQQTGPQQPQQFQGYGQQPTSQAP 7 | APAFSGQPQQLPAQPPQQYQASNYPAQTYTAQTSQPTNYTVAPASQPGMAPSQPGAYQPR 8 | PGFTSLPGSTMTPPPSGPNPYARNRPPFGQGYTQPGPGYR 9 | -------------------------------------------------------------------------------- /data/uniprotSeq/P01860.fasta: -------------------------------------------------------------------------------- 1 | >sp|P01860|IGHG3_HUMAN Immunoglobulin heavy constant gamma 3 OS=Homo sapiens OX=9606 GN=IGHG3 PE=1 SV=2 2 | ASTKGPSVFPLAPCSRSTSGGTAALGCLVKDYFPEPVTVSWNSGALTSGVHTFPAVLQSS 3 | GLYSLSSVVTVPSSSLGTQTYTCNVNHKPSNTKVDKRVELKTPLGDTTHTCPRCPEPKSC 4 | DTPPPCPRCPEPKSCDTPPPCPRCPEPKSCDTPPPCPRCPAPELLGGPSVFLFPPKPKDT 5 | LMISRTPEVTCVVVDVSHEDPEVQFKWYVDGVEVHNAKTKPREEQYNSTFRVVSVLTVLH 6 | QDWLNGKEYKCKVSNKALPAPIEKTISKTKGQPREPQVYTLPPSREEMTKNQVSLTCLVK 7 | GFYPSDIAVEWESSGQPENNYNTTPPMLDSDGSFFLYSKLTVDKSRWQQGNIFSCSVMHE 8 | ALHNRFTQKSLSLSPGK 9 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q16690.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q16690|DUS5_HUMAN Dual specificity protein phosphatase 5 OS=Homo sapiens OX=9606 GN=DUSP5 PE=1 SV=2 2 | MKVTSLDGRQLRKMLRKEAAARCVVLDCRPYLAFAASNVRGSLNVNLNSVVLRRARGGAV 3 | SARYVLPDEAARARLLQEGGGGVAAVVVLDQGSRHWQKLREESAARVVLTSLLACLPAGP 4 | RVYFLKGGYETFYSEYPECCVDVKPISQEKIESERALISQCGKPVVNVSYRPAYDQGGPV 5 | EILPFLYLGSAYHASKCEFLANLHITALLNVSRRTSEACATHLHYKWIPVEDSHTADISS 6 | HFQEAIDFIDCVREKGGKVLVHCEAGISRSPTICMAYLMKTKQFRLKEAFDYIKQRRSMV 7 | SPNFGFMGQLLQYESEILPSTPNPQPPSCQGEAAGSSLIGHLQTLSPDMQGAYCTFPASV 8 | LAPVPTHSTVSELSRSPVATATSC 9 | 
-------------------------------------------------------------------------------- /data/uniprotSeq/Q96DV4.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q96DV4|RM38_HUMAN 39S ribosomal protein L38, mitochondrial OS=Homo sapiens OX=9606 GN=MRPL38 PE=1 SV=2 2 | MAAPWWRAALCECRRWRGFSTSAVLGRRTPPLGPMPNSDIDLSNLERLEKYRSFDRYRRR 3 | AEQEAQAPHWWRTYREYFGEKTDPKEKIDIGLPPPKVSRTQQLLERKQAIQELRANVEEE 4 | RAARLRTASVPLDAVRAEWERTCGPYHKQRLAEYYGLYRDLFHGATFVPRVPLHVAYAVG 5 | EDDLMPVYCGNEVTPTEAAQAPEVTYEAEEGSLWTLLLTSLDGHLLEPDAEYLHWLLTNI 6 | PGNRVAEGQVTCPYLPPFPARGSGIHRLAFLLFKQDQPIDFSEDARPSPCYQLAQRTFRT 7 | FDFYKKHQETMTPAGLSFFQCRWDDSVTYIFHQLLDMREPVFEFVRPPPYHPKQKRFPHR 8 | QPLRYLDRYRDSHEPTYGIY 9 | -------------------------------------------------------------------------------- /data/uniprotSeq/P15086.fasta: -------------------------------------------------------------------------------- 1 | >sp|P15086|CBPB1_HUMAN Carboxypeptidase B OS=Homo sapiens OX=9606 GN=CPB1 PE=1 SV=4 2 | MLALLVLVTVALASAHHGGEHFEGEKVFRVNVEDENHINIIRELASTTQIDFWKPDSVTQ 3 | IKPHSTVDFRVKAEDTVTVENVLKQNELQYKVLISNLRNVVEAQFDSRVRATGHSYEKYN 4 | KWETIEAWTQQVATENPALISRSVIGTTFEGRAIYLLKVGKAGQNKPAIFMDCGFHAREW 5 | ISPAFCQWFVREAVRTYGREIQVTELLDKLDFYVLPVLNIDGYIYTWTKSRFWRKTRSTH 6 | TGSSCIGTDPNRNFDAGWCEIGASRNPCDETYCGPAAESEKETKALADFIRNKLSSIKAY 7 | LTIHSYSQMMIYPYSYAYKLGENNAELNALAKATVKELASLHGTKYTYGPGATTIYPAAG 8 | GSDDWAYDQGIRYSFTFELRDTGRYGFLLPESQIRATCEETFLAIKYVASYVLEHLY 9 | -------------------------------------------------------------------------------- /data/uniprotSeq/A6NKF1.fasta: -------------------------------------------------------------------------------- 1 | >sp|A6NKF1|SAC31_HUMAN SAC3 domain-containing protein 1 OS=Homo sapiens OX=9606 GN=SAC3D1 PE=1 SV=2 2 | MAGRRAQTGSAPPRPAAPHPRPASRAFPQHCRPRDAERPPSPRSPLMPGCELPVGTCPDM 3 | CPAAERAQREREHRLHRLEVVPGCRQDPPRADPQRAVKEYSRPAAGKPRPPPSQLRPPSV 4 | LLATVRYLAGEVAESADIARAEVASFVADRLRAVLLDLALQGAGDAEAAVVLEAALATLL 5 | 
TVVARLGPDAARGPADPVLLQAQVQEGFGSLRRCYARGAGPHPRQPAFQGLFLLYNLGSV 6 | EALHEVLQLPAALRACPPLRKALAVDAAFREGNAARLFRLLQTLPYLPSCAVQCHVGHAR 7 | REALARFARAFSTPKGQTLPLGFMVNLLALDGLREARDLCQAHGLPLDGEERVVFLRGRY 8 | VEEGLPPASTCKVLVESKLRGRTLEEVVMAEEEDEGTDRPGSPA 9 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q5VV16.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q5VV16|FX4L5_HUMAN Forkhead box protein D4-like 5 OS=Homo sapiens OX=9606 GN=FOXD4L5 PE=3 SV=1 2 | MNLPRAERPRSTPQRSLRDSDGEDGKIDVLGEEEDEDEVEDEEEEARQQFLEQSLQPGLQ 3 | VARWGGVALPREHIEGGGGPSDPSEFGTKFRAPPRSAAASEDARQPAKPPYSYIALITMA 4 | ILQNPHKRLTLSGICAFISGRFPYYRRKFPAWQNSIRHNLSLNDCFVKIPREPGHPGKGN 5 | YWSLDPASQDMFDNGSFLRRRKRFKRHQLTPGAHLPHPFPLPAAHAALHNPHPGPLLGAP 6 | APPQPVPGAYPNTAPGRCPYALLHPHPLRYLLLSAPVYAGAPKKAEGADLATPAPFPCCS 7 | PHLVLSLGRRARVWRRHREADASLSALRVLCKGSGERVQGLRRVCPRPRGATATCSSDHQ 8 | ACCIPKPLPLCCKCPPPLLLGQFCSNSSSIRRTAPTAALPPRARCWAGTCRPRRRC 9 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q5SWX8.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q5SWX8|ODR4_HUMAN Protein odr-4 homolog OS=Homo sapiens OX=9606 GN=ODR4 PE=1 SV=1 2 | MGRTYIVEETVGQYLSNINLQGKAFVSGLLIGQCSSQKDYVILATRTPPKEEQSENLKHP 3 | KAKLDNLDEEWATEHACQVSRMLPGGLLVLGVFIITTLELANDFQNALRRLMFAVEKSIN 4 | RKRLWNFTEEEVSERVTLHICASTKKIFCRTYDIHDPKSSARPADWKYQSGLSSSWLSLE 5 | CTVHINIHIPLSATSVSYTLEKNTKNGLTRWAKEIENGVYLINGQVKDEDCDLLEGQKKS 6 | SRGNTQATSHSFDVRVLTQLLLNSDHRSTATVQICSGSVNLKGAVKCRAYIHSSKPKVKD 7 | AVQAVKRDILNTVADRCEMLFEDLLLNEIPEKKDSEKEFHVLPYRVFVPLPGSTVMLCDY 8 | KFDDESAEEIRDHFMEMLDHTIQIEDLEIAEETNTACMSSSMNSQASLDNTDDEQPKQPI 9 | KTTMLLKIQQNIGVIAAFTVAVLAAGISFHYFSD 10 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q15465.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q15465|SHH_HUMAN 
Sonic hedgehog protein OS=Homo sapiens OX=9606 GN=SHH PE=1 SV=1 2 | MLLLARCLLLVLVSSLLVCSGLACGPGRGFGKRRHPKKLTPLAYKQFIPNVAEKTLGASG 3 | RYEGKISRNSERFKELTPNYNPDIIFKDEENTGADRLMTQRCKDKLNALAISVMNQWPGV 4 | KLRVTEGWDEDGHHSEESLHYEGRAVDITTSDRDRSKYGMLARLAVEAGFDWVYYESKAH 5 | IHCSVKAENSVAAKSGGCFPGSATVHLEQGGTKLVKDLSPGDRVLAADDQGRLLYSDFLT 6 | FLDRDDGAKKVFYVIETREPRERLLLTAAHLLFVAPHNDSATGEPEASSGSGPPSGGALG 7 | PRALFASRVRPGQRVYVVAERDGDRRLLPAAVHSVTLSEEAAGAYAPLTAQGTILINRVL 8 | ASCYAVIEEHSWAHRAFAPFRLAHALLAALAPARTDRGGDSGGGDRGGGGGRVALTAPGA 9 | ADAPGAGATAGIHWYSQLLYQIGTWLLDSEALHPLGMAVKSS 10 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q6NT16.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q6NT16|S18B1_HUMAN MFS-type transporter SLC18B1 OS=Homo sapiens OX=9606 GN=SLC18B1 PE=1 SV=1 2 | MEALGDLEGPRAPGGDDPAGSAGETPGWLSREQVFVLISAASVNLGSMMCYSILGPFFPK 3 | EAEKKGASNTIIGMIFGCFALFELLASLVFGNYLVHIGAKFMFVAGMFVSGGVTILFGVL 4 | DRVPDGPVFIAMCFLVRVMDAVSFAAAMTASSSILAKAFPNNVATVLGSLETFSGLGLIL 5 | GPPVGGFLYQSFGYEVPFIVLGCVVLLMVPLNMYILPNYESDPGEHSFWKLIALPKVGLI 6 | AFVINSLSSCFGFLDPTLSLFVLEKFNLPAGYVGLVFLGMALSYAISSPLFGLLSDKRPP 7 | LRKWLLVFGNLITAGCYMLLGPVPILHIKSQLWLLVLILVVSGLSAGMSIIPTFPEILSC 8 | AHENGFEEGLSTLGLVSGLFSAMWSIGAFMGPTLGGFLYEKIGFEWAAAIQGLWALISGL 9 | AMGLFYLLEYSRRKRSKSQNILSTEEERTTLLPNET 10 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q05901.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q05901|ACHB3_HUMAN Neuronal acetylcholine receptor subunit beta-3 OS=Homo sapiens OX=9606 GN=CHRNB3 PE=2 SV=2 2 | MLPDFMLVLIVLGIPSSATTGFNSIAENEDALLRHLFQGYQKWVRPVLHSNDTIKVYFGL 3 | KISQLVDVDEKNQLMTTNVWLKQEWTDHKLRWNPDDYGGIHSIKVPSESLWLPDIVLFEN 4 | ADGRFEGSLMTKVIVKSNGTVVWTPPASYKSSCTMDVTFFPFDRQNCSMKFGSWTYDGTM 5 | VDLILINENVDRKDFFDNGEWEILNAKGMKGNRRDGVYSYPFITYSFVLRRLPLFYTLFL 6 | 
IIPCLGLSFLTVLVFYLPSDEGEKLSLSTSVLVSLTVFLLVIEEIIPSSSKVIPLIGEYL 7 | LFIMIFVTLSIIVTVFVINVHHRSSSTYHPMAPWVKRLFLQKLPKLLCMKDHVDRYSSPE 8 | KEESQPVVKGKVLEKKKQKQLSDGEKVLVAFLEKAADSIRYISRHVKKEHFISQVVQDWK 9 | FVAQVLDRIFLWLFLIVSVTGSVLIFTPALKMWLHSYH 10 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9NWZ3.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9NWZ3|IRAK4_HUMAN Interleukin-1 receptor-associated kinase 4 OS=Homo sapiens OX=9606 GN=IRAK4 PE=1 SV=1 2 | MNKPITPSTYVRCLNVGLIRKLSDFIDPQEGWKKLAVAIKKPSGDDRYNQFHIRRFEALL 3 | QTGKSPTSELLFDWGTTNCTVGDLVDLLIQNEFFAPASLLLPDAVPKTANTLPSKEAITV 4 | QQKQMPFCDKDRTLMTPVQNLEQSYMPPDSSSPENKSLEVSDTRFHSFSFYELKNVTNNF 5 | DERPISVGGNKMGEGGFGVVYKGYVNNTTVAVKKLAAMVDITTEELKQQFDQEIKVMAKC 6 | QHENLVELLGFSSDGDDLCLVYVYMPNGSLLDRLSCLDGTPPLSWHMRCKIAQGAANGIN 7 | FLHENHHIHRDIKSANILLDEAFTAKISDFGLARASEKFAQTVMTSRIVGTTAYMAPEAL 8 | RGEITPKSDIYSFGVVLLEIITGLPAVDEHREPQLLLDIKEEIEDEEKTIEDYIDKKMND 9 | ADSTSVEAMYSVASQCLHEKKNKRPDIKKVQQLLQEMTAS 10 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9BY11.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9BY11|PACN1_HUMAN Protein kinase C and casein kinase substrate in neurons protein 1 OS=Homo sapiens OX=9606 GN=PACSIN1 PE=1 SV=1 2 | MSSSYDEASLAPEETTDSFWEVGNYKRTVKRIDDGHRLCNDLMNCVQERAKIEKAYGQQL 3 | TDWAKRWRQLIEKGPQYGSLERAWGAIMTEADKVSELHQEVKNNLLNEDLEKVKNWQKDA 4 | YHKQIMGGFKETKEAEDGFRKAQKPWAKKMKELEAAKKAYHLACKEEKLAMTREMNSKTE 5 | QSVTPEQQKKLQDKVDKCKQDVQKTQEKYEKVLEDVGKTTPQYMENMEQVFEQCQQFEEK 6 | RLVFLKEVLLDIKRHLNLAENSSYIHVYRELEQAIRGADAQEDLRWFRSTSGPGMPMNWP 7 | QFEEWNPDLPHTTTKKEKQPKKAEGVALTNATGAVESTSQAGDRGSVSSYDRGQPYATEW 8 | SDDESGNPFGGSETNGGANPFEDDSKGVRVRALYDYDGQEQDELSFKAGDELTKLGEEDE 9 | QGWCRGRLDSGQLGLYPANYVEAI 10 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q6B0B8.fasta: 
-------------------------------------------------------------------------------- 1 | >sp|Q6B0B8|TIGD3_HUMAN Tigger transposable element-derived protein 3 OS=Homo sapiens OX=9606 GN=TIGD3 PE=1 SV=1 2 | MELSSKKKLHALSLAEKIQVLELLDESKMSQSEVARRFQVSQPQISRICKNKEKLLADWC 3 | SGTANRERKRKRESKYSGIDEALLCWYHIARAKAWDVTGPMLLHKAKELADIMGQDFVPS 4 | IGWLVRWKRRNNVGFGARHVLAPSFPPEPPPPGLTSQAQLPLSLKDFSPEDVFGCAELPL 5 | LYRAVPGSFGACDQVQVLLCANSRGTEKRRVLLGGLQAAPRCFFGIRSEALPASYHPDLG 6 | IPWLEWLAQFDRDMGQQGRQVALLLAARVVEELAGLPGLYHVKLLPLAASSTTPPLPSSV 7 | VRAFKAHYRHRLLGKLAAIQSERDGTSLAEAGAGITVLDALHVASAAWAKVPPQLIFSSF 8 | IQEGLAPGKTPPSSHKTSEMPPVPGGLSLEEFSRFVDLEGEEPRSGVCKEEIGTEDEKGD 9 | REGAFEPLPTKADALRALGTLRRWFECNSTSPELFEKFYDCEEEVERLCCL 10 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9Y512.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9Y512|SAM50_HUMAN Sorting and assembly machinery component 50 homolog OS=Homo sapiens OX=9606 GN=SAMM50 PE=1 SV=3 2 | MGTVHARSLEPLPSSGPDFGGLGEEAEFVEVEPEAKQEILENKDVVVQHVHFDGLGRTKD 3 | DIIICEIGDVFKAKNLIEVMRKSHEAREKLLRLGIFRQVDVLIDTCQGDDALPNGLDVTF 4 | EVTELRRLTGSYNTMVGNNEGSMVLGLKLPNLLGRAEKVTFQFSYGTKETSYGLSFFKPR 5 | PGNFERNFSVNLYKVTGQFPWSSLRETDRGMSAEYSFPIWKTSHTVKWEGVWRELGCLSR 6 | TASFAVRKESGHSLKSSLSHAMVIDSRNSSILPRRGALLKVNQELAGYTGGDVSFIKEDF 7 | ELQLNKQLIFDSVFSASFWGGMLVPIGDKPSSIADRFYLGGPTSIRGFSMHSIGPQSEGD 8 | YLGGEAYWAGGLHLYTPLPFRPGQGGFGELFRTHFFLNAGNLCNLNYGEGPKAHIRKLAE 9 | CIRWSYGAGIVLRLGNIARLELNYCVPMGVQTGDRICDGVQFGAGIRFL 10 | -------------------------------------------------------------------------------- /data/uniprotSeq/P47974.fasta: -------------------------------------------------------------------------------- 1 | >sp|P47974|TISD_HUMAN mRNA decay activator protein ZFP36L2 OS=Homo sapiens OX=9606 GN=ZFP36L2 PE=1 SV=3 2 | MSTTLLSAFYDVDFLCKTEKSLANLNLNNMLDKKAVGTPVAAAPSSGFAPGFLRRHSASN 3 | LHALAHPAPSPGSCSPKFPGAANGSSCGSAAAGGPTSYGTLKEPSGGGGTALLNKENKFR 4 | 
DRSFSENGDRSQHLLHLQQQQKGGGGSQINSTRYKTELCRPFEESGTCKYGEKCQFAHGF 5 | HELRSLTRHPKYKTELCRTFHTIGFCPYGPRCHFIHNADERRPAPSGGASGDLRAFGTRD 6 | ALHLGFPREPRPKLHHSLSFSGFPSGHHQPPGGLESPLLLDSPTSRTPPPPSCSSASSCS 7 | SSASSCSSASAASTPSGAPTCCASAAAAAAAALLYGTGGAEDLLAPGAPCAACSSASCAN 8 | NAFAFGPELSSLITPLAIQTHNFAAVAAAAYYRSQQQQQQQGLAPPAQPPAPPSATLPAG 9 | AAAPPSPPFSFQLPRRLSDSPVFDAPPSPPDSLSDRDSYLSGSLSSGSLSGSESPSLDPG 10 | RRLPIFSRLSISDD 11 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q8IW19.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q8IW19|APLF_HUMAN Aprataxin and PNK-like factor OS=Homo sapiens OX=9606 GN=APLF PE=1 SV=1 2 | MSGGFELQPRDGGPRVALAPGETVIGRGPLLGITDKRVSRRHAILEVAGGQLRIKPIHTN 3 | PCFYQSSEKSQLLPLKPNLWCYLNPGDSFSLLVDKYIFRILSIPSEVEMQCTLRNSQVLD 4 | EDNILNETPKSPVINLPHETTGASQLEGSTEIAKTQMTPTNSVSFLGENRDCNKQQPILA 5 | ERKRILPTWMLAEHLSDQNLSVPAISGGNVIQGSGKEEICKDKSQLNTTQQGRRQLISSG 6 | SSENTSAEQDTGEECKNTDQEESTISSKEMPQSFSAITLSNTEMNNIKTNAQRNKLPIEE 7 | LGKVSKHKIATKRTPHKEDEAMSCSENCSSAQGDSLQDESQGSHSESSSNPSNPETLHAK 8 | ATDSVLQGSEGNKVKRTSCMYGANCYRKNPVHFQHFSHPGDSDYGGVQIVGQDETDDRPE 9 | CPYGPSCYRKNPQHKIEYRHNTLPVRNVLDEDNDNVGQPNEYDLNDSFLDDEEEDYEPTD 10 | EDSDWEPGKEDEEKEDVEELLKEAKRFMKRK 11 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q8TAA9.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q8TAA9|VANG1_HUMAN Vang-like protein 1 OS=Homo sapiens OX=9606 GN=VANGL1 PE=1 SV=1 2 | MDTESTYSGYSYYSSHSKKSHRQGERTRERHKSPRNKDGRGSEKSVTIQPPTGEPLLGND 3 | STRTEEVQDDNWGETTTAITGTSEHSISQEDIARISKDMEDSVGLDCKRYLGLTVASFLG 4 | LLVFLTPIAFILLPPILWRDELEPCGTICEGLFISMAFKLLILLIGTWALFFRKRRADMP 5 | RVFVFRALLLVLIFLFVVSYWLFYGVRILDSRDRNYQGIVQYAVSLVDALLFIHYLAIVL 6 | LELRQLQPMFTLQVVRSTDGESRFYSLGHLSIQRAALVVLENYYKDFTIYNPNLLTASKF 7 | RAAKHMAGLKVYNVDGPSNNATGQSRAMIAAAARRRDSSHNELYYEEAEHERRVKKRKAR 8 | 
LVVAVEEAFIHIQRLQAEEQQKAPGEVMDPREAAQAIFPSMARALQKYLRITRQQNYHSM 9 | ESILQHLAFCITNGMTPKAFLERYLSAGPTLQYDKDRWLSTQWRLVSDEAVTNGLRDGIV 10 | FVLKCLDFSLVVNVKKIPFIILSEEFIDPKSHKFVLRLQSETSV 11 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9UBM8.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9UBM8|MGT4C_HUMAN Alpha-1,3-mannosyl-glycoprotein 4-beta-N-acetylglucosaminyltransferase C OS=Homo sapiens OX=9606 GN=MGAT4C PE=2 SV=2 2 | MFKFHQMKHIFEILDKMRCLRKRSTVSFLGVLVIFLLFMNLYIEDSYVLEGDKQLIRETS 3 | THQLNSERYVHTFKDLSNFSGAINVTYRYLAATPLQRKRYLTIGLSSVKRKKGNYLLETI 4 | KSIFEQSSYEELKEISVVVHLADFNSSWRDAMVQDITQKFAHHIIAGRLMVIHAPEEYYP 5 | ILDGLKRNYNDPEDRVKFRSKQNVDYAFLLNFCANTSDYYVMLEDDVRCSKNFLTAIKKV 6 | IASLEGTYWVTLEFSKLGYIGKLYHSHDLPRLAHFLLMFYQEMPCDWLLTHFRGLLAQKN 7 | VIRFKPSLFQHMGYYSSYKGTENKLKDDDFEEESFDIPDNPPASLYTNMNVFENYEASKA 8 | YSSVDEYFWGKPPSTGDVFVIVFENPIIIKKIKVNTGTEDRQNDILHHGALDVGENVMPS 9 | KQRRQCSTYLRLGEFKNGNFEMSGVNQKIPFDIHCMRIYVTKTQKEWLIIRSISIWTS 10 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q13087.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q13087|PDIA2_HUMAN Protein disulfide-isomerase A2 OS=Homo sapiens OX=9606 GN=PDIA2 PE=1 SV=2 2 | MSRQLLPVLLLLLLRASCPWGQEQGARSPSEEPPEEEIPKEDGILVLSRHTLGLALREHP 3 | ALLVEFYAPWCGHCQALAPEYSKAAAVLAAESMVVTLAKVDGPAQRELAEEFGVTEYPTL 4 | KFFRNGNRTHPEEYTGPRDAEGIAEWLRRRVGPSAMRLEDEAAAQALIGGRDLVVIGFFQ 5 | DLQDEDVATFLALAQDALDMTFGLTDRPRLFQQFGLTKDTVVLFKKFDEGRADFPVDEEL 6 | GLDLGDLSRFLVTHSMRLVTEFNSQTSAKIFAARILNHLLLFVNQTLAAHRELLAGFGEA 7 | APRFRGQVLFVVVDVAADNEHVLQYFGLKAEAAPTLRLVNLETTKKYAPVDGGPVTAASI 8 | TAFCHAVLNGQVKPYLLSQEIPPDWDQRPVKTLVGKNFEQVAFDETKNVFVKFYAPWCTH 9 | CKEMAPAWEALAEKYQDHEDIIIAELDATANELDAFAVHGFPTLKYFPAGPGRKVIEYKS 10 | TRDLETFSKFLDNGGVLPTEEPPEEPAAPFPEPPANSTMGSKEEL 11 | -------------------------------------------------------------------------------- 
/data/uniprotSeq/Q86TJ5.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q86TJ5|ZN554_HUMAN Zinc finger protein 554 OS=Homo sapiens OX=9606 GN=ZNF554 PE=1 SV=1 2 | MVTCAHLGRRARLPAAQPSACPGTCFSQEERMAAGYLPRWSQELVTFEDVSMDFSQEEWE 3 | LLEPAQKNLYREVMLENYRNVVSLEALKNQCTDVGIKEGPLSPAQTSQVTSLSSWTGYLL 4 | FQPVASSHLEQREALWIEEKGTPQASCSDWMTVLRNQDSTYKKVALQEEPASGINMIKLI 5 | REDGGWKQLEDSHEDPQGLLSQKASLHVVAVPQEKATAWHGFGENGNLSPALVLSQGSSK 6 | GNHLCGSELDITSLASDSVLNHHQLGYADRRPCESNECGNAIRQNSHFIQHGGKMFVYLE 7 | NGQSLNHGMALTIHNKINTAEKPFECHQCGKVFNRRHSLSEHQRIHTGEKPYECQECGRA 8 | FTHSSTLTRHLRTHTGEKPYGCGECGKAFNRISSLTQHQRIHTGEKPYKCEDCGKSFCQS 9 | SYLILHKRTHTGEKPYECSECGKAFSDRSSLNQHERTHTGENPYECKQCGRAFSQRSSLV 10 | RHERTHTGEKPYRCQECGKAFSQSSSLVTHQKTHSSQKTYKIIDCGKAFYQNRHLIGY 11 | -------------------------------------------------------------------------------- /data/uniprotSeq/P40222.fasta: -------------------------------------------------------------------------------- 1 | >sp|P40222|TXLNA_HUMAN Alpha-taxilin OS=Homo sapiens OX=9606 GN=TXLNA PE=1 SV=3 2 | MKNQDKKNGAAKQSNPKSSPGQPEAGPEGAQERPSQAAPAVEAEGPGSSQAPRKPEGAQA 3 | RTAQSGALRDVSEELSRQLEDILSTYCVDNNQGGPGEDGAQGEPAEPEDAEKSRTYVARN 4 | GEPEPTPVVNGEKEPSKGDPNTEEIRQSDEVGDRDHRRPQEKKKAKGLGKEITLLMQTLN 5 | TLSTPEEKLAALCKKYAELLEEHRNSQKQMKLLQKKQSQLVQEKDHLRGEHSKAVLARSK 6 | LESLCRELQRHNRSLKEEGVQRAREEEEKRKEVTSHFQVTLNDIQLQMEQHNERNSKLRQ 7 | ENMELAERLKKLIEQYELREEHIDKVFKHKDLQQQLVDAKLQQAQEMLKEAEERHQREKD 8 | FLLKEAVESQRMCELMKQQETHLKQQLALYTEKFEEFQNTLSKSSEVFTTFKQEMEKMTK 9 | KIKKLEKETTMYRSRWESSNKALLEMAEEKTVRDKELEGLQVKIQRLEKLCRALQTERND 10 | LNKRVQDLSAGGQGSLTDSGPERRPEGPGAQAPSSPRVTEAPCYPGAPSTEASGQTGPQE 11 | PTSARA 12 | -------------------------------------------------------------------------------- /data/uniprotSeq/P07947.fasta: -------------------------------------------------------------------------------- 1 | >sp|P07947|YES_HUMAN Tyrosine-protein kinase Yes OS=Homo sapiens OX=9606 GN=YES1 PE=1 SV=3 2 | 
MGCIKSKENKSPAIKYRPENTPEPVSTSVSHYGAEPTTVSPCPSSSAKGTAVNFSSLSMT 3 | PFGGSSGVTPFGGASSSFSVVPSSYPAGLTGGVTIFVALYDYEARTTEDLSFKKGERFQI 4 | INNTEGDWWEARSIATGKNGYIPSNYVAPADSIQAEEWYFGKMGRKDAERLLLNPGNQRG 5 | IFLVRESETTKGAYSLSIRDWDEIRGDNVKHYKIRKLDNGGYYITTRAQFDTLQKLVKHY 6 | TEHADGLCHKLTTVCPTVKPQTQGLAKDAWEIPRESLRLEVKLGQGCFGEVWMGTWNGTT 7 | KVAIKTLKPGTMMPEAFLQEAQIMKKLRHDKLVPLYAVVSEEPIYIVTEFMSKGSLLDFL 8 | KEGDGKYLKLPQLVDMAAQIADGMAYIERMNYIHRDLRAANILVGENLVCKIADFGLARL 9 | IEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILQTELVTKGRVPYPGMVNRE 10 | VLEQVERGYRMPCPQGCPESLHELMNLCWKKDPDERPTFEYIQSFLEDYFTATEPQYQPG 11 | ENL 12 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q494X3.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q494X3|ZN404_HUMAN Zinc finger protein 404 OS=Homo sapiens OX=9606 GN=ZNF404 PE=1 SV=2 2 | MARVPLTFSDVAIDFSQEEWEYLNSDQRDLYRDVMLENYTNLVSLDFNFTTESNKLSSEK 3 | RNYEVNAYHQETWKRNKTFNLMRFIFRTDPQYTIEFGRQQRPKVGCFSQMIFKKHKSLPL 4 | HKRNNTREKSYECKEYKKGFRKYLHLTEHLRDHTGVIPYECNECGKAFVVFQHFIRHRKI 5 | HTDLKPYECNGCEKAFRFYSQLIQHQIIHTGMKPYECKQCGKAFRRHSHLTEHQKIHVGL 6 | KPFECKECGETFRLYRHMCLHQKIHHGVKPYKCKECGKAFGHRSSLYQHKKIHSGEKPYK 7 | CEQCEKAFVRSYLLVEHQRSHTGEKPHECMECGKAFGKGSSLLKHKRIHSSEKLYDCKDC 8 | GKAFCRGSQLTQHQRIHTGEKPHECKECGKTFKLHSYLIQHQIIHTDLKPYECKQCGKAF 9 | SRVGDLKTHQSIHAGEKPYECKECGKTFRLNSQLIYHQTIHTGLKPYVCKECKKAFRSIS 10 | GLSQHKRIHTGEKPYECKECDKAFNRSDRLTQHETIHTGVKPQKCKECGKAFSHCYQLSQ 11 | HQRFHHGERLLM 12 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q5T749.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q5T749|KPRP_HUMAN Keratinocyte proline-rich protein OS=Homo sapiens OX=9606 GN=KPRP PE=1 SV=1 2 | MCDQQQIQCRLPLQQCCVKGPSFCSSQSPFAQSQVVVQAPCEMQIVDCPASCPVQVCQVS 3 | DQAPCQSQTTQVKCQSKTKQVKGQAQCQSKTTQVKGQAASQSQTSSVQSQAPCQSEVSYV 4 | QCEASQPVQTCFVECAPVCYTETCYVECPVQNYVPCPAPQPVQMYRGRPAVCQPQGRFST 5 | 
QCQYQGSYSSCGPQFQSRATCNNYTPQFQLRPSYSSCFPQYRSRTSFSPCVPQCQTQGSY 6 | GSFTEQHRSRSTSRCLPPPRRLQLFPRSCSPPRRFEPCSSSYLPLRPSEGFPNYCTPPRR 7 | SEPIYNSRCPRRPISSCSQRRGPKCRIEISSPCCPRQVPPQRCPVEIPPIRRRSQSCGPQ 8 | PSWGASCPELRPHVEPRPLPSFCPPRRLDQCPESPLQRCPPPAPRPRLRPEPCISLEPRP 9 | RPLPRQLSEPCLYPEPLPALRPTPRPVPLPRPGQCEIPEPRPCLQPCEHPEPCPRPEPIP 10 | LPAPCPSPEPCRETWRSPSPCWGPNPVPYPGDLGCHESSPHRLDTEAPYCGPSSYNQGQE 11 | SGAGCGPGDVFPERRGQDGHGDQGNAFAGVKGEAKSAYF -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1) 2 | project(derna) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | IF( NOT CMAKE_BUILD_TYPE ) 7 | SET( CMAKE_BUILD_TYPE Release) 8 | ENDIF() 9 | 10 | set(CMAKE_CXX_FLAGS "-Wall -Wextra") 11 | set(CMAKE_CXX_FLAGS "-Wno-uninitialized") 12 | set(CMAKE_CXX_FLAGS_DEBUG "-g") 13 | set(CMAKE_CXX_FLAGS_RELEASE "-O3") 14 | 15 | 16 | add_executable(derna src/main.cpp src/utils.h src/utils.cpp src/Nussinov.cpp src/Nussinov.h src/NussinovAlgorithm.cpp src/NussinovAlgorithm.h src/Zuker.cpp src/Zuker.h src/default.cpp src/default.h src/params/constants.h src/params/intl11.h src/params/intl11dH.h src/params/intl21.h src/params/intl21dH.h src/params/intl22.h src/params/intl22dH.h src/ZukerAlgorithm.cpp src/ZukerAlgorithm.h) 17 | 18 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q8N1G4.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q8N1G4|LRC47_HUMAN Leucine-rich repeat-containing protein 47 OS=Homo sapiens OX=9606 GN=LRRC47 PE=1 SV=1 2 | MAAAAVSESWPELELAERERRRELLLTGPGLEERVRAAGGQLPPRLFTLPLLHYLEVSGC 3 | GSLRAPGPGLAQGLPQLHSLVLRRNALGPGLSPELGPLPALRVLDLSGNALEALPPGQGL 4 | GPAEPPGLPQLQSLNLSGNRLRELPADLARCAPRLQSLNLTGNCLDSFPAELFRPGALPL 5 | LSELAAADNCLRELSPDIAHLASLKTLDLSNNQLSEIPAELADCPKLKEINFRGNKLRDK 6 | 
RLEKMVSGCQTRSILEYLRVGGRGGGKGKGRAEGSEKEESRRKRRERKQRREGGDGEEQD 7 | VGDAGRLLLRVLHVSENPVPLTVRVSPEVRDVRPYIVGAVVRGMDLQPGNALKRFLTSQT 8 | KLHEDLCEKRTAATLATHELRAVKGPLLYCARPPQDLKIVPLGRKEAKAKELVRQLQLEA 9 | EEQRKQKKRQSVSGLHRYLHLLDGNENYPCLVDADGDVISFPPITNSEKTKVKKTTSDLF 10 | LEVTSATSLQICKDVMDALILKMAEMKKYTLENKEEGSLSDTEADAVSGQLPDPTTNPSA 11 | GKDGPSLLVVEQVRVVDLEGSLKVVYPSKADLATAPPHVTVVR 12 | -------------------------------------------------------------------------------- /src/NussinovAlgorithm.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by xinyu on 4/24/2022. 3 | // 4 | 5 | #ifndef RNA_DESIGN_NUSSINOVALGORITHM_H 6 | #define RNA_DESIGN_NUSSINOVALGORITHM_H 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | class NussinovAlgorithm { 13 | vector rna; 14 | int n,g; 15 | vector dp; 16 | 17 | public: 18 | NussinovAlgorithm(vector &rna, int n, int g); 19 | NussinovAlgorithm(const NussinovAlgorithm &); 20 | ~NussinovAlgorithm(); 21 | int nussinov(int i, int j); 22 | // inline int nussinov(); 23 | string get_bp(int i, int j); 24 | int get_nbp(int, int); 25 | 26 | private: 27 | int index(int i, int j); 28 | 29 | }; 30 | #endif //RNA_DESIGN_NUSSINOVALGORITHM_H 31 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9HA90.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9HA90|EFCC1_HUMAN EF-hand and coiled-coil domain-containing protein 1 OS=Homo sapiens OX=9606 GN=EFCC1 PE=2 SV=2 2 | MEPVSTGAEAGMEGAGGDPYRRPARRTQWLLSALAHHYGLDRGVENEIVVLATGLDQYLQ 3 | EVFHHLDCRGAGRLPRADFRALCAVLGLRAEGATTAGQAAGDGNSRDVTPGDAAAELATD 4 | GDSDTDEEARLALRAEPPELTFRQFHARLCGYFGTRAGPRLPRGALSEHIETQIRLRRPR 5 | RRRRPPCAPGPDSGPDCERVARLEEENSSLRELVEDLRAALQSSDARCLALQVGLWKSQA 6 | STHEMGHGGPEAAVRELRQAQGALAAAEARAGRLRRGQAEVRRRAEEARQVVLRSLHRVR 7 | ELEALAQQVPGLQRWVRRLEAELQRYRSEDSQLPTPQLANPEPGDKSNEPEDAGTRDPDP 8 | TPEGAWQSDSSSGSRALDEVDEQLFRSVEGQAASDEEEVEEERWQEEKKTPAAEAKTLLA 
9 | RLSSCRGRCDDQTAEKLMTYFGHFGGANHAHTLGELEACIAMLVEQLRTQGCGGRTLGTS 10 | EEEAELQQKVEENEHLRLELQMVETERVRLSLLEEKLVDVLQLLQRLRDLNISKRALGKI 11 | LLSTLDAFRDPTHEGRPSPAAILDALHQALAACQLLRRQPSAPASAAAALTNPLLVSC 12 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q8N6Q8.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q8N6Q8|MET25_HUMAN Probable methyltransferase-like protein 25 OS=Homo sapiens OX=9606 GN=METTL25 PE=2 SV=2 2 | MAASCPLPVTPDLPTLRAKLQGLLQFLRDALSISNAHTVDFYTESVWEELVDLPPETVLA 3 | ALRKSASETEALPSETRPLVEAEWEAGMTDFPKIFCETSQKLVSVEAFALAAKYYSVQNL 4 | GICTPFEQLLVALRGNQNQRIGENQKAVEFMNMKKSHEVQAMSELISSIADYYGIKQVID 5 | LGSGKGYLSSFLSLKYGLKVYGIDSSNTNTHGAEERNRKLKKHWKLCHAQSRLDVNGLAL 6 | KMAKERKVQNKVKNKADTEEVFNNSPTNQEKMPTSAILPDFSGSVISNIRNQMETLHSQP 7 | HQEENLCFENSFSLINLLPINAVEPTSSQQIPNRETSEANKERRKMTSKSSESNIYSPLT 8 | SFITADSELHDIIKDLEDCLMVGLHTCGDLAPNTLRIFTSNSEIKGVCSVGCCYHLLSEE 9 | FENQHKERTQEKWGFPMCHYLKEERWCCGRNARMSACLALERVAAGQGLPTESLFYRAVL 10 | QDIIKDCYGITKCDRHVGKIYSKCSSFLDYVRRSLKKLGLDESKLPEKIIMNYYEKYKPR 11 | MNELEAFNMLKVVLAPCIETLILLDRLCYLKEQEDIAWSALVKLFDPVKSPRCYAVIALK 12 | KQQ 13 | -------------------------------------------------------------------------------- /data/InputFiles/sample_codon_usage.csv: -------------------------------------------------------------------------------- 1 | ,GCU,GCC,GCA,GCG 2 | A,10,65,4,0,0,0, 3 | ,CGU,CGC,CGA,CGG,AGA,AGG 4 | R,0,1,0,19,21,1, 5 | ,AAU,AAC 6 | N,21,67,0,0,0,0, 7 | ,GAU,GAC 8 | D,18,44,0,0,0,0, 9 | ,UGU,UGC 10 | C,13,27,0,0,0,0, 11 | ,CAA,CAG 12 | Q,4,58,0,0,0,0, 13 | ,GAA,GAG 14 | E,14,34,0,0,0,0, 15 | ,GGU,GGC,GGA,GGG 16 | G,1,66,12,3,0,0, 17 | ,CAU,CAC 18 | H,2,15,0,0,0,0, 19 | ,AUG 20 | M,14,0,0,0,0,0, 21 | ,AUU,AUC,AUA 22 | I,10,66,0,0,0,0, 23 | ,CUU,CUC,CUA,CUG,UUA,UUG 24 | L,0,3,0,105,0,0, 25 | ,AAA,AAG 26 | K,9,51,0,0,0,0, 27 | ,UUU,UUC 28 | F,15,62,0,0,0,0, 29 | ,CCU,CCC,CCA,CCG 30 | P,30,26,4,0,0,0, 31 | ,UCU,UCC,UCA,UCG,AGU,AGC 32 | 
S,13,22,0,0,0,64, 33 | ,ACU,ACC,ACA,ACG 34 | T,0,63,34,0,0,0, 35 | ,UGG 36 | W,12,0,0,0,0,0, 37 | ,UAU,UAC 38 | Y,5,49,0,0,0,0, 39 | ,GUU,GUC,GUA,GUG 40 | V,1,8,0,87,0,0, 41 | -------------------------------------------------------------------------------- /data/uniprotSeq/P31645.fasta: -------------------------------------------------------------------------------- 1 | >sp|P31645|SC6A4_HUMAN Sodium-dependent serotonin transporter OS=Homo sapiens OX=9606 GN=SLC6A4 PE=1 SV=1 2 | METTPLNSQKQLSACEDGEDCQENGVLQKVVPTPGDKVESGQISNGYSAVPSPGAGDDTR 3 | HSIPATTTTLVAELHQGERETWGKKVDFLLSVIGYAVDLGNVWRFPYICYQNGGGAFLLP 4 | YTIMAIFGGIPLFYMELALGQYHRNGCISIWRKICPIFKGIGYAICIIAFYIASYYNTIM 5 | AWALYYLISSFTDQLPWTSCKNSWNTGNCTNYFSEDNITWTLHSTSPAEEFYTRHVLQIH 6 | RSKGLQDLGGISWQLALCIMLIFTVIYFSIWKGVKTSGKVVWVTATFPYIILSVLLVRGA 7 | TLPGAWRGVLFYLKPNWQKLLETGVWIDAAAQIFFSLGPGFGVLLAFASYNKFNNNCYQD 8 | ALVTSVVNCMTSFVSGFVIFTVLGYMAEMRNEDVSEVAKDAGPSLLFITYAEAIANMPAS 9 | TFFAIIFFLMLITLGLDSTFAGLEGVITAVLDEFPHVWAKRRERFVLAVVITCFFGSLVT 10 | LTFGGAYVVKLLEEYATGPAVLTVALIEAVAVSWFYGITQFCRDVKEMLGFSPGWFWRIC 11 | WVAISPLFLLFIICSFLMSPPQLRLFQYNYPYWSIILGYCIGTSSFICIPTYIAYRLIIT 12 | PGTFKERIIKSITPETPTEIPCGDIRLNAV 13 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9UNN5.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9UNN5|FAF1_HUMAN FAS-associated factor 1 OS=Homo sapiens OX=9606 GN=FAF1 PE=1 SV=2 2 | MASNMDREMILADFQACTGIENIDEAITLLEQNNWDLVAAINGVIPQENGILQSEYGGET 3 | IPGPAFNPASHPASAPTSSSSSAFRPVMPSRQIVERQPRMLDFRVEYRDRNVDVVLEDTC 4 | TVGEIKQILENELQIPVSKMLLKGWKTGDVEDSTVLKSLHLPKNNSLYVLTPDLPPPSSS 5 | SHAGALQESLNQNFMLIITHREVQREYNLNFSGSSTIQEVKRNVYDLTSIPVRHQLWEGW 6 | PTSATDDSMCLAESGLSYPCHRLTVGRRSSPAQTREQSEEQITDVHMVSDSDGDDFEDAT 7 | EFGVDDGEVFGMASSALRKSPMMPENAENEGDALLQFTAEFSSRYGDCHPVFFIGSLEAA 8 | FQEAFYVKARDRKLLAIYLHHDESVLTNVFCSQMLCAESIVSYLSQNFITWAWDLTKDSN 9 | RARFLTMCNRHFGSVVAQTIRTQKTDQFPLFLIIMGKRSSNEVLNVIQGNTTVDELMMRL 10 | 
MAAMEIFTAQQQEDIKDEDEREARENVKREQDEAYRLSLEADRAKREAHEREMAEQFRLE 11 | QIRKEQEEEREAIRLSLEQALPPEPKEENAEPVSKLRIRTPSGEFLERRFLASNKLQIVF 12 | DFVASKGFPWDEYKLLSTFPRRDVTQLDPNKSLLEVKLFPQETLFLEAKE 13 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q06187.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q06187|BTK_HUMAN Tyrosine-protein kinase BTK OS=Homo sapiens OX=9606 GN=BTK PE=1 SV=3 2 | MAAVILESIFLKRSQQKKKTSPLNFKKRLFLLTVHKLSYYEYDFERGRRGSKKGSIDVEK 3 | ITCVETVVPEKNPPPERQIPRRGEESSEMEQISIIERFPYPFQVVYDEGPLYVFSPTEEL 4 | RKRWIHQLKNVIRYNSDLVQKYHPCFWIDGQYLCCSQTAKNAMGCQILENRNGSLKPGSS 5 | HRKTKKPLPPTPEEDQILKKPLPPEPAAAPVSTSELKKVVALYDYMPMNANDLQLRKGDE 6 | YFILEESNLPWWRARDKNGQEGYIPSNYVTEAEDSIEMYEWYSKHMTRSQAEQLLKQEGK 7 | EGGFIVRDSSKAGKYTVSVFAKSTGDPQGVIRHYVVCSTPQSQYYLAEKHLFSTIPELIN 8 | YHQHNSAGLISRLKYPVSQQNKNAPSTAGLGYGSWEIDPKDLTFLKELGTGQFGVVKYGK 9 | WRGQYDVAIKMIKEGSMSEDEFIEEAKVMMNLSHEKLVQLYGVCTKQRPIFIITEYMANG 10 | CLLNYLREMRHRFQTQQLLEMCKDVCEAMEYLESKQFLHRDLAARNCLVNDQGVVKVSDF 11 | GLSRYVLDDEYTSSVGSKFPVRWSPPEVLMYSKFSSKSDIWAFGVLMWEIYSLGKMPYER 12 | FTNSETAEHIAQGLRLYRPHLASEKVYTIMYSCWHEKADERPTFKILLSNILDVMDEES 13 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q6PI48.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q6PI48|SYDM_HUMAN Aspartate--tRNA ligase, mitochondrial OS=Homo sapiens OX=9606 GN=DARS2 PE=1 SV=1 2 | MYFPSWLSQLYRGLSRPIRRTTQPIWGSLYRSLLQSSQRRIPEFSSFVVRTNTCGELRSS 3 | HLGQEVTLCGWIQYRRQNTFLVLRDFDGLVQVIIPQDESAASVKKILCEAPVESVVQVSG 4 | TVISRPAGQENPKMPTGEIEIKVKTAELLNACKKLPFEIKNFVKKTEALRLQYRYLDLRS 5 | FQMQYNLRLRSQMVMKMREYLCNLHGFVDIETPTLFKRTPGGAKEFLVPSREPGKFYSLP 6 | QSPQQFKQLLMVGGLDRYFQVARCYRDEGSRPDRQPEFTQIDIEMSFVDQTGIQSLIEGL 7 | LQYSWPNDKDPVVVPFPTMTFAEVLATYGTDKPDTRFGMKIIDISDVFRNTEIGFLQDAL 8 | SKPHGTVKAICIPEGAKYLKRKDIESIRNFAADHFNQEILPVFLNANRNWNSPVANFIME 9 | 
SQRLELIRLMETQEEDVVLLTAGEHNKACSLLGKLRLECADLLETRGVVLRDPTLFSFLW 10 | VVDFPLFLPKEENPRELESAHHPFTAPHPSDIHLLYTEPKKARSQHYDLVLNGNEIGGGS 11 | IRIHNAELQRYILATLLKEDVKMLSHLLQALDYGAPPHGGIALGLDRLICLVTGSPSIRD 12 | VIAFPKSFRGHDLMSNTPDSVPPEELKPYHIRVSKPTDSKAERAH 13 | -------------------------------------------------------------------------------- /data/uniprotSeq/P05160.fasta: -------------------------------------------------------------------------------- 1 | >sp|P05160|F13B_HUMAN Coagulation factor XIII B chain OS=Homo sapiens OX=9606 GN=F13B PE=1 SV=3 2 | MRLKNLTFIIILIISGELYAEEKPCGFPHVENGRIAQYYYTFKSFYFPMSIDKKLSFFCL 3 | AGYTTESGRQEEQTTCTTEGWSPEPRCFKKCTKPDLSNGYISDVKLLYKIQENMRYGCAS 4 | GYKTTGGKDEEVVQCLSDGWSSQPTCRKEHETCLAPELYNGNYSTTQKTFKVKDKVQYEC 5 | ATGYYTAGGKKTEEVECLTYGWSLTPKCTKLKCSSLRLIENGYFHPVKQTYEEGDVVQFF 6 | CHENYYLSGSDLIQCYNFGWYPESPVCEGRRNRCPPPPLPINSKIQTHSTTYRHGEIVHI 7 | ECELNFEIHGSAEIRCEDGKWTEPPKCIEGQEKVACEEPPFIENGAANLHSKIYYNGDKV 8 | TYACKSGYLLHGSNEITCNRGKWTLPPECVENNENCKHPPVVMNGAVADGILASYATGSS 9 | VEYRCNEYYLLRGSKISRCEQGKWSSPPVCLEPCTVNVDYMNRNNIEMKWKYEGKVLHGD 10 | LIDFVCKQGYDLSPLTPLSELSVQCNRGEVKYPLCTRKESKGMCTSPPLIKHGVIISSTV 11 | DTYENGSSVEYRCFDHHFLEGSREAYCLDGMWTTPPLCLEPCTLSFTEMEKNNLLLKWDF 12 | DNRPHILHGEYIEFICRGDTYPAELYITGSILRMQCDRGQLKYPRCIPRQSTLSYQEPLR 13 | T 14 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q7Z340.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q7Z340|ZN551_HUMAN Zinc finger protein 551 OS=Homo sapiens OX=9606 GN=ZNF551 PE=1 SV=3 2 | MPAPVGRRSPPSPRSSMAAVALRDSAQGMTFEDVAIYFSQEEWELLDESQRFLYCDVMLE 3 | NFAHVTSLGYCHGMENEAIASEQSVSIQVRTSKGNTPTQKTHLSEIKMCVPVLKDILPAA 4 | EHQTTSPVQKSYLGSTSMRGFCFSADLHQHQKHYNEEEPWKRKVDEATFVTGCRFHVLNY 5 | FTCGEAFPAPTDLLQHEATPSGEEPHSSSSKHIQAFFNAKSYYKWGEYRKASSHKHTLVQ 6 | HQSVCSEGGLYECSKCEKAFTCKNTLVQHQQIHTGQKMFECSECEESFSKKCHLILHKII 7 | HTGERPYECSDREKAFIHKSEFIHHQRRHTGGVRHECGECRKTFSYKSNLIEHQRVHTGE 8 | 
RPYECGECGKSFRQSSSLFRHQRVHSGERPYQCCECGKSFRQIFNLIRHRRVHTGEMPYQ 9 | CSDCGKSFSCKSELIQHQRIHSGERPYECRECGKSFRQFSNLIRHRSIHTGDRPYECSEC 10 | EKSFSRKFILIQHQRVHTGERPYECSECGKSFTRKSDLIQHRRIHTGTRPYECSECGKSF 11 | RQRSGLIQHRRLHTGERPYECSECGKSFSQSASLIQHQRVHTGERPYECSECGKSFSQSS 12 | SLIQHQRGHTGERPYECSQCGKPFTHKSDLIQHQRVHTGERPYECSECGKSFSRKSNLIR 13 | HRRVHTEERP 14 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9H461.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9H461|FZD8_HUMAN Frizzled-8 OS=Homo sapiens OX=9606 GN=FZD8 PE=1 SV=1 2 | MEWGYLLEVTSLLAALALLQRSSGAAAASAKELACQEITVPLCKGIGYNYTYMPNQFNHD 3 | TQDEAGLEVHQFWPLVEIQCSPDLKFFLCSMYTPICLEDYKKPLPPCRSVCERAKAGCAP 4 | LMRQYGFAWPDRMRCDRLPEQGNPDTLCMDYNRTDLTTAAPSPPRRLPPPPPGEQPPSGS 5 | GHGRPPGARPPHRGGGRGGGGGDAAAPPARGGGGGGKARPPGGGAAPCEPGCQCRAPMVS 6 | VSSERHPLYNRVKTGQIANCALPCHNPFFSQDERAFTVFWIGLWSVLCFVSTFATVSTFL 7 | IDMERFKYPERPIIFLSACYLFVSVGYLVRLVAGHEKVACSGGAPGAGGAGGAGGAAAGA 8 | GAAGAGAGGPGGRGEYEELGAVEQHVRYETTGPALCTVVFLLVYFFGMASSIWWVILSLT 9 | WFLAAGMKWGNEAIAGYSQYFHLAAWLVPSVKSIAVLALSSVDGDPVAGICYVGNQSLDN 10 | LRGFVLAPLVIYLFIGTMFLLAGFVSLFRIRSVIKQQDGPTKTHKLEKLMIRLGLFTVLY 11 | TVPAAVVVACLFYEQHNRPRWEATHNCPCLRDLQPDQARRPDYAVFMLKYFMCLVVGITS 12 | GVWVWSGKTLESWRSLCTRCCWASKGAAVGGGAGATAAGGGGGPGGGGGGGPGGGGGPGG 13 | GGGSLYSDVSTGLTWRSGTASSVSYPKQMPLSQV 14 | -------------------------------------------------------------------------------- /data/uniprotSeq/O75509.fasta: -------------------------------------------------------------------------------- 1 | >sp|O75509|TNR21_HUMAN Tumor necrosis factor receptor superfamily member 21 OS=Homo sapiens OX=9606 GN=TNFRSF21 PE=1 SV=1 2 | MGTSPSSSTALASCSRIARRATATMIAGSLLLLGFLSTTTAQPEQKASNLIGTYRHVDRA 3 | TGQVLTCDKCPAGTYVSEHCTNTSLRVCSSCPVGTFTRHENGIEKCHDCSQPCPWPMIEK 4 | LPCAALTDRECTCPPGMFQSNATCAPHTVCPVGWGVRKKGTETEDVRCKQCARGTFSDVP 5 | SSVMKCKAYTDCLSQNLVVIKPGTKETDNVCGTLPSFSSSTSPSPGTAIFPRPEHMETHE 6 | 
VPSSTYVPKGMNSTESNSSASVRPKVLSSIQEGTVPDNTSSARGKEDVNKTLPNLQVVNH 7 | QQGPHHRHILKLLPSMEATGGEKSSTPIKGPKRGHPRQNLHKHFDINEHLPWMIVLFLLL 8 | VLVVIVVCSIRKSSRTLKKGPRQDPSAIVEKAGLKKSMTPTQNREKWIYYCNGHGIDILK 9 | LVAAQVGSQWKDIYQFLCNASEREVAAFSNGYTADHERAYAALQHWTIRGPEASLAQLIS 10 | ALRQHRRNDVVEKIRGLMEDTTQLETDKLALPMSPSPLSPSPIPSPNAKLENSALLTVEP 11 | SPQDKNKGFFVDESEPLLRCDSTSSGSSALSRNGSFITKEKKDTVLRQVRLDPCDLQPIF 12 | DDMLHFLNPEELRVIEEIPQAEDKLDRLFEIIGVKSQEASQTLLDSVYSHLPDLL 13 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q92542.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q92542|NICA_HUMAN Nicastrin OS=Homo sapiens OX=9606 GN=NCSTN PE=1 SV=2 2 | MATAGGGSGADPGSRGLLRLLSFCVLLAGLCRGNSVERKIYIPLNKTAPCVRLLNATHQI 3 | GCQSSISGDTGVIHVVEKEEDLQWVLTDGPNPPYMVLLESKHFTRDLMEKLKGRTSRIAG 4 | LAVSLTKPSPASGFSPSVQCPNDGFGVYSNSYGPEFAHCREIQWNSLGNGLAYEDFSFPI 5 | FLLEDENETKVIKQCYQDHNLSQNGSAPTFPLCAMQLFSHMHAVISTATCMRRSSIQSTF 6 | SINPEIVCDPLSDYNVWSMLKPINTTGTLKPDDRVVVAATRLDSRSFFWNVAPGAESAVA 7 | SFVTQLAAAEALQKAPDVTTLPRNVMFVFFQGETFDYIGSSRMVYDMEKGKFPVQLENVD 8 | SFVELGQVALRTSLELWMHTDPVSQKNESVRNQVEDLLATLEKSGAGVPAVILRRPNQSQ 9 | PLPPSSLQRFLRARNISGVVLADHSGAFHNKYYQSIYDTAENINVSYPEWLSPEEDLNFV 10 | TDTAKALADVATVLGRALYELAGGTNFSDTVQADPQTVTRLLYGFLIKANNSWFQSILRQ 11 | DLRSYLGDGPLQHYIAVSSPTNTTYVVQYALANLTGTVVNLTREQCQDPSKVPSENKDLY 12 | EYSWVQGPLHSNETDRLPRCVRSTARLARALSPAFELSQWSSTEYSTWTESRWKDIRARI 13 | FLIASKELELITLTVGFGILIFSLIVTYCINAKADVLFIAPREPGAVSY 14 | -------------------------------------------------------------------------------- /src/params/constants.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Summer Gu on 8/16/22. 
3 | // 4 | 5 | #ifndef RNA_DESIGN_CONSTANTS_H 6 | #define RNA_DESIGN_CONSTANTS_H 7 | #include 8 | 9 | /** The gas constant */ 10 | #define GASCONST 1.98717 /* in [cal/K] */ 11 | /** 0 deg Celsius in Kelvin */ 12 | #define K0 273.15 13 | /** Infinity as used in minimization routines */ 14 | #define inf 10000000 /* (INT_MAX/10) */ 15 | #define INF 10000000 16 | 17 | #define EMAX (INF/10) 18 | /** forbidden */ 19 | #define FORBIDDEN 9999 20 | /** bonus contribution */ 21 | #define BONUS 10000 22 | /** The number of distinguishable base pairs */ 23 | #define NBPAIRS 7 24 | /** The minimum loop length */ 25 | #define TURN 3 26 | /** The maximum loop length */ 27 | #define MAXLOOP 30 28 | 29 | 30 | #define UNIT 100 31 | 32 | #define EPSILON 0.00001 33 | 34 | const string PARAMSPATH = {}; //"../data/InputFiles/"; 35 | 36 | 37 | #endif //RNA_DESIGN_CONSTANTS_H 38 | -------------------------------------------------------------------------------- /data/uniprotSeq/P78563.fasta: -------------------------------------------------------------------------------- 1 | >sp|P78563|RED1_HUMAN Double-stranded RNA-specific editase 1 OS=Homo sapiens OX=9606 GN=ADARB1 PE=1 SV=1 2 | MDIEDEENMSSSSTDVKENRNLDNVSPKDGSTPGPGEGSQLSNGGGGGPGRKRPLEEGSN 3 | GHSKYRLKKRRKTPGPVLPKNALMQLNEIKPGLQYTLLSQTGPVHAPLFVMSVEVNGQVF 4 | EGSGPTKKKAKLHAAEKALRSFVQFPNASEAHLAMGRTLSVNTDFTSDQADFPDTLFNGF 5 | ETPDKAEPPFYVGSNGDDSFSSSGDLSLSASPVPASLAQPPLPVLPPFPPPSGKNPVMIL 6 | NELRPGLKYDFLSESGESHAKSFVMSVVVDGQFFEGSGRNKKLAKARAAQSALAAIFNLH 7 | LDQTPSRQPIPSEGLQLHLPQVLADAVSRLVLGKFGDLTDNFSSPHARRKVLAGVVMTTG 8 | TDVKDAKVISVSTGTKCINGEYMSDRGLALNDCHAEIISRRSLLRFLYTQLELYLNNKDD 9 | QKRSIFQKSERGGFRLKENVQFHLYISTSPCGDARIFSPHEPILEGSRSYTQAGVQWCNH 10 | GSLQPRPPGLLSDPSTSTFQGAGTTEPADRHPNRKARGQLRTKIESGEGTIPVRSNASIQ 11 | TWDGVLQGERLLTMSCSDKIARWNVVGIQGSLLSIFVEPIYFSSIILGSLYHGDHLSRAM 12 | YQRISNIEDLPPLYTLNKPLLSGISNAEARQPGKAPNFSVNWTVGDSAIEVINATTGKDE 13 | LGRASRLCKHALYCRWMRVHGKVPSHLLRSKITKPNVYHESKLAAKEYQAAKARLFTAFI 14 | KAGLGAWVEKPTEQDQFSLTP 15 | 
-------------------------------------------------------------------------------- /data/uniprotSeq/Q96NI6.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q96NI6|LRFN5_HUMAN Leucine-rich repeat and fibronectin type-III domain-containing protein 5 OS=Homo sapiens OX=9606 GN=LRFN5 PE=1 SV=2 2 | MEKILFYLFLIGIAVKAQICPKRCVCQILSPNLATLCAKKGLLFVPPNIDRRTVELRLAD 3 | NFVTNIKRKDFANMTSLVDLTLSRNTISFITPHAFADLRNLRALHLNSNRLTKITNDMFS 4 | GLSNLHHLILNNNQLTLISSTAFDDVFALEELDLSYNNLETIPWDAVEKMVSLHTLSLDH 5 | NMIDNIPKGTFSHLHKMTRLDVTSNKLQKLPPDPLFQRAQVLATSGIISPSTFALSFGGN 6 | PLHCNCELLWLRRLSREDDLETCASPPLLTGRYFWSIPEEEFLCEPPLITRHTHEMRVLE 7 | GQRATLRCKARGDPEPAIHWISPEGKLISNATRSLVYDNGTLDILITTVKDTGAFTCIAS 8 | NPAGEATQIVDLHIIKLPHLLNSTNHIHEPDPGSSDISTSTKSGSNTSSSNGDTKLSQDK 9 | IVVAEATSSTALLKFNFQRNIPGIRMFQIQYNGTYDDTLVYRMIPPTSKTFLVNNLAAGT 10 | MYDLCVLAIYDDGITSLTATRVVGCIQFTTEQDYVRCHFMQSQFLGGTMIIIIGGIIVAS 11 | VLVFIIILMIRYKVCNNNGQHKVTKVSNVYSQTNGAQIQGCSVTLPQSVSKQAVGHEENA 12 | QCCKATSDNVIQSSETCSSQDSSTTTSALPPSWTSSTSVSQKQKRKTGTKPSTEPQNEAV 13 | TNVESQNTNRNNSTALQLASRPPDSVTEGPTSKRAHIKPNALLTNVDQIVQETQRLELI 14 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9BXB4.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9BXB4|OSB11_HUMAN Oxysterol-binding protein-related protein 11 OS=Homo sapiens OX=9606 GN=OSBPL11 PE=1 SV=2 2 | MQGGEPVSTMKVSESEGKLEGQATAVTPNKNSSCGGGISSSSSSRGGSAKGWQYSDHMEN 3 | VYGYLMKYTNLVTGWQYRFFVLNNEAGLLEYFVNEQSRNQKPRGTLQLAGAVISPSDEDS 4 | HTFTVNAASGEQYKLRATDAKERQHWVSRLQICTQHHTEAIGKNNPPLKSRSFSLASSSN 5 | SPISQRRPSQNAISFFNVGHSKLQSLSKRTNLPPDHLVEVREMMSHAEGQQRDLIRRIEC 6 | LPTSGHLSSLDQDLLMLKATSMATMNCLNDCFHILQLQHASHQKGSLPSGTTIEWLEPKI 7 | SLSNHYKNGADQPFATDQSKPVAVPEEQPVAESGLLAREPEEINADDEIEDTCDHKEDDL 8 | GAVEEQRSVILHLLSQLKLGMDLTRVVLPTFILEKRSLLEMYADFMSHPDLFIAITNGAT 9 | AEDRMIRFVEYYLTSFHEGRKGAIAKKPYNPIIGETFHCSWKMPKSEVASSVFSSSSTQG 10 | 
VTNHAPLSGESLTQVGSDCYTVRFVAEQVSHHPPVSGFYAECTERKMCVNAHVWTKSKFL 11 | GMSIGVTMVGEGILSLLEHGEEYTFSLPCAYARSILTVPWVELGGKVSVNCAKTGYSASI 12 | TFHTKPFYGGKLHRVTAEVKHNITNTVVCRVQGEWNSVLEFTYSNGETKYVDLTKLAVTK 13 | KRVRPLEKQDPFESRRLWKNVTDSLRESEIDKATEHKHTLEERQRTEERHRTETGTPWKT 14 | KYFIKEGDGWVYHKPLWKIIPTTQPAE 15 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9UFB7.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9UFB7|ZBT47_HUMAN Zinc finger and BTB domain-containing protein 47 OS=Homo sapiens OX=9606 GN=ZBTB47 PE=1 SV=3 2 | MGRLNEQRLFQPDLCDVDLVLVPQRSVFPAHKGVLAAYSQFFHSLFTQNKQLQRVELSLE 3 | ALAPGGLQQILNFIYTSKLLVNAANVHEVLSAASLLQMADIAASCQELLDARSLGPPGPG 4 | TVALAQPAASCTPAAPPYYCDIKQEADTPGLPKIYAREGPDPYSVRVEDGAGTAGGTVPA 5 | TIGPAQPFFKEEKEGGVEEAGGPPASLCKLEGGEELEEELGGSGTYSRREQSQIIVEVNL 6 | NNQTLHVSTGPEGKPGAGPSPATVVLGREDGLQRHSDEEEEDDEEEEEEEEEEEGGGSGR 7 | EEEEEEEGGSQGEEEEEEEDGHSEQEEEEEEEEEEGPSEQDQESSEEEEGEEGEAGGKQG 8 | PRGSRSSRADPPPHSHMATRSRENARRRGTPEPEEAGRRGGKRPKPPPGVASASARGPPA 9 | TDGLGAKVKLEEKQHHPCQKCPRVFNNRWYLEKHMNVTHSRMQICDQCGKRFLLESELLL 10 | HRQTDCERNIQCVTCGKAFKKLWSLHEHNKIVHGYAEKKFSCEICEKKFYTMAHVRKHMV 11 | AHTKDMPFTCETCGKSFKRSMSLKVHSLQHSGEKPFRCENCNERFQYKYQLRSHMSIHIG 12 | HKQFMCQWCGKDFNMKQYFDEHMKTHTGEKPYICEICGKSFTSRPNMKRHRRTHTGEKPY 13 | PCDVCGQRFRFSNMLKAHKEKCFRVSHTLAGDGVPAAPGLPPTQPQAHALPLLPGLPQTL 14 | PPPPHLPPPPPLFPTTASPGGRMNANN 15 | -------------------------------------------------------------------------------- /data/InputFiles/mismatch1nI.csv: -------------------------------------------------------------------------------- 1 | Pairs,N,A,C,G,U 2 | NP,100000,100000,100000,100000,100000 3 | NP,100000,100000,100000,100000,100000 4 | NP,100000,100000,100000,100000,100000 5 | NP,100000,100000,100000,100000,100000 6 | NP,100000,100000,100000,100000,100000 7 | CG,0,0,0,0,0 8 | CG,0,0,0,0,0 9 | CG,0,0,0,0,0 10 | CG,0,0,0,0,0 11 | CG,0,0,0,0,0 12 | GC,0,0,0,0,0 13 | GC,0,0,0,0,0 14 | GC,0,0,0,0,0 
15 | GC,0,0,0,0,0 16 | GC,0,0,0,0,0 17 | GU,70,70,70,70,70 18 | GU,70,70,70,70,70 19 | GU,70,70,70,70,70 20 | GU,70,70,70,70,70 21 | GU,70,70,70,70,70 22 | UG,70,70,70,70,70 23 | UG,70,70,70,70,70 24 | UG,70,70,70,70,70 25 | UG,70,70,70,70,70 26 | UG,70,70,70,70,70 27 | AU,70,70,70,70,70 28 | AU,70,70,70,70,70 29 | AU,70,70,70,70,70 30 | AU,70,70,70,70,70 31 | AU,70,70,70,70,70 32 | UA,70,70,70,70,70 33 | UA,70,70,70,70,70 34 | UA,70,70,70,70,70 35 | UA,70,70,70,70,70 36 | UA,70,70,70,70,70 37 | NN,70,70,70,70,70 38 | NN,70,70,70,70,70 39 | NN,70,70,70,70,70 40 | NN,70,70,70,70,70 41 | NN,70,70,70,70,70 42 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9HCI6.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9HCI6|K1586_HUMAN E3 SUMO-protein ligase KIAA1586 OS=Homo sapiens OX=9606 GN=KIAA1586 PE=1 SV=2 2 | MGDPGSEIIESVPPAGPEASESTTDENEDDIQFVSEGPSRPVLEYIDLVCGDDENPSAYY 3 | SDILFPKMPKRQGDFLHFLNVKKVKTDTENNEVSKNHCRLSKAKEPHFEYIEQPIIEEKP 4 | SLSSKKEIDNLVLPDCWNEKQAFMFTEQYKWLEIKEGKLGCKDCSAVRHLGSKAEKHVHV 5 | SKEWIAYLVTPNGSNKTTRQASLRKKIREHDVSKAHGKIQDLLKESTNDSICNLVHKQNN 6 | KNIDATVKVFNTVYSLVKHNRPLSDIEGARELQEKNGEVNCLNTRYSATRIAEHIAKEMK 7 | MKIFKNIIEENAKICIIIDEASTVSKKTTLVIYLQCTIQSAPAPVMLFVALKELVSTIAE 8 | CIVNTLLTTLNDCGFTNEYLKANLIAFCSDGANTILGRKSGVATKLLENFPEIIIWNCLN 9 | HRLQLSLDDSISEIKQINHLKIFIDKIYSIYHQPNKNQTKLLGTVAKELETEIIKIGRVM 10 | GPRWAACSLQAATAVWHAYPILYMHFSHSYSGLAKRLANINFLQDLALMIDILEEFSVLS 11 | TALQSRSTNIKKAQKLIKRTIRALENLKIGTGKYESQIEDLIKSDKFKDIPFNKNNKFNA 12 | LPRSILLDNIIQHMNLRLLSDRNHEDIFNYFDLLEPSTWPYEEITSPWIAGEKTLFHLCK 13 | ILKYEVDLNDFREFVNNNIKSNNVSIPTTIYKAKKIVSTIAINSAEAERGFNLMNIICTR 14 | VRNSLTIDHVSDLMTINLLGKELADWDATPFVKSWSNCNHRLATDTRVRQKSTKVFHENQ 15 | LAIWNLK 16 | -------------------------------------------------------------------------------- /data/uniprotSeq/C9J798.fasta: -------------------------------------------------------------------------------- 1 | 
>sp|C9J798|RAS4B_HUMAN Ras GTPase-activating protein 4B OS=Homo sapiens OX=9606 GN=RASA4B PE=3 SV=2 2 | MAKRSSLYIRIVEGKNLPAKDITGSSDPYCIVKVDNEPIIRTATVWKTLCPFWGEEYQVH 3 | LPPTFHAVAFYVMDEDALSRDDVIGKVCLTRDTIASHPKGFSGWAHLTEVDPDEEVQGEI 4 | HLRLEVWPGARACRLRCSVLEARDLAPKDRNGTSDPFVRVRYKGRTRETSIVKKSCYPRW 5 | NETFEFELQEGAMEALCVEAWDWDLVSRNDFLGKVVIDVQRLRVVQQEEGWFRLQPDQSK 6 | SRRHDEGNLGSLQLEVRLRDETVLPSSYYQPLVHLLCHEVKLGMQGPGQLIPLIEETTST 7 | ECRQDVATNLLKLFLGQGLAKDFLDLLFQLELSRTSETNTLFRSNSLASKSVESFLKVAG 8 | MQYLHGVLGPIINKVFEEKKYVELDPSKVEVKDVGCSGLHRPQTEAEVLEQSAQTLRAHL 9 | GALLSALSRSVRACPAVVRATFRQLFRRVRERFPGAQHENVPFIAVTSFLCLRFFSPAIM 10 | SPKLFHLRERHADARTSRTLLLLAKAVQNVGNMDTPASRAKEAWMEPLQPTVHQGVAQLK 11 | DFITKLVDIEEKDELDLQRTLSLQAPPVKEGPLFIHRTKGKGPLMSSSFKKLYFSLTTEA 12 | LSFAKTPSSKKSALIKLANIRAAEKVEEKSFGGSHVMQVIYTDDAGRPQTAYLQCKCVNE 13 | LNQWLSALRKVSINNTGLLGSYHPGVFRGDKWSCCHQKEKTGQGCDKTRSRVTLQEWNDP 14 | LDHDLEAQLIYRHLLGVEAMLWERHRELSGGAEAGTVPTSPGKVPEDSLARLLRVLQDLR 15 | EAHSSSPAGSPPSEPNCLLELQT -------------------------------------------------------------------------------- /data/uniprotSeq/Q9UKQ2.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9UKQ2|ADA28_HUMAN Disintegrin and metalloproteinase domain-containing protein 28 OS=Homo sapiens OX=9606 GN=ADAM28 PE=1 SV=3 2 | MLQGLLPVSLLLSVAVSAIKELPGVKKYEVVYPIRLHPLHKREAKEPEQQEQFETELKYK 3 | MTINGKIAVLYLKKNKNLLAPGYTETYYNSTGKEITTSPQIMDDCYYQGHILNEKVSDAS 4 | ISTCRGLRGYFSQGDQRYFIEPLSPIHRDGQEHALFKYNPDEKNYDSTCGMDGVLWAHDL 5 | QQNIALPATKLVKLKDRKVQEHEKYIEYYLVLDNGEFKRYNENQDEIRKRVFEMANYVNM 6 | LYKKLNTHVALVGMEIWTDKDKIKITPNASFTLENFSKWRGSVLSRRKRHDIAQLITATE 7 | LAGTTVGLAFMSTMCSPYSVGVVQDHSDNLLRVAGTMAHEMGHNFGMFHDDYSCKCPSTI 8 | CVMDKALSFYIPTDFSSCSRLSYDKFFEDKLSNCLFNAPLPTDIISTPICGNQLVEMGED 9 | CDCGTSEECTNICCDAKTCKIKATFQCALGECCEKCQFKKAGMVCRPAKDECDLPEMCNG 10 | KSGNCPDDRFQVNGFPCHHGKGHCLMGTCPTLQEQCTELWGPGTEVADKSCYNRNEGGSK 11 | YGYCRRVDDTLIPCKANDTMCGKLFCQGGSDNLPWKGRIVTFLTCKTFDPEDTSQEIGMV 12 | 
ANGTKCGDNKVCINAECVDIEKAYKSTNCSSKCKGHAVCDHELQCQCEEGWIPPDCDDSS 13 | VVFHFSIVVGVLFPMAVIFVVVAMVIRHQSSREKQKKDQRPLSTTGTRPHKQKRKPQMVK 14 | AVQPQEMSQMKPHVYDLPVEGNEPPASFHKDTNALPPTVFKDNPVSTPKDSNPKA 15 | -------------------------------------------------------------------------------- /data/InputFiles/mismatch23I.csv: -------------------------------------------------------------------------------- 1 | Pairs,N,A,C,G,U 2 | NP,100000,100000,100000,100000,100000 3 | NP,100000,100000,100000,100000,100000 4 | NP,100000,100000,100000,100000,100000 5 | NP,100000,100000,100000,100000,100000 6 | NP,100000,100000,100000,100000,100000 7 | CG,0,0,0,0,0 8 | CG,0,0,0,-50,0 9 | CG,0,0,0,0,0 10 | CG,0,-110,0,-70,0 11 | CG,0,0,0,0,-30 12 | GC,0,0,0,0,0 13 | GC,0,0,0,0,0 14 | GC,0,0,0,0,0 15 | GC,0,-120,0,-70,0 16 | GC,0,0,0,0,-30 17 | GU,70,70,70,70,70 18 | GU,70,70,70,70,70 19 | GU,70,70,70,70,70 20 | GU,70,-40,70,0,70 21 | GU,70,70,70,70,40 22 | UG,70,70,70,70,70 23 | UG,70,70,70,20,70 24 | UG,70,70,70,70,70 25 | UG,70,-40,70,0,70 26 | UG,70,70,70,70,40 27 | AU,70,70,70,70,70 28 | AU,70,70,70,70,70 29 | AU,70,70,70,70,70 30 | AU,70,-40,70,0,70 31 | AU,70,70,70,70,40 32 | UA,70,70,70,70,70 33 | UA,70,70,70,20,70 34 | UA,70,70,70,70,70 35 | UA,70,-40,70,0,70 36 | UA,70,70,70,70,40 37 | NN,70,70,70,70,70 38 | NN,70,70,70,70,70 39 | NN,70,70,70,70,70 40 | NN,70,-40,70,0,70 41 | NN,70,70,70,70,40 42 | -------------------------------------------------------------------------------- /data/InputFiles/mismatchI.csv: -------------------------------------------------------------------------------- 1 | Pairs,N,A,C,G,U 2 | NP,100000,100000,100000,100000,100000 3 | NP,100000,100000,100000,100000,100000 4 | NP,100000,100000,100000,100000,100000 5 | NP,100000,100000,100000,100000,100000 6 | NP,100000,100000,100000,100000,100000 7 | CG,0,0,0,0,0 8 | CG,0,0,0,-80,0 9 | CG,0,0,0,0,0 10 | CG,0,-100,0,-100,0 11 | CG,0,0,0,0,-60 12 | GC,0,0,0,0,0 13 | GC,0,0,0,-80,0 14 | GC,0,0,0,0,0 15 | GC,0,-100,0,-100,0 16 | 
GC,0,0,0,0,-60 17 | GU,70,70,70,70,70 18 | GU,70,70,70,-10,70 19 | GU,70,70,70,70,70 20 | GU,70,-30,70,-30,70 21 | GU,70,70,70,70,10 22 | UG,70,70,70,70,70 23 | UG,70,70,70,-10,70 24 | UG,70,70,70,70,70 25 | UG,70,-30,70,-30,70 26 | UG,70,70,70,70,10 27 | AU,70,70,70,70,70 28 | AU,70,70,70,-10,70 29 | AU,70,70,70,70,70 30 | AU,70,-30,70,-30,70 31 | AU,70,70,70,70,10 32 | UA,70,70,70,70,70 33 | UA,70,70,70,-10,70 34 | UA,70,70,70,70,70 35 | UA,70,-30,70,-30,70 36 | UA,70,70,70,70,10 37 | NN,70,70,70,70,70 38 | NN,70,70,70,-10,70 39 | NN,70,70,70,70,70 40 | NN,70,-30,70,-30,70 41 | NN,70,70,70,70,10 42 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9QC07.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9QC07|POK18_HUMAN Endogenous retrovirus group K member 18 Pol protein OS=Homo sapiens OX=9606 GN=ERVK-18 PE=3 SV=2 2 | NKSRKRRNRVSFLGVTTVEPPKPIPLTWKTEKLVWVNQWPLPKQKLEALHLLANEQLEKG 3 | HIEPSFSPWNSPVFVIQKKSSKWRMLTDLRAVNAVIQPMGPLQPGLPSPAMIPKDWPLII 4 | IDLKDCFFTIPLAEQDCEKFAFTIPAINNKEPATRFQWKVLPQGMLNSPTICQTFVGRAL 5 | QPVRDKFSDCYIIHYFDDILCAAETKDKLIDCYTFLQAEVANAGLAIASDKIQTSTPFHY 6 | LGMQIENRKIKPQKIEIRKDTLKTLNDFQKLLGDINWIRPTLGIPTYAMSNLFSILRGDS 7 | DLNSKRMLTPEATKEIKLVEEKIQSAQINRIDPLAPLQLLIFATAHSPTGIIIQNTDLVE 8 | WSFLPHSTVKTFTLYLDQIATLIGPTRLRIIKLCGNDPDKIVVPLTKEQVRQAFINSGAW 9 | QIGLANFVGIIDNHYPKTKIFQFLKLTTWILPKITRREPLENALTVFTDGSSNGKVAYTG 10 | PKERVIKTPYQSAQRAELVAVITVLQDFDQPINIISDSAYVVQATRDVETALIKYSMDDQ 11 | LNQLFNLLQQTVRKRNFPFYITHIRAHTNLPGPLTKANEQADLLVSSAFIKAQELHALTH 12 | VNAAGLKNKFDVTWKQAKDIVQHCTQCQVLDLPTQEAGVNPEVCVLMHYGKWMSHMYLHL 13 | GRLSYVHVTVDTYSHFMCATCQTGESTSHVKKHLLSCFAVMGVPEKIKTDNGPGYCSKAF 14 | QKFLSQWKISHTTGIPYNSQGQAIVERTNRTLKTQLVKQKEGGDSKECTTPQMQLNLALY 15 | TLNFLNIYRNQTTTSAEHLTGKKNSPHEGKLI 16 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q8TF61.fasta: 
-------------------------------------------------------------------------------- 1 | >sp|Q8TF61|FBX41_HUMAN F-box only protein 41 OS=Homo sapiens OX=9606 GN=FBXO41 PE=2 SV=5 2 | MASLDLPYRCPRCGEHKRFRSLSSLRAHLEYSHTYETLYILSKTNSICDGAAAAAAAAAA 3 | ASGFPLAPEPAALLAVPGARREVFESTSFQGKEQAAGPSPAAPHLLHHHHHHAPLAHFPG 4 | DLVPASLPCEELAEPGLVPAAAARYALREIEIPLGELFARKSVASSACSTPPPGPGPGPC 5 | PGPASASPASPSPADVAYEEGLARLKIRALEKLEVDRRLERLSEEVEQKIAGQVGRLQAE 6 | LERKAAELETARQESARLGREKEELEERASELSRQVDVSVELLASLKQDLVHKEQELSRK 7 | QQEVVQIDQFLKETAAREASAKLRLQQFIEELLERADRAERQLQVISSSCGSTPSASLGR 8 | GGGGGGAGPNARGPGRMREHHVGPAVPNTYAVSRHGSSPSTGASSRVPAASQSSGCYDSD 9 | SLELPRPEEGAPEDSGPGGLGTRAQAANGGSERSQPPRSSGLRRQAIQNWQRRPRRHSTE 10 | GEEGDVSDVGSRTTESEAEGPLDAPRPGPAMAGPLSSCRLSARPEGGSGRGRRAERVSPS 11 | RSNEVISPEILKMRAALFCIFTYLDTRTLLHAAEVCRDWRFVARHPAVWTRVLLENARVC 12 | SKFLAMLAQWCTQAHSLTLQNLKPRQRGKKESKEEYARSTRGCLEAGLESLLKAAGGNLL 13 | ILRISHCPNILTDRSLWLASCYCRALQAVTYRSATDPVGHEVIWALGAGCREIVSLQVAP 14 | LHPCQQPTRFSNRCLQMIGRCWPHLRALGVGGAGCGVQGLASLARNCMRLQVLELDHVSE 15 | ITQEVAAEVCREGLKGLEMLVLTATPVTPKALLHFNSICRNLKSIVVQIGIADYFKEPSS 16 | PEAQKLFEDMVTKLQALRRRPGFSKILHIKVEGGC 17 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q8NB90.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q8NB90|AFG2H_HUMAN ATPase family protein 2 homolog OS=Homo sapiens OX=9606 GN=SPATA5 PE=1 SV=3 2 | MSSKKNRKRLNQSAENGSSLPSAASSCAEARAPSAGSDFAATSGTLTVTNLLEKVDDKIP 3 | KTFQNSLIHLGLNTMKSANICIGRPVLLTSLNGKQEVYTAWPMAGFPGGKVGLSEMAQKN 4 | VGVRPGDAIQVQPLVGAVLQAEEMDVALSDKDMEINEEELTGCILRKLDGKIVLPGNFLY 5 | CTFYGRPYKLQVLRVKGADGMILGGPQSDSDTDAQRMAFEQSSMETSSLELSLQLSQLDL 6 | EDTQIPTSRSTPYKPIDDRITNKASDVLLDVTQSPGDGSGLMLEEVTGLKCNFESAREGN 7 | EQLTEEERLLKFSIGAKCNTDTFYFISSTTRVNFTEIDKNSKEQDNQFKVTYDMIGGLSS 8 | QLKAIREIIELPLKQPELFKSYGIPAPRGVLLYGPPGTGKTMIARAVANEVGAYVSVING 9 | PEIISKFYGETEAKLRQIFAEATLRHPSIIFIDELDALCPKREGAQNEVEKRVVASLLTL 10 | 
MDGIGSEVSEGQVLVLGATNRPHALDAALRRPGRFDKEIEIGVPNAQDRLDILQKLLRRV 11 | PHLLTEAELLQLANSAHGYVGADLKVLCNEAGLCALRRILKKQPNLPDVKVAGLVKITLK 12 | DFLQAMNDIRPSAMREIAIDVPNVSWSDIGGLESIKLKLEQAVEWPLKHPESFIRMGIQP 13 | PKGVLLYGPPGCSKTMIAKALANESGLNFLAIKGPELMNKYVGESERAVRETFRKARAVA 14 | PSIIFFDELDALAVERGSSLGAGNVADRVLAQLLTEMDGIEQLKDVTILAATNRPDRIDK 15 | ALMRPGRIDRIIYVPLPDAATRREIFKLQFHSMPVSNEVDLDELILQTDAYSGAEIVAVC 16 | REAALLALEEDIQANLIMKRHFTQALSTVTPRIPESLRRFYEDYQEKSGLHTL 17 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9H3R1.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9H3R1|NDST4_HUMAN Bifunctional heparan sulfate N-deacetylase/N-sulfotransferase 4 OS=Homo sapiens OX=9606 GN=NDST4 PE=2 SV=1 2 | MNLIVKLRRSFRTLIVLLATFCLVSIVISAYFLYSGYKQEMTLIETTAEAECTDIKILPY 3 | RSMELKTVKPIDTSKTDPTVLLFVESQYSQLGQDIIAILESSRFQYHMVIAPGKGDIPPL 4 | TDNGKGKYTLVIYENILKYVSMDSWNRELLEKYCVEYSVSIIGFHKANENSLPSTQLKGF 5 | PLNLFNNLALKDCFVNPQSPLLHITKAPKVEKGPLPGEDWTIFQYNHSTYQPVLLTELQT 6 | EKSLSSLSSKTLFATVIQDLGLHDGIQRVLFGNNLNFWLHKLIFIDAISFLSGKRLTLSL 7 | DRYILVDIDDIFVGKEGTRMNVKDVKALLETQNLLRTQVANFTFNLGFSGKFYHTGTEEE 8 | DEGDDLLLRSVDEFWWFPHMWSHMQPHLFHNESSLVEQMILNKEFALEHGIPINMGYAVA 9 | PHHSGVYPVHIQLYAAWKKVWGIQVTSTEEYPHLKPARYRKGFIHNSIMVLPRQTCGLFT 10 | HTIFYKEYPGGPQELDKSIRGGELFLTILLNPISIFMTHLSNYGNDRLGLYTFVNLVNFV 11 | QSWTNLKLQTLPPVQLAHQYFELFPEQKDPLWQNPCDDKRHKDIWSREKTCDHLPKFLVI 12 | GPQKTGTTALYLFLLMHPSIISNLPSPKTFEEVQFFNGNNYHKGIDWYMDFFPTPSNTTS 13 | DFLFEKSANYFHSEEAPRRAASLVPKAKIITILIDPSDRAYSWYQHQRSHEDPAALRFNF 14 | YEVISTGHWAPSDLKTLQRRCLVPGWYAVHIERWLTYFATSQLLIIDGQQLRSDPATVMD 15 | EVQKFLGVTPRYNYSEALTFDPQKGFWCQLLEGGKTKCLGKSKGRKYPPMDPESRTFLSN 16 | YYRDHNVELSKLLHRLGQPLPSWLRQELQKVR 17 | -------------------------------------------------------------------------------- /data/InputFiles/mismatch1nI_H.csv: -------------------------------------------------------------------------------- 1 | Pairs,N,A,C,G,U 2 | 
NP,100000,100000,100000,100000,100000 3 | NP,100000,100000,100000,100000,100000 4 | NP,100000,100000,100000,100000,100000 5 | NP,100000,100000,100000,100000,100000 6 | NP,100000,100000,100000,100000,100000 7 | CG,0,0,0,0,0 8 | CG,0,0,0,0,0 9 | CG,0,0,0,0,0 10 | CG,0,0,0,0,0 11 | CG,0,0,0,0,0 12 | GC,0,0,0,0,0 13 | GC,0,0,0,0,0 14 | GC,0,0,0,0,0 15 | GC,0,0,0,0,0 16 | GC,0,0,0,0,0 17 | GU,500,500,500,500,500 18 | GU,500,500,500,500,500 19 | GU,500,500,500,500,500 20 | GU,500,500,500,500,500 21 | GU,500,500,500,500,500 22 | UG,500,500,500,500,500 23 | UG,500,500,500,500,500 24 | UG,500,500,500,500,500 25 | UG,500,500,500,500,500 26 | UG,500,500,500,500,500 27 | AU,500,500,500,500,500 28 | AU,500,500,500,500,500 29 | AU,500,500,500,500,500 30 | AU,500,500,500,500,500 31 | AU,500,500,500,500,500 32 | UA,500,500,500,500,500 33 | UA,500,500,500,500,500 34 | UA,500,500,500,500,500 35 | UA,500,500,500,500,500 36 | UA,500,500,500,500,500 37 | NN,500,500,500,500,500 38 | NN,500,500,500,500,500 39 | NN,500,500,500,500,500 40 | NN,500,500,500,500,500 41 | NN,500,500,500,500,500 42 | -------------------------------------------------------------------------------- /data/InputFiles/mismatchI_H.csv: -------------------------------------------------------------------------------- 1 | Pairs,N,A,C,G,U 2 | NP,100000,100000,100000,100000,100000 3 | NP,100000,100000,100000,100000,100000 4 | NP,100000,100000,100000,100000,100000 5 | NP,100000,100000,100000,100000,100000 6 | NP,100000,100000,100000,100000,100000 7 | CG,280,0,0,280,0 8 | CG,0,0,0,-340,0 9 | CG,0,0,0,0,0 10 | CG,280,-760,0,280,0 11 | CG,0,0,0,0,-580 12 | GC,280,0,0,280,0 13 | GC,0,0,0,-340,0 14 | GC,0,0,0,0,0 15 | GC,280,-760,0,280,0 16 | GC,0,0,0,0,-580 17 | GU,790,500,500,790,500 18 | GU,500,500,500,170,500 19 | GU,500,500,500,500,500 20 | GU,790,-260,500,790,500 21 | GU,500,500,500,500,-80 22 | UG,790,500,500,790,500 23 | UG,500,500,500,170,500 24 | UG,500,500,500,500,500 25 | UG,790,-260,500,790,500 26 | 
UG,500,500,500,500,-80 27 | AU,790,500,500,790,500 28 | AU,500,500,500,170,500 29 | AU,500,500,500,500,500 30 | AU,790,-260,500,790,500 31 | AU,500,500,500,500,-80 32 | UA,790,500,500,790,500 33 | UA,500,500,500,170,500 34 | UA,500,500,500,500,500 35 | UA,790,-260,500,790,500 36 | UA,500,500,500,500,-80 37 | NN,790,500,500,790,500 38 | NN,500,500,500,170,500 39 | NN,500,500,500,500,500 40 | NN,790,-260,500,790,500 41 | NN,500,500,500,500,-80 42 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q9Y5G9.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9Y5G9|PCDG4_HUMAN Protocadherin gamma-A4 OS=Homo sapiens OX=9606 GN=PCDHGA4 PE=1 SV=2 2 | MHFILDPEDPGAPQASTEGKPKHRRLRGGVVMAAPPARPDHTRLLQICLLLGVLVEIRAE 3 | QILYSVFEEQEEGSVVGNIAKDLGLAPRELAERGVRIVSRGRTQLFALNPRSGTLVTAGR 4 | IDREELCDRSPNCVTNLEILLEDTVKILRVEVEIIDVNDNPPSFGTEQREIKVAENENPG 5 | ARFPLPEAFDPDVGVNSLQGYQLNSNGYFSLDVQSGADGIKYPELVLERALDREEEAVHH 6 | LVLTAFDGGDPVRSGTARILIILVDTNDNAPVFTQPEYHVSVRENVPVGTRLLTVKATDP 7 | DEGANGDVTYSFRKVRDKISQLFQLNSLSGDITILGGLDYEDSGFYDIDVEAHDGPGLRA 8 | RSKVLVTVLDENDNAPEVTVTSLTSSVQESSSPGTVIALFNVHDSDSGGNGLVTCSIPDN 9 | LPFTLEKTYGNYYRLLTHRTLDREEVSEYNITVTATDQGTPPLSTETHISLQVMDINDNP 10 | PTFPHASYSAYIPENNPRGASILSMTAQDPDSGDNARITYSLAEDTFQGAPLSSYVSINS 11 | NTGILYALCSFDYEQFRDLQLLMTASDSGDPPLSSNVSLSLFVLDQNDNVPEILYPTFPT 12 | DGSTGVELAPRSADSGYLVTKVVAVDRDSGQNAWLSYSLLKSSEPGLFAVGLHTGEVRTA 13 | RALLDRDALKQRLVVVVQDHGQPPLSATVTLTVAVADSIPDVLADLGSLKPSADPDDSGL 14 | TLYLVVAVAAVSCVFLAFVTVLLALKLRRWHKSRLLHAEGSRLAGVPASHFVGVDGVRAF 15 | LQTYSHEVSLTADSRKSHLIFSQPSYADTLISRESCEKSEPLLITQDLLETKGDPNLQQA 16 | PPNTDWRFSQAQRPGTSGSQNGDDTGTWPNNQFDTEMLQAMILASASEAADGSSTLGGGA 17 | GTMGLSARYGPQFTLQHVPDYRQNVYIPGSNATLTNAAGKRDGKAPAGGNGNKKKSGKKE 18 | KK 19 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q6UXY8.fasta: -------------------------------------------------------------------------------- 1 | 
>sp|Q6UXY8|TMC5_HUMAN Transmembrane channel-like protein 5 OS=Homo sapiens OX=9606 GN=TMC5 PE=2 SV=3 2 | MSAYYRNNWSEEDPDYPDYSGSQNRTQGYLKTQGYPDVPGPLNNPDYPGTRSNPYSVASR 3 | TRPDYPGSLAEPNYPRSLSNPDYSGTRSNAYSAASRTSPDHPTSLPEPDYSEFQSHPYHR 4 | ASSRQPDYPGSQRNPDFAGSSSSGNYAGSRTHPDHFGSLEPDYPGAQSNSDHPGPRANLN 5 | HPGSRKNLEHTSFRINPYADSLGKPDYPGADIQPNSPPFFGEPDYPSAEDNQNLPSTWRE 6 | PDYSDAENGHDYGSSETPKMTRGVLSRTSSIQPSFRHRSDDPVGSLWGENDYPEGIEMAS 7 | MEMANSYGHSLPGAPGSGYVNPAYVGESGPVHAYGNPPLSECDWHKSPQGQKLIASLIPM 8 | TSRDRIKAIRNQPRTMEEKRNLRKIVDKEKSKQTHRILQLNCCIQCLNSISRAYRRSKNS 9 | LSEILNSISLWQKTLKIIGGKFGTSVLSYFNFLRWLLKFNIFSFILNFSFIIIPQFTVAK 10 | KNTLQFTGLEFFTGVGYFRDTVMYYGFYTNSTIQHGNSGASYNMQLAYIFTIGACLTTCF 11 | FSLLFSMAKYFRNNFINPHIYSGGITKLIFCWDFTVTHEKAVKLKQKNLSTEIRENLSEL 12 | RQENSKLTFNQLLTRFSAYMVAWVVSTGVAIACCAAVYYLAEYNLEFLKTHSNPGAVLLL 13 | PFVVSCINLAVPCIYSMFRLVERYEMPRHEVYVLLIRNIFLKISIIGILCYYWLNTVALS 14 | GEECWETLIGQDIYRLLLMDFVFSLVNSFLGEFLRRIIGMQLITSLGLQEFDIARNVLEL 15 | IYAQTLVWIGIFFCPLLPFIQMIMLFIMFYSKNISLMMNFQPPSKAWRASQMMTFFIFLL 16 | FFPSFTGVLCTLAITIWRLKPSADCGPFRGLPLFIHSIYSWIDTLSTRPGYLWVVWIYRN 17 | LIGSVHFFFILTLIVLIITYLYWQITEGRKIMIRLLHEQIINEGKDKMFLIEKLIKLQDM 18 | EKKANPSSLVLERREVEQQGFLHLGEHDGSLDLRSRRSVQEGNPRA 19 | -------------------------------------------------------------------------------- /data/InputFiles/mismatchH.csv: -------------------------------------------------------------------------------- 1 | Pairs,N,A,C,G,U 2 | NP,100000,100000,100000,100000,100000 3 | NP,100000,100000,100000,100000,100000 4 | NP,100000,100000,100000,100000,100000 5 | NP,100000,100000,100000,100000,100000 6 | NP,100000,100000,100000,100000,100000 7 | CG,-80,-100,-110,-100,-80 8 | CG,-140,-150,-150,-140,-150 9 | CG,-80,-100,-110,-100,-80 10 | CG,-150,-230,-150,-240,-150 11 | CG,-100,-100,-140,-100,-210 12 | GC,-50,-110,-70,-110,-50 13 | GC,-110,-110,-150,-130,-150 14 | GC,-50,-110,-70,-110,-50 15 | GC,-150,-250,-150,-220,-150 16 | GC,-100,-110,-100,-110,-160 17 | GU,20,20,-20,-10,-20 18 | GU,20,20,-50,-30,-50 
19 | GU,-10,-10,-20,-10,-20 20 | GU,-50,-100,-50,-110,-50 21 | GU,-10,-10,-30,-10,-100 22 | UG,0,-20,-10,-20,0 23 | UG,-30,-50,-30,-60,-30 24 | UG,0,-20,-10,-20,0 25 | UG,-30,-90,-30,-110,-30 26 | UG,-10,-20,-10,-20,-90 27 | AU,-10,-10,-20,-10,-20 28 | AU,-30,-30,-50,-30,-50 29 | AU,-10,-10,-20,-10,-20 30 | AU,-50,-120,-50,-110,-50 31 | AU,-10,-10,-30,-10,-120 32 | UA,0,-20,-10,-20,0 33 | UA,-30,-50,-30,-50,-30 34 | UA,0,-20,-10,-20,0 35 | UA,-30,-150,-30,-150,-30 36 | UA,-10,-20,-10,-20,-90 37 | NN,20,20,-10,-10,0 38 | NN,20,20,-30,-30,-30 39 | NN,0,-10,-10,-10,0 40 | NN,-30,-90,-30,-110,-30 41 | NN,-10,-10,-10,-10,-90 42 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q6ZRS4.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q6ZRS4|ITPI1_HUMAN Protein ITPRID1 OS=Homo sapiens OX=9606 GN=ITPRID1 PE=2 SV=2 2 | MMAQKSQGSDNLQEGQEKSKREILKCTKSAWAPLDEWLPPDPEEESQSLTIPMLEDSKQE 3 | SIQQWLDSGFFVSANENFQQVIDRTVSLYEQGMVQMTVKDYMRSLHQFSETPILSRGTSF 4 | NSCYSTASVPQSIPEWLEFWEIDPVEILLDLGFGADEPDICMQIPARFLGCGSAARGINI 5 | RVFLEAQKQRMDIENPNLYGRFRQLEILDHVTNAFSSLLSDVSILPNRAEEKAGGESVQR 6 | TSVSAAKEHRRRMGKLLRRASKQNIRRDCNPEVSESFKVKDEVFVPFTKPWDCGAELAAT 7 | SINHKQNHLSLSVEHQSLQACDDLLPYPPHGLLSKQWPCSSMPAKQAPPSCVSEGSVKGR 8 | TQKENLFQTNKLKSLSHLAGKGPDSFEMEEVQSFEEETGNPLDMTSGTVGARVDRANSCQ 9 | SDSSGFLEEPLEPLPLQMPSLPNSQSPAENGGRKPRDQSHSLVSSQDCQLESDGPDSKSR 10 | ASMSFSSQEANALEQRASVSVMEEEFLLEAMEGPPELYIPDMACAKTTTRGECPRKDSHL 11 | WQLLPMPHAEYEVTRPTATSKYDHPLGFMVTHVTEMQDSFVRPEGAGKVQSHHNESQRSP 12 | GNDHTQDKFLHVDSEAPREEESSGFCPHTNHSLLVPESSSQCIPKHSEITPYATDLAQTS 13 | EKLIPHLHKLPGDPAQVKSRSGTLGQILPGTEAEMENLPLNTGSSRSVMTQMSSSLVSAA 14 | QRAVALGTGPRGTSLECTVCDPVTATETRLGTKARQLNDASIQTSALSNKTLTHGPQPLT 15 | KSVSLDSGFSSICPMGTCHAIPAHCCICCHHHPHCHGERQSPGPEPSVCRHCLCSLTGHQ 16 | EAQFMTTLKALQDTTVRELCSCTVHEMEAMKTICQSFREYLEEIEQHLMGQQALFSRDMS 17 | EEEREEAEQLQTLREALRQQVAELEFQLGDRAQQIREGILLQLEVLTAEPPEHYSNLHQY 18 | 
NWIEESNGQTSCSKIHPGMAPRTVFPPDDGQEAPCSGGTQLAAFTPPTLENSTRMSPSSS 19 | AWAKLGPTPLSNCPVGEKDADVFL 20 | -------------------------------------------------------------------------------- /data/InputFiles/mismatchH_H.csv: -------------------------------------------------------------------------------- 1 | Pairs,N,A,C,G,U 2 | NP,100000,100000,100000,100000,100000 3 | NP,100000,100000,100000,100000,100000 4 | NP,100000,100000,100000,100000,100000 5 | NP,100000,100000,100000,100000,100000 6 | NP,100000,100000,100000,100000,100000 7 | CG,560,-570,560,-560,-270 8 | CG,-560,-910,-560,-560,-560 9 | CG,-270,-570,-340,-570,-270 10 | CG,560,-1400,560,-920,-560 11 | CG,-530,-570,-530,-570,-1440 12 | GC,50,-520,50,-560,-400 13 | GC,-400,-520,-400,-560,-400 14 | GC,50,-720,50,-720,-420 15 | GC,-400,-1290,-400,-620,-400 16 | GC,-30,-720,-30,-720,-1080 17 | GU,970,140,970,140,570 18 | GU,570,30,570,20,570 19 | GU,970,140,970,140,340 20 | GU,570,-270,570,20,570 21 | GU,830,140,830,140,-50 22 | UG,230,100,230,220,190 23 | UG,-110,-110,-260,-520,-260 24 | UG,190,-60,-140,-60,190 25 | UG,220,100,-260,220,-260 26 | UG,230,-60,230,-60,-70 27 | AU,970,140,970,140,570 28 | AU,570,-20,570,20,570 29 | AU,970,140,970,140,340 30 | AU,570,-520,570,20,570 31 | AU,830,140,830,140,-380 32 | UA,230,-30,230,-60,190 33 | UA,-30,-30,-260,-520,-260 34 | UA,190,-60,-140,-60,190 35 | UA,-260,-590,-260,-520,-260 36 | UA,230,-60,230,-60,-70 37 | NN,970,140,970,220,570 38 | NN,570,30,570,20,570 39 | NN,970,140,970,140,340 40 | NN,570,100,570,220,570 41 | NN,830,140,830,140,-50 42 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q8IZA0.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q8IZA0|K319L_HUMAN Dyslexia-associated protein KIAA0319-like protein OS=Homo sapiens OX=9606 GN=KIAA0319L PE=1 SV=2 2 | MEKRLGVKPNPASWILSGYYWQTSAKWLRSLYLFYTCFCFSVLWLSTDASESRCQQGKTQ 3 | 
FGVGLRSGGENHLWLLEGTPSLQSCWAACCQDSACHVFWWLEGMCIQADCSRPQSCRAFR 4 | THSSNSMLVFLKKFQTADDLGFLPEDDVPHLLGLGWNWASWRQSPPRAALRPAVSSSDQQ 5 | SLIRKLQKRGSPSDVVTPIVTQHSKVNDSNELGGLTTSGSAEVHKAITISSPLTTDLTAE 6 | LSGGPKNVSVQPEISEGLATTPSTQQVKSSEKTQIAVPQPVAPSYSYATPTPQASFQSTS 7 | APYPVIKELVVSAGESVQITLPKNEVQLNAYVLQEPPKGETYTYDWQLITHPRDYSGEME 8 | GKHSQILKLSKLTPGLYEFKVIVEGQNAHGEGYVNVTVKPEPRKNRPPIAIVSPQFQEIS 9 | LPTTSTVIDGSQSTDDDKIVQYHWEELKGPLREEKISEDTAILKLSKLVPGNYTFSLTVV 10 | DSDGATNSTTANLTVNKAVDYPPVANAGPNQVITLPQNSITLFGNQSTDDHGITSYEWSL 11 | SPSSKGKVVEMQGVRTPTLQLSAMQEGDYTYQLTVTDTIGQQATAQVTVIVQPENNKPPQ 12 | ADAGPDKELTLPVDSTTLDGSKSSDDQKIISYLWEKTQGPDGVQLENANSSVATVTGLQV 13 | GTYVFTLTVKDERNLQSQSSVNVIVKEEINKPPIAKITGNVVITLPTSTAELDGSKSSDD 14 | KGIVSYLWTRDEGSPAAGEVLNHSDHHPILFLSNLVEGTYTFHLKVTDAKGESDTDRTTV 15 | EVKPDPRKNNLVEIILDINVSQLTERLKGMFIRQIGVLLGVLDSDIIVQKIQPYTEQSTK 16 | MVFFVQNEPPHQIFKGHEVAAMLKSELRKQKADFLIFRALEVNTVTCQLNCSDHGHCDSF 17 | TKRCICDPFWMENFIKVQLRDGDSNCEWSVLYVIIATFVIVVALGILSWTVICCCKRQKG 18 | KPKRKSKYKILDATDQESLELKPTSRAGIKQKGLLLSSSLMHSESELDSDDAIFTWPDRE 19 | KGKLLHGQNGSVPNGQTPLKARSPREEIL 20 | -------------------------------------------------------------------------------- /data/uniprotSeq/ReverseCovid.fasta: -------------------------------------------------------------------------------- 1 | > Reverse Engineered Covid Sequence 2 | 
MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDCLGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDILSRLDPPEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDDSEPVLKGVKLHYT 3 | -------------------------------------------------------------------------------- /data/uniprotSeq/O75151.fasta: -------------------------------------------------------------------------------- 1 | >sp|O75151|PHF2_HUMAN Lysine-specific demethylase PHF2 OS=Homo sapiens OX=9606 GN=PHF2 PE=1 SV=4 2 | MATVPVYCVCRLPYDVTRFMIECDACKDWFHGSCVGVEEEEAPDIDIYHCPNCEKTHGKS 3 | TLKKKRTWHKHGPGQAPDVKPVQNGSQLFIKELRSRTFPSAEDVVARVPGSQLTLGYMEE 4 | HGFTEPILVPKKDGLGLAVPAPTFYVSDVENYVGPERSVDVTDVTKQKDCKMKLKEFVDY 5 | YYSTNRKRVLNVTNLEFSDTRMSSFVEPPDIVKKLSWVENYWPDDALLAKPKVTKYCLIC 6 | VKDSYTDFHIDSGGASAWYHVLKGEKTFYLIRPASANISLYERWRSASNHSEMFFADQVD 7 | KCYKCIVKQGQTLFIPSGWIYATLTPVDCLAFAGHFLHSLSVEMQMRAYEVERRLKLGSL 8 | 
TQFPNFETACWYMGKHLLEAFKGSHKSGKQLPPHLVQGAKILNGAFRSWTKKQALAEHED 9 | ELPEHFKPSQLIKDLAKEIRLSENASKAVRPEVNTVASSDEVCDGDREKEEPPSPIEATP 10 | PQSLLEKVSKKKTPKTVKMPKPSKIPKPPKPPKPPRPPKTLKLKDGGKKKGKKSRESASP 11 | TIPNLDLLEAHTKEALTKMEPPKKGKATKSVLSVPNKDVVHMQNDVERLEIREQTKSKSE 12 | AKWKYKNSKPDSLLKMEEEQKLEKSPLAGNKDNKFSFSFSNKKLLGSKALRPPTSPGVFG 13 | ALQNFKEDKPKPVRDEYEYVSDDGELKIDEFPIRRKKNAPKRDLSFLLDKKAVLPTPVTK 14 | PKLDSAAYKSDDSSDEGSLHIDTDTKPGRNARVKKESGSSAAGILDLLQASEEVGALEYN 15 | PSSQPPASPSTQEAIQGMLSMANLQASDSCLQTTWGAGQAKGSSLAAHGARKNGGGSGKS 16 | AGKRLLKRAAKNSVDLDDYEEEQDHLDACFKDSDYVYPSLESDEDNPIFKSRSKKRKGSD 17 | DAPYSPTARVGPSVPRQDRPVREGTRVASIETGLAAAAAKLSQQEEQKSKKKKSAKRKLT 18 | PNTTSPSTSTSISAGTTSTSTTPASTTPASTTPASTSTASSQASQEGSSPEPPPESHSSS 19 | LADHEYTAAGTFTGAQAGRTSQPMAPGVFLTQRRPSASSPNNNTAAKGKRTKKGMATAKQ 20 | RLGKILKIHRNGKLLL 21 | -------------------------------------------------------------------------------- /data/uniprotSeq/O60721.fasta: -------------------------------------------------------------------------------- 1 | >sp|O60721|NCKX1_HUMAN Sodium/potassium/calcium exchanger 1 OS=Homo sapiens OX=9606 GN=SLC24A1 PE=1 SV=1 2 | MGKLIRMGPQERWLLRTKRLHWSRLLFLLGMLIIGSTYQHLRRPRGLSSLWAAVSSHQPI 3 | KLASRDLSSEEMMMMSSSPSKPSSEMGGKMLVPQASVGSDEATLSMTVENIPSMPKRTAK 4 | MIPTTTKNNYSPTAAGTERRKEDTPTSSRTLTYYTSTSSRQIVKKYTPTPRGEMKSYSPT 5 | QVREKVKYTPSPRGRRVGTYVPSTFMTMETSHAITPRTTVKDSDITATYKILETNSLKRI 6 | MEETTPTTLKGMFDSTPTFLTHEVEANVLTSPRSVMEKNNLFPPRRVESNSSAHPWGLVG 7 | KSNPKTPQGTVLLHTPATSEGQVTISTMTGSSPAETKAFTAAWSLRNPSPRTSVSAIKTA 8 | PAIVWRLAKKPSTAPSTSTTPTVRAKLTMQVHHCVVVKPTPAMLTTPSPSLTTALLPEEL 9 | SPSPSVLPPSLPDLHPKGEYPPDLFSVEERRQGWVVLHVFGMMYVFVALAIVCDEYFVPA 10 | LGVITDKLQISEDVAGATFMAAGGSAPELFTSLIGVFISHSNVGIGTIVGSAVFNILFVI 11 | GTCSLFSREILNLTWWPLFRDVSFYILDLIMLILFFLDSLIAWWESLLLLLAYAFYVFTM 12 | KWNKHIEVWVKEQLSRRPVAKVMALEDLSKPGDGAIAVDELQDNKKLKLPSLLTRGSSST 13 | SLHNSTIRSTIYQLMLHSLDPLREVRLAKEKEEESLNQGARAQPQAKAESKPEEEEPAKL 14 | PAVTVTPAPVPDIKGDQKENPGGQEDVAEAESTGEMPGEEGETAGEGETEEKSGGETQPE 15 | 
GEGETETQGKGEECEDENEAEGKGDNEGEDEGEIHAEDGEMKGNEGETESQELSAENHGE 16 | AKNDEKGVEDGGGSDGGDSEEEEEEEEEQEEEEEEEEQEEEEEEEEEEEEKGNEEPLSLD 17 | WPETRQKQAIYLFLLPIVFPLWLTVPDVRRQESRKFFVFTFLGSIMWIAMFSYLMVWWAH 18 | QVGETIGISEEIMGLTILAAGTSIPDLITSVIVARKGLGDMAVSSSVGSNIFDITVGLPV 19 | PWLLFSLINGLQPVPVSSNGLFCAIVLLFLMLLFVISSIASCKWRMNKILGFTMFLLYFV 20 | FLIISVMLEDRIISCPVSV 21 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q15147.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q15147|PLCB4_HUMAN 1-phosphatidylinositol 4,5-bisphosphate phosphodiesterase beta-4 OS=Homo sapiens OX=9606 GN=PLCB4 PE=1 SV=3 2 | MAKPYEFNWQKEVPSFLQEGAVFDRYEEESFVFEPNCLFKVDEFGFFLTWRSEGKEGQVL 3 | ECSLINSIRSGAIPKDPKILAALEAVGKSENDLEGRIVCVCSGTDLVNISFTYMVAENPE 4 | VTKQWVEGLRSIIHNFRANNVSPMTCLKKHWMKLAFMTNTNGKIPVRSITRTFASGKTEK 5 | VIFQALKELGLPSGKNDEIEPTAFSYEKFYELTQKICPRTDIEDLFKKINGDKTDYLTVD 6 | QLVSFLNEHQRDPRLNEILFPFYDAKRAMQIIEMYEPDEDLKKKGLISSDGFCRYLMSDE 7 | NAPVFLDRLELYQEMDHPLAHYFISSSHNTYLTGRQFGGKSSVEMYRQVLLAGCRCVELD 8 | CWDGKGEDQEPIITHGKAMCTDILFKDVIQAIKETAFVTSEYPVILSFENHCSKYQQYKM 9 | SKYCEDLFGDLLLKQALESHPLEPGRALPSPNDLKRKILIKNKRLKPEVEKKQLEALRSM 10 | MEAGESASPANILEDDNEEEIESADQEEEAHPEFKFGNELSADDLGHKEAVANSVKKGLV 11 | TVEDEQAWMASYKYVGATTNIHPYLSTMINYAQPVKFQGFHVAEERNIHYNMSSFNESVG 12 | LGYLKTHAIEFVNYNKRQMSRIYPKGGRVDSSNYMPQIFWNAGCQMVSLNYQTPDLAMQL 13 | NQGKFEYNGSCGYLLKPDFMRRPDRTFDPFSETPVDGVIAATCSVQVISGQFLSDKKIGT 14 | YVEVDMYGLPTDTIRKEFRTRMVMNNGLNPVYNEESFVFRKVILPDLAVLRIAVYDDNNK 15 | LIGQRILPLDGLQAGYRHISLRNEGNKPLSLPTIFCNIVLKTYVPDGFGDIVDALSDPKK 16 | FLSITEKRADQMRAMGIETSDIADVPSDTSKNDKKGKANTAKANVTPQSSSELRPTTTAA 17 | LASGVEAKKGIELIPQVRIEDLKQMKAYLKHLKKQQKELNSLKKKHAKEHSTMQKLHCTQ 18 | VDKIVAQYDKEKSTHEKILEKAMKKKGGSNCLEMKKETEIKIQTLTSDHKSKVKEIVAQH 19 | TKEWSEMINTHSAEEQEIRDLHLSQQCELLKKLLINAHEQQTQQLKLSHDRESKEMRAHQ 20 | AKISMENSKAISQDKSIKNKAERERRVRELNSSNTKKFLEERKRLAMKQSKEMDQLKKVQ 21 | LEHLEFLEKQNEQAKEMQQMVKLEAEMDRRPATVV 22 | 
-------------------------------------------------------------------------------- /data/uniprotSeq/P54098.fasta: -------------------------------------------------------------------------------- 1 | >sp|P54098|DPOG1_HUMAN DNA polymerase subunit gamma-1 OS=Homo sapiens OX=9606 GN=POLG PE=1 SV=1 2 | MSRLLWRKVAGATVGPGPVPAPGRWVSSSVPASDPSDGQRRRQQQQQQQQQQQQQPQQPQ 3 | VLSSEGGQLRHNPLDIQMLSRGLHEQIFGQGGEMPGEAAVRRSVEHLQKHGLWGQPAVPL 4 | PDVELRLPPLYGDNLDQHFRLLAQKQSLPYLEAANLLLQAQLPPKPPAWAWAEGWTRYGP 5 | EGEAVPVAIPEERALVFDVEVCLAEGTCPTLAVAISPSAWYSWCSQRLVEERYSWTSQLS 6 | PADLIPLEVPTGASSPTQRDWQEQLVVGHNVSFDRAHIREQYLIQGSRMRFLDTMSMHMA 7 | ISGLSSFQRSLWIAAKQGKHKVQPPTKQGQKSQRKARRGPAISSWDWLDISSVNSLAEVH 8 | RLYVGGPPLEKEPRELFVKGTMKDIRENFQDLMQYCAQDVWATHEVFQQQLPLFLERCPH 9 | PVTLAGMLEMGVSYLPVNQNWERYLAEAQGTYEELQREMKKSLMDLANDACQLLSGERYK 10 | EDPWLWDLEWDLQEFKQKKAKKVKKEPATASKLPIEGAGAPGDPMDQEDLGPCSEEEEFQ 11 | QDVMARACLQKLKGTTELLPKRPQHLPGHPGWYRKLCPRLDDPAWTPGPSLLSLQMRVTP 12 | KLMALTWDGFPLHYSERHGWGYLVPGRRDNLAKLPTGTTLESAGVVCPYRAIESLYRKHC 13 | LEQGKQQLMPQEAGLAEEFLLTDNSAIWQTVEELDYLEVEAEAKMENLRAAVPGQPLALT 14 | ARGGPKDTQPSYHHGNGPYNDVDIPGCWFFKLPHKDGNSCNVGSPFAKDFLPKMEDGTLQ 15 | AGPGGASGPRALEINKMISFWRNAHKRISSQMVVWLPRSALPRAVIRHPDYDEEGLYGAI 16 | LPQVVTAGTITRRAVEPTWLTASNARPDRVGSELKAMVQAPPGYTLVGADVDSQELWIAA 17 | VLGDAHFAGMHGCTAFGWMTLQGRKSRGTDLHSKTATTVGISREHAKIFNYGRIYGAGQP 18 | FAERLLMQFNHRLTQQEAAEKAQQMYAATKGLRWYRLSDEGEWLVRELNLPVDRTEGGWI 19 | SLQDLRKVQRETARKSQWKKWEVVAERAWKGGTESEMFNKLESIATSDIPRTPVLGCCIS 20 | RALEPSAVQEEFMTSRVNWVVQSSAVDYLHLMLVAMKWLFEEFAIDGRFCISIHDEVRYL 21 | VREEDRYRAALALQITNLLTRCMFAYKLGLNDLPQSVAFFSAVDIDRCLRKEVTMDCKTP 22 | SNPTGMERRYGIPQGEALDIYQIIELTKGSLEKRSQPGP 23 | -------------------------------------------------------------------------------- /data/uniprotSeq/P0DTC2.fasta: -------------------------------------------------------------------------------- 1 | >sp|P0DTC2|SPIKE_SARS2 Spike glycoprotein OS=Severe acute respiratory syndrome coronavirus 2 OX=2697049 GN=S PE=1 SV=1 2 | 
MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFS 3 | NVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIV 4 | NNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLE 5 | GKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQT 6 | LLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETK 7 | CTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISN 8 | CVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIAD 9 | YNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPC 10 | NGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVN 11 | FNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITP 12 | GTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSY 13 | ECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTI 14 | SVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQE 15 | VFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDC 16 | LGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAM 17 | QMAYRFNGIGVTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALN 18 | TLVKQLSSNFGAISSVLNDILSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRA 19 | SANLAATKMSECVLGQSKRVDFCGKGYHLMSFPQSAPHGVVFLHVTYVPAQEKNFTTAPA 20 | ICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNTFVSGNCDVVIGIVNNTVYDP 21 | LQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVAKNLNESLIDL 22 | QELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDD 23 | SEPVLKGVKLHYT 24 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q6VMQ6.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q6VMQ6|MCAF1_HUMAN Activating transcription factor 7-interacting protein 1 OS=Homo sapiens OX=9606 GN=ATF7IP PE=1 SV=3 2 | MDSLEEPQKKVFKARKTMRVSDRQQLEAVYKVKEELLKTDVKLLNGNHENGDLDPTSPLE 3 | NMDYIKDKEEVNGIEEICFDPEGSKAEWKETPCILSVNVKNKQDDDLNCEPLSPHNITPE 4 | PVSKLPAEPVSGDPAPGDLDAGDPASGVLASGDSTSGDPTSSEPSSSDAASGDATSGDAP 5 | SGDVSPGDATSGDATADDLSSGDPTSSDPIPGEPVPVEPISGDCAADDIASSEITSVDLA 6 | 
SGAPASTDPASDDLASGDLSSSELASDDLATGELASDELTSESTFDRTFEPKSVPVCEPV 7 | PEIDNIEPSSNKDDDFLEKNGADEKLEQIQSKDSLDEKNKADNNIDANEETLETDDTTIC 8 | SDRPPENEKKVEEDIITELALGEDAISSSMEIDQGEKNEDETSADLVETINENVIEDNKS 9 | ENILENTDSMETDEIIPILEKLAPSEDELTCFSKTSLLPIDETNPDLEEKMESSFGSPSK 10 | QESSESLPKEAFLVLSDEEDISGEKDESEVISQNETCSPAEVESNEKDNKPEEEEQVIHE 11 | DDERPSEKNEFSRRKRSKSEDMDNVQSKRRRYMEEEYEAEFQVKITAKGDINQKLQKVIQ 12 | WLLEEKLCALQCAVFDKTLAELKTRVEKIECNKRHKTVLTELQAKIARLTKRFEAAKEDL 13 | KKRHEHPPNPPVSPGKTVNDVNSNNNMSYRNAGTVRQMLESKRNVSESAPPSFQTPVNTV 14 | SSTNLVTPPAVVSSQPKLQTPVTSGSLTATSVLPAPNTATVVATTQVPSGNPQPTISLQP 15 | LPVILHVPVAVSSQPQLLQSHPGTLVTNQPSGNVEFISVQSPPTVSGLTKNPVSLPSLPN 16 | PTKPNNVPSVPSPSIQRNPTASAAPLGTTLAVQAVPTAHSIVQATRTSLPTVGPSGLYSP 17 | STNRGPIQMKIPISAFSTSSAAEQNSNTTPRIENQTNKTIDASVSKKAADSTSQCGKATG 18 | SDSSGVIDLTMDDEESGASQDPKKLNHTPVSTMSSSQPVSRPLQPIQPAPPLQPSGVPTS 19 | GPSQTTIHLLPTAPTTVNVTHRPVTQVTTRLPVPRAPANHQVVYTTLPAPPAQAPLRGTV 20 | MQAPAVRQVNPQNSVTVRVPQTTTYVVNNGLTLGSTGPQLTVHHRPPQVHTEPPRPVHPA 21 | PLPEAPQPQRLPPEAASTSLPQKPHLKLARVQSQNGIVLSWSVLEVDRSCATVDSYHLYA 22 | YHEEPSATVPSQWKKIGEVKALPLPMACTLTQFVSGSKYYFAVRAKDIYGRFGPFCDPQS 23 | TDVISSTQSS 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, El-Kebir Group 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. 
Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q5TZJ5.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q5TZJ5|S31A1_HUMAN Spermatogenesis-associated protein 31A1 OS=Homo sapiens OX=9606 GN=SPATA31A1 PE=3 SV=1 2 | MENLPFPLKLLSASSLNAPSSTPWVLDIFLTLVFALGFFFLLLPYLSYFRCDDPPSPSPG 3 | KRKCPVGRRRRPRGRMKNHSLRAGRECPRGLQETSDLLSQLQSLLGPHLDKGDFGQLSGP 4 | DPPGEVGERAPDGASQSSHEPMEDAAPILSPLASPDPQAKHPQDLASTPSPGPMTTSVSS 5 | LSASQPPEPSLPLEHPSPEPPALFPHPPHTPDPLACSPPPPKGFTAPPLRDSTLITPSHC 6 | DSVALPLGTVPQSLSPHEDLVASVPAISGLGGSNSHVSASSRWQETARTSCAFNSSVQQD 7 | HLSRHPPETYQMEAGSLFLLSSDGQNAVGIQVTETAKVNIWEEKENVGSFTDRMTPEKHL 8 | NSLRNLAKSLDAEQDTTNPKPFWNMGENSKQLPGPQKLSDPRLWQESFWKNYSQLFWGLP 9 | SLHSESLVANAWVTDRSYTLQSPPFLFNEMSNVCPIQRETTMSPLLFQAQPPSHLGPECQ 10 | PFISSTPQFRPTPMAQAEAQAHLQSSFPVLSPAFPSLIKNTGVACPASQNKVQALSLPET 11 | QHPEWPLLRRQLEGRLALPSRVQKSQDVFSVSTPNLPQESLTSILPENFPVSPELRRQLE 12 | QHIKKWIIQHWGNLGRIQESLDLMQLRDESPGTSQAKGKPSPWQSSMSTGESSKEAQKVK 13 | FQLERDPCPHLGQILGETPQNLSRDMKSFPRKVLGVTSEESERNLRKPLRSDSGSDLLRC 14 | TERTHIENILKAHMGRNLGQTNEGLIPVRVRRSWLAVNQALPVSNTHVKTSNLAAPKSGK 15 | ACVNTAQVLSFLEPCTQQGLGAHIVRFWAKHRWGLPLRVLKPIQCFKLEKVSSLSLTQLA 16 | GPSSATCESGAGSEVEVDMFLRKPPMASLRKQVLTKASDHMPESLLASSPAWKQFQRAPR 17 | GIPSWNDHGPLKPPPAGQEGRWPSKPLTYSLTGSTQQSRSLGAQSSKAGETREAVPQCRV 18 | PLETCMLANLQATSEDMHGFEAPGTSKSSLHPRVSVSQDPRKLCLMEEVVNEFEPGMATK 19 | SETQPQVCAAVVLLPDGQASVVPHASENLVSQVPQGHLQSMPAGNMRASQELHDLMAARR 20 | SKLVHEEPRNPNCQGSCKNQRPMFPPIHKSEKSRKPNLEKHEERLEGLRTPQLTPVRKTE 21 | DTHQDEGVQLLPSKKQPPSVSHFGGNIKQFFQWIFSKKKSKPAPVTAESQKTVKNRSCVY 22 | SSSAEAQGLMTAVGQMLDEKMSLCHARHASKVNQHKQKFQAPVCGFPCNHRHLFYSEHGR 23 | ILSYAASSQQATLKSQGCPNRDRQIRNQQPLKSVRCNNEQWGLRHPQILHPKKAVSPVSP 24 | LQHWPKTSGASSHHHHCPRHCLLWEGI 25 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q96HA7.fasta: -------------------------------------------------------------------------------- 1 | 
>sp|Q96HA7|TONSL_HUMAN Tonsoku-like protein OS=Homo sapiens OX=9606 GN=TONSL PE=1 SV=2 2 | MSLERELRQLSKAKAKAQRAGQRREEAALCHQLGELLAGHGRYAEALEQHWQELQLRERA 3 | DDPLGCAVAHRKIGERLAEMEDYPAALQHQHQYLELAHSLRNHTELQRAWATIGRTHLDI 4 | YDHCQSRDALLQAQAAFEKSLAIVDEELEGTLAQGELNEMRTRLYLNLGLTFESLQQTAL 5 | CNDYFRKSIFLAEQNHLYEDLFRARYNLGTIHWRAGQHSQAMRCLEGARECAHTMRKRFM 6 | ESECCVVIAQVLQDLGDFLAAKRALKKAYRLGSQKPVQRAAICQNLQHVLAVVRLQQQLE 7 | EAEGRDPQGAMVICEQLGDLFSKAGDFPRAAEAYQKQLRFAELLDRPGAERAIIHVSLAT 8 | TLGDMKDHHGAVRHYEEELRLRSGNVLEEAKTWLNIALSREEAGDAYELLAPCFQKALSC 9 | AQQAQRPQLQRQVLQHLHTVQLRLQPQEAPETETRLRELSVAEDEDEEEEAEEAAATAES 10 | EALEAGEVELSEGEDDTDGLTPQLEEDEELQGHLGRRKGSKWNRRNDMGETLLHRACIEG 11 | QLRRVQDLVRQGHPLNPRDYCGWTPLHEACNYGHLEIVRFLLDHGAAVDDPGGQGCEGIT 12 | PLHDALNCGHFEVAELLLERGASVTLRTRKGLSPLETLQQWVKLYRRDLDLETRQKARAM 13 | EMLLQAAASGQDPHSSQAFHTPSSLLFDPETSPPLSPCPEPPSNSTRLPEASQAHVRVSP 14 | GQAAPAMARPRRSRHGPASSSSSSEGEDSAGPARPSQKRPRCSATAQRVAAWTPGPASNR 15 | EAATASTSRAAYQAAIRGVGSAQSRLGPGPPRGHSKALAPQAALIPEEECLAGDWLELDM 16 | PLTRSRRPRPRGTGDNRRPSSTSGSDSEESRPRARAKQVRLTCMQSCSAPVNAGPSSLAS 17 | EPPGSPSTPRVSEPSGDSSAAGQPLGPAPPPPIRVRVQVQDHLFLIPVPHSSDTHSVAWL 18 | AEQAAQRYYQTCGLLPRLTLRKEGALLAPQDLIPDVLQSNDEVLAEVTSWDLPPLTDRYR 19 | RACQSLGQGEHQQVLQAVELQGLGLSFSACSLALDQAQLTPLLRALKLHTALRELRLAGN 20 | RLGDKCVAELVAALGTMPSLALLDLSSNHLGPEGLRQLAMGLPGQATLQSLEELDLSMNP 21 | LGDGCGQSLASLLHACPLLSTLRLQACGFGPSFFLSHQTALGSAFQDAEHLKTLSLSYNA 22 | LGAPALARTLQSLPAGTLLHLELSSVAAGKGDSDLMEPVFRYLAKEGCALAHLTLSANHL 23 | GDKAVRDLCRCLSLCPSLISLDLSANPEISCASLEELLSTLQKRPQGLSFLGLSGCAVQG 24 | PLGLGLWDKIAAQLRELQLCSRRLCAEDRDALRQLQPSRPGPGECTLDHGSKLFFRRL 25 | -------------------------------------------------------------------------------- /data/uniprotSeq/Q5VYS8.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q5VYS8|TUT7_HUMAN Terminal uridylyltransferase 7 OS=Homo sapiens OX=9606 GN=TUT7 PE=1 SV=1 2 | MGDTAKPYFVKRTKDRGTMDDDDFRRGHPQQDYLIIDDHAKGHGSKMEKGLQKKKITPGN 3 | 
YGNTPRKGPCAVSSNPYAFKNPIYSQPAWMNDSHKDQSKRWLSDEHTGNSDNWREFKPGP 4 | RIPVINRQRKDSFQENEDGYRWQDTRGCRTVRRLFHKDLTSLETTSEMEAGSPENKKQRS 5 | RPRKPRKTRNEENEQDGDLEGPVIDESVLSTKELLGLQQAEERLKRDCIDRLKRRPRNYP 6 | TAKYTCRLCDVLIESIAFAHKHIKEKRHKKNIKEKQEEELLTTLPPPTPSQINAVGIAID 7 | KVVQEFGLHNENLEQRLEIKRIMENVFQHKLPDCSLRLYGSSCSRLGFKNSDVNIDIQFP 8 | AIMSQPDVLLLVQECLKNSDSFIDVDADFHARVPVVVCREKQSGLLCKVSAGNENACLTT 9 | KHLTALGKLEPKLVPLVIAFRYWAKLCSIDRPEEGGLPPYVFALMAIFFLQQRKEPLLPV 10 | YLGSWIEGFSLSKLGNFNLQDIEKDVVIWEHTDSAAGDTGITKEEAPRETPIKRGQVSLI 11 | LDVKHQPSVPVGQLWVELLRFYALEFNLADLVISIRVKELVSRELKDWPKKRIAIEDPYS 12 | VKRNVARTLNSQPVFEYILHCLRTTYKYFALPHKITKSSLLKPLNAITCISEHSKEVINH 13 | HPDVQTKDDKLKNSVLAQGPGATSSAANTCKVQPLTLKETAESFGSPPKEEMGNEHISVH 14 | PENSDCIQADVNSDDYKGDKVYHPETGRKNEKEKVGRKGKHLLTVDQKRGEHVVCGSTRN 15 | NESESTLDLEGFQNPTAKECEGLATLDNKADLDGESTEGTEELEDSLNHFTHSVQGQTSE 16 | MIPSDEEEEDDEEEEEEEEPRLTINQREDEDGMANEDELDNTYTGSGDEDALSEEDDELG 17 | EAAKYEDVKECGKHVERALLVELNKISLKEENVCEEKNSPVDQSDFFYEFSKLIFTKGKS 18 | PTVVCSLCKREGHLKKDCPEDFKRIQLEPLPPLTPKFLNILDQVCIQCYKDFSPTIIEDQ 19 | AREHIRQNLESFIRQDFPGTKLSLFGSSKNGFGFKQSDLDVCMTINGLETAEGLDCVRTI 20 | EELARVLRKHSGLRNILPITTAKVPIVKFFHLRSGLEVDISLYNTLALHNTRLLSAYSAI 21 | DPRVKYLCYTMKVFTKMCDIGDASRGSLSSYAYTLMVLYFLQQRNPPVIPVLQEIYKGEK 22 | KPEIFVDGWNIYFFDQIDELPTYWSECGKNTESVGQLWLGLLRFYTEEFDFKEHVISIRR 23 | KSLLTTFKKQWTSKYIVIEDPFDLNHNLGAGLSRKMTNFIMKAFINGRRVFGIPVKGFPK 24 | DYPSKMEYFFDPDVLTEGELAPNDRCCRICGKIGHFMKDCPMRRKVRRRRDQEDALNQRY 25 | PENKEKRSKEDKEIHNKYTEREVSTKEDKPIQCTPQKAKPMRAAADLGREKILRPPVEKW 26 | KRQDDKDLREKRCFICGREGHIKKECPQFKGSSGSLSSKYMTQGKASAKRTQQES 27 | -------------------------------------------------------------------------------- /src/Nussinov.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by xinyu on 4/10/2022. 
3 | // 4 | 5 | #ifndef RNA_DESIGN_NUSSINOV_H 6 | #define RNA_DESIGN_NUSSINOV_H 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | class Nussinov { /* State for the Nussinov-based model: a flat table `value` (read and written through Access()), the input `protein`, and the integers n and g taken by the constructor (the README describes g as the minimal gap allowed in the Nussinov based model). */ 13 | vector value; 14 | vector protein; 15 | // vector n_codon; 16 | // string rna; 17 | int n,g; 18 | vector> codon_index; 19 | vector> codon_index_x; 20 | vector> codon_index_y; 21 | 22 | public: 23 | Nussinov(vector protein, int n=0, int g=0); 24 | Nussinov(const Nussinov &); 25 | ~Nussinov(); 26 | tuple nussinov (ostream& fout); 27 | string get_rna(); 28 | string find_bp(int a, int b, int i, int j, int x, int y, int seq); 29 | void lambda_sweep(double incr, ostream& fout, string outfile); 30 | tuple nussinov_CAI(double lambda, ostream& fout); 31 | string nussinov_CAI_tb(int a, int b, int i, int j, int x, int y, double lambda); 32 | inline double & Access(int a, int b, int i, int j, int x, int y); 33 | private: 34 | void initialize(); 35 | }; 36 | 37 | inline double & Nussinov::Access(int a, int b, int i, int j, int x, int y) { /* Returns a mutable reference to the entry of `value` for (a, b, i, j, x, y). The flat index is 36*(9*(n-1)*(b-a) + 9*(2*b-a) + (6-3*i+j)) + 6*x + y. No bounds check is performed (the assert below is commented out). NOTE(review): the disabled debug/assert lines below use (3*j-2*i) where the live code uses (6-3*i+j) - reconcile them before re-enabling. */ 38 | // cout << "a: " << a << ",b: " << b << ",i: " << i << ",j: " << j << ",x: " << x << ",y: " << y << endl; 39 | // if (a == b) { 40 | // assert(x==y); 41 | // if (i > j) { 42 | // throw std::invalid_argument("i > j"); 43 | // } 44 | // assert(j>=i); 45 | // 46 | // } 47 | // cout << "a: " << a << ",b: " << b << ",i: " << i << ",j: " << j << ",x: " << x << ",y: " << y << ",index: " << 36*(9*(n-1)*(b-a)+9*(2*b-a)+(3*j-2*i))+6*x+y << ",size: " << value.size() << endl; 48 | // assert(36*(9*(n-1)*(b-a)+9*(2*b-a)+(3*j-2*i))+6*x+y < value.size()); 49 | return value[36*(9*(n-1)*(b-a)+9*(2*b-a)+(6-3*i+j))+6*x+y]; 50 | } 51 | 52 | #endif //RNA_DESIGN_NUSSINOV_H 53 | -------------------------------------------------------------------------------- /data/uniprotSeq/P51805.fasta: -------------------------------------------------------------------------------- 1 | >sp|P51805|PLXA3_HUMAN Plexin-A3 OS=Homo sapiens OX=9606 GN=PLXNA3 PE=1 SV=3 2 |
MPSVCLLLLLFLAVGGALGNRPFRAFVVTDTTLTHLAVHRVTGEVFVGAVNRVFKLAPNL 3 | TELRAHVTGPVEDNARCYPPPSMRVCAHRLAPVDNINKLLLIDYAARRLVACGSIWQGIC 4 | QFLRLDDLFKLGEPHHRKEHYLSGAQEPDSMAGVIVEQGQGPSKLFVGTAVDGKSEYFPT 5 | LSSRKLISDEDSADMFSLVYQDEFVSSQIKIPSDTLSLYPAFDIYYIYGFVSASFVYFLT 6 | LQLDTQQTLLDTAGEKFFTSKIVRMCAGDSEFYSYVEFPIGCSWRGVEYRLVQSAHLAKP 7 | GLLLAQALGVPADEDVLFTIFSQGQKNRASPPRQTILCLFTLSNINAHIRRRIQSCYRGE 8 | GTLALPWLLNKELPCINTPMQINGNFCGLVLNQPLGGLHVIEGLPLLADSTDGMASVAAY 9 | TYRQHSVVFIGTRSGSLKKVRVDGFQDAHLYETVPVVDGSPILRDLLFSPDHRHIYLLSE 10 | KQVSQLPVETCEQYQSCAACLGSGDPHCGWCVLRHRCCREGACLGASAPHGFAEELSKCV 11 | QVRVRPNNVSVTSPGVQLTVTLHNVPDLSAGVSCAFEAAAENEAVLLPSGELLCPSPSLQ 12 | ELRALTRGHGATRTVRLQLLSKETGVRFAGADFVFYNCSVLQSCMSCVGSPYPCHWCKYR 13 | HTCTSRPHECSFQEGRVHSPEGCPEILPSGDLLIPVGVMQPLTLRAKNLPQPQSGQKNYE 14 | CVVRVQGRQQRVPAVRFNSSSVQCQNASYSYEGDEHGDTELDFSVVWDGDFPIDKPPSFR 15 | ALLYKCWAQRPSCGLCLKADPRFNCGWCISEHRCQLRTHCPAPKTNWMHLSQKGTRCSHP 16 | RITQIHPLVGPKEGGTRVTIVGENLGLLSREVGLRVAGVRCNSIPAEYISAERIVCEMEE 17 | SLVPSPPPGPVELCVGDCSADFRTQSEQVYSFVTPTFDQVSPSRGPASGGTRLTISGSSL 18 | DAGSRVTVTVRDSECQFVRRDAKAIVCISPLSTLGPSQAPITLAIDRANISSPGLIYTYT 19 | QDPTVTRLEPTWSIINGSTAITVSGTHLLTVQEPRVRAKYRGIETTNTCQVINDTAMLCK 20 | APGIFLGRPQPRAQGEHPDEFGFLLDHVQTARSLNRSSFTYYPDPSFEPLGPSGVLDVKP 21 | GSHVVLKGKNLIPAAAGSSRLNYTVLIGGQPCSLTVSDTQLLCDSPSQTGRQPVMVLVGG 22 | LEFWLGTLHISAERALTLPAMMGLAAGGGLLLLAITAVLVAYKRKTQDADRTLKRLQLQM 23 | DNLESRVALECKEAFAELQTDINELTNHMDEVQIPFLDYRTYAVRVLFPGIEAHPVLKEL 24 | DTPPNVEKALRLFGQLLHSRAFVLTFIHTLEAQSSFSMRDRGTVASLTMVALQSRLDYAT 25 | GLLKQLLADLIEKNLESKNHPKLLLRRTESVAEKMLTNWFTFLLHKFLKECAGEPLFLLY 26 | CAIKQQMEKGPIDAITGEARYSLSEDKLIRQQIDYKTLTLHCVCPENEGSAQVPVKVLNC 27 | DSITQAKDKLLDTVYKGIPYSQRPKAEDMDLEWRQGRMTRIILQDEDVTTKIECDWKRLN 28 | SLAHYQVTDGSLVALVPKQVSAYNMANSFTFTRSLSRYESLLRTASSPDSLRSRAPMITP 29 | DQETGTKLWHLVKNHDHADHREGDRGSKMVSEIYLTRLLATKGTLQKFVDDLFETVFSTA 30 | HRGSALPLAIKYMFDFLDEQADQRQISDPDVRHTWKSNCLPLRFWVNVIKNPQFVFDIHK 31 | NSITDACLSVVAQTFMDSCSTSEHRLGKDSPSNKLLYAKDIPNYKSWVERYYRDIAKMAS 32 | 
ISDQDMDAYLVEQSRLHASDFSVLSALNELYFYVTKYRQEILTALDRDASCRKHKLRQKL 33 | EQIISLVSSDS 34 | -------------------------------------------------------------------------------- /src/NussinovAlgorithm.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by xinyu on 4/24/2022. 3 | // 4 | 5 | #include "NussinovAlgorithm.h" 6 | #include "utils.h" 7 | #include 8 | using namespace std; 9 | 10 | NussinovAlgorithm::NussinovAlgorithm(vector& rna, int n, int g):rna(rna),n(n),g(g) { 11 | dp.resize(n*n, -1); 12 | } 13 | 14 | NussinovAlgorithm::~NussinovAlgorithm() { 15 | 16 | } 17 | 18 | int NussinovAlgorithm::nussinov(int i, int j) { 19 | if (dp[index(i,j)] != -1) return dp[index(i,j)]; 20 | 21 | if (i+g >= j) { 22 | dp[index(i,j)] = 0; 23 | return 0; 24 | } 25 | 26 | int ret = 0; 27 | if (i+g < j) ret = max(ret, nussinov(i+1, j)); 28 | if (i+g < j) ret = max(ret, nussinov(i, j-1)); 29 | // cout << rna[i] << " " << rna[j] << endl; 30 | if (complementary(rna[i],rna[j])) { 31 | 32 | if (i+g < j) ret = max(ret, nussinov(i+1, j-1) + 1); 33 | 34 | if (i+g == j-1) ret = max(ret, 1); 35 | } 36 | if (i+g < j) { 37 | for (int k=i+g; k j) return ""; 48 | if (i == j) return "."; 49 | 50 | if (nussinov(i,j) == nussinov(i+1,j)) return "." 
+ get_bp(i+1, j); 51 | if (nussinov(i,j) == nussinov(i,j-1)) return get_bp(i,j-1) + "."; 52 | 53 | if (complementary(rna[i], rna[j]) && nussinov(i,j) == nussinov(i+1, j-1) + 1) { 54 | return "(" + get_bp(i+1, j-1) + ")"; 55 | } 56 | 57 | for (int k=i+1; ksp|Q9NR99|MXRA5_HUMAN Matrix-remodeling-associated protein 5 OS=Homo sapiens OX=9606 GN=MXRA5 PE=1 SV=3 2 | MPKRAHWGALSVVLILLWGHPRVALACPHPCACYVPSEVHCTFRSLASVPAGIAKHVERI 3 | NLGFNSIQALSETSFAGLTKLELLMIHGNEIPSIPDGALRDLSSLQVFKFSYNKLRVITG 4 | QTLQGLSNLMRLHIDHNKIEFIHPQAFNGLTSLRLLHLEGNLLHQLHPSTFSTFTFLDYF 5 | RLSTIRHLYLAENMVRTLPASMLRNMPLLENLYLQGNPWTCDCEMRWFLEWDAKSRGILK 6 | CKKDKAYEGGQLCAMCFSPKKLYKHEIHKLKDMTCLKPSIESPLRQNRSRSIEEEQEQEE 7 | DGGSQLILEKFQLPQWSISLNMTDEHGNMVNLVCDIKKPMDVYKIHLNQTDPPDIDINAT 8 | VALDFECPMTRENYEKLWKLIAYYSEVPVKLHRELMLSKDPRVSYQYRQDADEEALYYTG 9 | VRAQILAEPEWVMQPSIDIQLNRRQSTAKKVLLSYYTQYSQTISTKDTRQARGRSWVMIE 10 | PSGAVQRDQTVLEGGPCQLSCNVKASESPSIFWVLPDGSILKAPMDDPDSKFSILSSGWL 11 | RIKSMEPSDSGLYQCIAQVRDEMDRMVYRVLVQSPSTQPAEKDTVTIGKNPGESVTLPCN 12 | ALAIPEAHLSWILPNRRIINDLANTSHVYMLPNGTLSIPKVQVSDSGYYRCVAVNQQGAD 13 | HFTVGITVTKKGSGLPSKRGRRPGAKALSRVREDIVEDEGGSGMGDEENTSRRLLHPKDQ 14 | EVFLKTKDDAINGDKKAKKGRRKLKLWKHSEKEPETNVAEGRRVFESRRRINMANKQINP 15 | ERWADILAKVRGKNLPKGTEVPPLIKTTSPPSLSLEVTPPFPAISPPSASPVQTVTSAEE 16 | SSADVPLLGEEEHVLGTISSASMGLEHNHNGVILVEPEVTSTPLEEVVDDLSEKTEEITS 17 | TEGDLKGTAAPTLISEPYEPSPTLHTLDTVYEKPTHEETATEGWSAADVGSSPEPTSSEY 18 | EPPLDAVSLAESEPMQYFDPDLETKSQPDEDKMKEDTFAHLTPTPTIWVNDSSTSQLFED 19 | STIGEPGVPGQSHLQGLTDNIHLVKSSLSTQDTLLIKKGMKEMSQTLQGGNMLEGDPTHS 20 | RSSESEGQESKSITLPDSTLGIMSSMSPVKKPAETTVGTLLDKDTTTATTTPRQKVAPSS 21 | TMSTHPSRRRPNGRRRLRPNKFRHRHKQTPPTTFAPSETFSTQPTQAPDIKISSQVESSL 22 | VPTAWVDNTVNTPKQLEMEKNAEPTSKGTPRRKHGKRPNKHRYTPSTVSSRASGSKPSPS 23 | PENKHRNIVTPSSETILLPRTVSLKTEGPYDSLDYMTTTRKIYSSYPKVQETLPVTYKPT 24 | SDGKEIKDDVATNVDKHKSDILVTGESITNAIPTSRSLVSTMGEFKEESSPVGFPGTPTW 25 | NPSRTAQPGRLQTGIPVTTSGENLTDPPLLKELEDVDFTSEFLSSLTVSTPFHQEEAGSS 26 | 
TTLSSIKVEVASSQAETTTLDQDHLETTVAILLSETRPQNHTPTAARMKEPASSSPSTIL 27 | MSLGQTTTTKPALPSPRISQASRDSKENVFLNYVGNPETEATPVNNEGTQHMSGPNELST 28 | PSSDQDAFNLSTKLELEKQVFGSRSLPRGPDSQRQDGRVHASHQLTRVPAKPILPTATVR 29 | LPEMSTQSASRYFVTSQSPRHWTNKPEITTYPSGALPENKQFTTPRLSSTTIPLPLHMSK 30 | PSIPSKFTDRRTDQFNGYSKVFGNNNIPEARNPVGKPPSPRIPHYSNGRLPFFTNKTLSF 31 | PQLGVTRRPQIPTSPAPVMRERKVIPGSYNRIHSHSTFHLDFGPPAPPLLHTPQTTGSPS 32 | TNLQNIPMVSSTQSSISFITSSVQSSGSFHQSSSKFFAGGPPASKFWSLGEKPQILTKSP 33 | QTVSVTAETDTVFPCEATGKPKPFVTWTKVSTGALMTPNTRIQRFEVLKNGTLVIRKVQV 34 | QDRGQYMCTASNLHGLDRMVVLLSVTVQQPQILASHYQDVTVYLGDTIAMECLAKGTPAP 35 | QISWIFPDRRVWQTVSPVEGRITLHENRTLSIKEASFSDRGVYKCVASNAAGADSLAIRL 36 | HVAALPPVIHQEKLENISLPPGLSIHIHCTAKAAPLPSVRWVLGDGTQIRPSQFLHGNLF 37 | VFPNGTLYIRNLAPKDSGRYECVAANLVGSARRTVQLNVQRAAANARITGTSPRRTDVRY 38 | GGTLKLDCSASGDPWPRILWRLPSKRMIDALFSFDSRIKVFANGTLVVKSVTDKDAGDYL 39 | CVARNKVGDDYVVLKVDVVMKPAKIEHKEENDHKVFYGGDLKVDCVATGLPNPEISWSLP 40 | DGSLVNSFMQSDDSGGRTKRYVVFNNGTLYFNEVGMREEGDYTCFAENQVGKDEMRVRVK 41 | VVTAPATIRNKTYLAVQVPYGDVVTVACEAKGEPMPKVTWLSPTNKVIPTSSEKYQIYQD 42 | GTLLIQKAQRSDSGNYTCLVRNSAGEDRKTVWIHVNVQPPKINGNPNPITTVREIAAGGS 43 | RKLIDCKAEGIPTPRVLWAFPEGVVLPAPYYGNRITVHGNGSLDIRSLRKSDSVQLVCMA 44 | RNEGGEARLILQLTVLEPMEKPIFHDPISEKITAMAGHTISLNCSAAGTPTPSLVWVLPN 45 | GTDLQSGQQLQRFYHKADGMLHISGLSSVDAGAYRCVARNAAGHTERLVSLKVGLKPEAN 46 | KQYHNLVSIINGETLKLPCTPPGAGQGRFSWTLPNGMHLEGPQTLGRVSLLDNGTLTVRE 47 | ASVFDRGTYVCRMETEYGPSVTSIPVIVIAYPPRITSEPTPVIYTRPGNTVKLNCMAMGI 48 | PKADITWELPDKSHLKAGVQARLYGNRFLHPQGSLTIQHATQRDAGFYKCMAKNILGSDS 49 | KTTYIHVF 50 | -------------------------------------------------------------------------------- /src/BeamZuker.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Summer Gu on 6/4/25. 
3 | // 4 | 5 | #ifndef DERNA_BEAMZUKER_H 6 | #define DERNA_BEAMZUKER_H 7 | #include "default.h" 8 | #include "utils.h" 9 | 10 | class BeamZuker { /* Tables O, E, M and TM are kept in hash maps, alongside the protein and the per-(a,b) bookkeeping vectors start_index / index_offset used by index(). NOTE(review): k (default 10) is presumably the beam width - confirm in BeamZuker.cpp. */ 11 | unordered_map O, E, M, TM; 12 | // unordered_map Z2,E2,M2,TM2; 13 | vector protein, nucle_seq, basepair; 14 | vector start_index, index_offset; 15 | vector ava_nucle_p, ava_nucle_m, codon_selection; 16 | vector bp_bond; 17 | vector sector; 18 | int n, k, minX, minY, last_idx; 19 | int e0; 20 | double e; 21 | 22 | public: 23 | BeamZuker(int n,vector &, int k = 10); 24 | void init_values(); 25 | 26 | inline int index(int,int,int) const; 27 | inline int index(int,int,int,int,int,int) const; 28 | inline int ava_nucleotides_int(int a, int x, int i, int dir); 29 | 30 | 31 | /** 32 | * Fill the map O where O[i] stores the minimum value at i 33 | * combining free energy and codon adaptation index 34 | * 35 | * @param ostream object for stdout 36 | * @param lambda is a weight between 0 and 1 37 | * */ 38 | double calculate_CAI_O(ostream &, double lambda); 39 | 40 | /** 41 | * Fill the vector E1 where E1[i] stores the minimum value at i 42 | * if the two positions mapped to i are eligible base pairs 43 | * 44 | * NOTE(review): the signature takes no ostream; "E1" presumably means the member map E - confirm 45 | * @param lambda is a weight between 0 and 1 46 | * */ 47 | void calculate_CAI_E(double lambda); 48 | 49 | /** 50 | * Fill the vector M1 where M1[i] stores the minimum value of the 51 | * multiloop formed between the two positions mapped to i 52 | * 53 | * @param a is the position index of the left amino acid in the protein seq 54 | * @param b is the position index of the right amino acid in the protein seq 55 | * @param x is a possible codon of the amino acid at position a 56 | * @param y is a possible codon of the amino acid at position b 57 | * @param i is the position index in codon x, 0 <= i <= 2 58 | * @param j is the position index in codon y, 0 <= j <= 2 59 | * @param lambda is a weight between 0 and 1 60 | * NOTE(review): the signature takes no ostream; "M1" presumably means the member map M - confirm 61 | * */ 62 | void
calculate_CAI_M(int a, int b, int i, int j, int x, int y, double lambda); //vector & 63 | 64 | }; 65 | 66 | inline int BeamZuker::index(int a, int x, int i) const { /* Flat index for (amino-acid position a, codon choice x, in-codon position i): 18*a + 3*x + i, i.e. 18 slots per position and 3 per codon choice. */ 67 | return 18*a+3*x+i; 68 | } 69 | 70 | inline int BeamZuker::index(int a, int b, int i, int j, int x, int y) const { /* Flat index for the pair state (a, b, i, j, x, y): the base start_index[a*n+b], plus index_offset[a*n+b] per (i, j) slot 3*i+j, plus n_codon[protein[b]]*x + y. */ 71 | int idx = a*n+b; 72 | return start_index[idx] + index_offset[idx] * (3*i+j) + n_codon[protein[b]] * x + y; 73 | } 74 | 75 | inline int BeamZuker::ava_nucleotides_int(int a, int x, int i, int dir) { /* Returns a bitmask with bit v set for every nucleotide code v that can sit next to position i of codon x at amino acid a: the following position when dir == 1, the preceding one otherwise. Inside the codon that is a single bit; across a codon boundary it is the union over all codons of the neighbouring amino acid. NOTE(review): reads protein[a+1] (dir == 1, i == 2) and protein[a-1] (dir != 1, i == 0) with no bounds check, so callers must not ask past either end of the protein. */ 76 | int s = 0; 77 | if (dir == 1) { 78 | if (i <= 1) { 79 | s |= (1 << nucleotides[protein[a]][x][i+1]); 80 | } 81 | else { 82 | int pna = protein[a+1]; 83 | int an = n_codon[pna]; 84 | for (int x1 = 0; x1 < an; ++x1) { 85 | s |= (1 << nucleotides[pna][x1][0]); 86 | } 87 | 88 | } 89 | } 90 | else { 91 | if (i >= 1) { 92 | s |= (1 << nucleotides[protein[a]][x][i-1]); 93 | } 94 | else { 95 | int ppa = protein[a-1]; 96 | int ap = n_codon[ppa]; 97 | for (int x1 = 0; x1 < ap; ++x1) { 98 | s |= (1 << nucleotides[ppa][x1][2]); 99 | } 100 | } 101 | } 102 | 103 | return s; 104 | } 105 | #endif //DERNA_BEAMZUKER_H 106 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // Created by xinyu on 3/23/2022.
4 | // 5 | 6 | #ifndef RNA_DESIGN_UTILS_H 7 | #define RNA_DESIGN_UTILS_H 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "Nussinov.h" 13 | #include "default.h" 14 | using namespace std; /* NOTE(review): a using-directive in a header applies to every file that includes it. */ 15 | 16 | int aa_index (char aa); 17 | 18 | int to_int(char a); 19 | 20 | vector filterCandidates( 21 | const vector& candidates, 22 | const unordered_map& letter_map); 23 | 24 | void char2num(vector &, string &); 25 | 26 | int n_index(char n); 27 | 28 | int n_index2(int n1, int n2); 29 | 30 | bool complementary(int X, int Y); 31 | 32 | int sigma(int a, int i); 33 | 34 | double getCAI(const vector &, const vector &); 35 | 36 | double stand_getCAI(const vector &, const vector &); 37 | 38 | int m(int n, int a, int b, int i, int j, int x=-1); // 39 | 40 | void write_csv(string, const vector>> &); 41 | 42 | 43 | double getCAI_s(const vector &, const vector &); 44 | 45 | double stand_getCAI_s(const vector &, const vector &); 46 | 47 | bool compare(double x, double y, double epislon=0.00001); /* third argument defaults to 1e-5; note the parameter is spelled `epislon` */ 48 | 49 | bool greaterThan(double a, double b); 50 | 51 | bool basepair(int X, int Y); 52 | 53 | string num2String(vector &); 54 | 55 | void transform2num(vector&, string); 56 | 57 | int get_index(vector& seqs, string & seq); 58 | 59 | int getxPos(int, vector &); 60 | 61 | 62 | bool add_auterminal(int a, int b); 63 | 64 | bool add_ggmm(int a, int b); 65 | 66 | bool add_uugamm(int a, int b); 67 | 68 | int l(int a, int i, int b, int j); 69 | 70 | vector read_fasta(string &, ostream&); 71 | 72 | vector read_rna(string & input); 73 | 74 | pair find_amino_acid_and_codon_index(const vector& codon); 75 | 76 | double evaluate_CAI(string &,vector &,int); /* evaluate_CAI has three overloads; only the (string &, int) form has a default argument (type = 0) */ 77 | 78 | double evaluate_CAI(string &, int type = 0); 79 | 80 | double evaluate_CAI(vector &, vector &); 81 | 82 | double evaluate_CAI_N(string &,vector &,int); 83 | 84 | double evaluate_MFE(string &); 85 | 86 | double evaluate_MFE(vector &, string & bp); 87 | 88 | int evaluate_BP_N(string &, int); 89 | 90 | void usage(); 91 | void
help(); 92 | 93 | bool is_complete_path(const Path& path); 94 | 95 | // Save and load helpers: raw binary dumps (element count as size_t, then the element bytes) 96 | template 97 | void save_vector_binary(const vector &vec, const string &filename) { /* Writes vec.size() as a size_t followed by the raw bytes of the elements. NOTE(review): the stream is never checked, so a file that cannot be opened fails silently. */ 98 | ofstream file(filename, ios::binary); 99 | size_t size = vec.size(); 100 | file.write(reinterpret_cast(&size), sizeof(size)); 101 | file.write(reinterpret_cast(vec.data()), size * sizeof(T)); 102 | } 103 | 104 | template 105 | void load_vector_binary(vector &vec, const string &filename) { /* Reads a size_t count, resizes vec to it and reads that many raw elements. NOTE(review): no stream checks - if the file is missing or short, `size` is still uninitialized when it is passed to resize(). */ 106 | ifstream file(filename, ios::binary); 107 | size_t size; 108 | file.read(reinterpret_cast(&size), sizeof(size)); 109 | vec.resize(size); 110 | file.read(reinterpret_cast(vec.data()), size * sizeof(T)); 111 | } 112 | 113 | template 114 | void save_vector_vector_binary(const vector> &vecs, const string &filename) { /* Writes the outer count, then for each inner vector its count followed by its raw element bytes. The stream is never checked. */ 115 | ofstream file(filename, ios::binary); 116 | size_t outer_size = vecs.size(); 117 | file.write(reinterpret_cast(&outer_size), sizeof(outer_size)); 118 | for (const auto& vec : vecs) { 119 | size_t inner_size = vec.size(); 120 | file.write(reinterpret_cast(&inner_size), sizeof(inner_size)); 121 | file.write(reinterpret_cast(vec.data()), inner_size * sizeof(T)); 122 | } 123 | } 124 | 125 | template 126 | void load_vector_vector_binary(vector> &vecs, const string &filename) { /* Reads the layout written by save_vector_vector_binary. NOTE(review): outer_size and inner_size are used for resize() without checking that the reads succeeded. */ 127 | ifstream file(filename, ios::binary); 128 | size_t outer_size; 129 | file.read(reinterpret_cast(&outer_size), sizeof(outer_size)); 130 | vecs.resize(outer_size); 131 | for (auto& vec : vecs) { 132 | size_t inner_size; 133 | file.read(reinterpret_cast(&inner_size), sizeof(inner_size)); 134 | vec.resize(inner_size); 135 | file.read(reinterpret_cast(vec.data()), inner_size * sizeof(T)); 136 | } 137 | } 138 | 139 | #endif //RNA_DESIGN_UTILS_H 140 | -------------------------------------------------------------------------------- /src/ZukerAlgorithm.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by xinyu on 4/26/2022.
3 | // 4 | 5 | #ifndef RNA_DESIGN_ZUKERALGORITHM_H 6 | #define RNA_DESIGN_ZUKERALGORITHM_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "utils.h" 13 | #include "default.h" 14 | 15 | 16 | using namespace std; 17 | 18 | class ZukerAlgorithm /* Zuker-style folding of one fixed sequence: `seq` is held by reference (the caller's vector must outlive this object) and W, V, WM, TM, WB, VB, MB are the tables. */ 19 | { 20 | vector & seq; 21 | string bp; 22 | int n; 23 | vector W; 24 | vector V; 25 | vector WM; 26 | vector TM; 27 | vector WB; 28 | vector VB; 29 | vector MB; 30 | vector sector; 31 | vector base_pair; 32 | vector to_char; 33 | 34 | 35 | public: 36 | ZukerAlgorithm(vector & seq, int n); 37 | ZukerAlgorithm(const ZukerAlgorithm &); 38 | ~ZukerAlgorithm(); 39 | void calculate_V(); 40 | int calculate_W(); 41 | void calculate_WM(int, int); 42 | inline int hairpin_loop(int, int, int,int,int,int) const; 43 | inline int stacking(int, int, int,int) const; 44 | inline int bulge_loop(int, int, int, int, int) const; 45 | inline int interior_loop(int, int, int, int,int,int,int,int,int,int) const; 46 | int index(int, int); 47 | void traceback(); 48 | void traceback_2(); 49 | void get_bp(string &); 50 | 51 | private: 52 | void assign_seq2str(string &, int) const; 53 | }; 54 | 55 | inline int ZukerAlgorithm::bulge_loop(int i, int j, int h, int k, int l) const { /* Energy of a bulge of size l between pair (i, j) and pair (h, k); the arguments index BP_pair, so they are presumably nucleotide codes - confirm. The size term is bulge[l], extrapolated from bulge[30] with lxc*Log[l] when l > MAXLOOP. For l == 1 the stacking term stackE[type][type2] is added, otherwise AU[i][j] and AU[k][h]. */ 56 | int type = BP_pair[i+1][j+1]; 57 | int type2 = rtype[BP_pair[h+1][k+1]]; 58 | int bulge_energy; 59 | // add penalty based on size 60 | bulge_energy = (l <= MAXLOOP) ?
bulge[l] : bulge[30]+(int)(lxc*Log[l]); 61 | 62 | // if len 1, include the delta G of intervening NN (SantaLucia 2004) 63 | if (l == 1) { 64 | bulge_energy += stackE[type][type2];//stacks[stack_index(i, k, j, h)]; 65 | } else { 66 | bulge_energy += AU[i][j]; 67 | bulge_energy += AU[k][h]; 68 | } 69 | 70 | // the AU terms were already added in the else branch above; nothing more is added here 71 | return bulge_energy; 72 | } 73 | 74 | inline int ZukerAlgorithm::stacking(int i, int j, int p, int q) const { /* Returns stackE[type][type2], where type is the BP_pair type of (i, j) and type2 is the rtype-mapped BP_pair type of (p, q). */ 75 | int type = BP_pair[i+1][j+1]; 76 | int type2 = rtype[BP_pair[p+1][q+1]]; 77 | int stacking_energy = stackE[type][type2];//stacks[stack_index(i, j1, j, i1)]; 78 | return stacking_energy; 79 | 80 | } 81 | 82 | inline int ZukerAlgorithm::interior_loop(int i, int j, int h, int k, int i1, int j1, int h1, int k1, int n1, int n2) const { /* Interior-loop energy between pair (i, j) and pair (h, k). i1, j1, h1, k1 are the values used to index the int11/int21/int22 and mismatch tables; n1 and n2 are the two side lengths. Dispatches on (ns, nl) = (min, max) of the sides: 1x1, 2x1, 1xn, 2x2, 2x3, and otherwise the generic size + asymmetry + mismatch formula. */ 83 | int energy; 84 | int type = BP_pair[i+1][j+1]; 85 | int type2 = rtype[BP_pair[h+1][k+1]]; 86 | int nl, ns; 87 | 88 | nl = max(n1, n2); 89 | ns = min(n1, n2); 90 | if (ns==1) { 91 | if (nl==1) /* 1x1 loop */ 92 | return int11[type][type2][i1+1][j1+1]; 93 | if (nl==2) { /* 2x1 loop */ 94 | if (n1==1) 95 | energy = int21[type][type2][i1+1][k1+1][j1+1]; 96 | else 97 | energy = int21[type2][type][k1+1][i1+1][h1+1]; 98 | return energy; 99 | } 100 | else { /* 1xn loop */ 101 | energy = (nl+1<=MAXLOOP)?(internal_loop[nl+1]) : (internal_loop[30]+(int)(lxc*Log[nl+1])); 102 | energy += min(MAX_NINIO, (nl-ns)*ninio); 103 | energy += mismatch1nI[type][i1+1][j1+1] + mismatch1nI[type2][k1+1][h1+1]; 104 | return energy; 105 | } 106 | } 107 | else if (ns==2) { 108 | if(nl==2) { /* 2x2 loop */ 109 | return int22[type][type2][i1+1][h1+1][k1+1][j1+1];} 110 | else if (nl==3){ /* 2x3 loop */ 111 | energy = internal_loop[5]+ninio; 112 | energy += mismatch23I[type][i1+1][j1+1] + mismatch23I[type2][k1+1][h1+1]; 113 | return energy; 114 | } 115 | 116 | } 117 | { /* generic interior loop (no else here!)*/ 118 | energy = (n1+n2<=MAXLOOP)?(internal_loop[n1+n2]) : (internal_loop[30]+(int)(lxc*Log[n1+n2])); 119 |
energy += min(MAX_NINIO, (nl-ns)*ninio); 120 | 121 | energy += mismatchI[type][i1+1][j1+1] + mismatchI[type2][k1+1][h1+1]; 122 | } 123 | return energy; 124 | } 125 | 126 | inline int ZukerAlgorithm::hairpin_loop(int xi, int yj, int xi_, int _yj, int l, int i) const { /* Hairpin energy for closing pair (xi, yj) with loop size l; xi_ and _yj index the terminal-mismatch table and i is forwarded to assign_seq2str to build the lookup string of length l + 2. For l in {3, 4, 6} a hit in hairpinE is returned as the whole energy; a miss with l == 3 returns the size term plus AU[xi][yj] with no mismatch term. NOTE(review): the size cutoff is the literal 30 here but MAXLOOP in bulge_loop and interior_loop - confirm they agree. The default branch of the switch cannot be reached because the enclosing if already limits l to 3, 4 or 6. */ 127 | int hairpin_energy; 128 | int type = BP_pair[xi+1][yj+1]; 129 | 130 | // add penalty based on size 131 | hairpin_energy = (l <= 30) ? hairpins[l] : hairpins[30]+(int)(lxc*Log[l]);//hairpinLoops[l]; 132 | 133 | if (l == 3 || l == 4 || l == 6) { 134 | // int index; 135 | string s(l + 2, '.'); 136 | assign_seq2str(s, i); 137 | // cout << s << endl; 138 | 139 | switch (l) { 140 | case 3: 141 | if (hairpinE.count(s) > 0) { 142 | return hairpinE[s]; 143 | } 144 | return hairpin_energy + AU[xi][yj]; 145 | case 4: 146 | if (hairpinE.count(s) > 0) { 147 | return hairpinE[s]; 148 | } 149 | break; 150 | case 6: 151 | if (hairpinE.count(s) > 0) { 152 | return hairpinE[s]; 153 | } 154 | break; 155 | default: 156 | cout << s << " " << l << endl; 157 | exit(5); 158 | break; 159 | } 160 | 161 | } 162 | 163 | // add penalty for a terminal mismatch 164 | hairpin_energy += mismatchH[type][xi_+1][_yj+1];//T_mm[stack_index(xi, _yj, yj, xi_)]; 165 | 166 | // hairpin_energy += add_auterminal(xi, yj,tempf); 167 | return hairpin_energy; 168 | 169 | } 170 | 171 | #endif //RNA_DESIGN_ZUKERALGORITHM_H 172 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DERNA 2 | 3 | [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/derna/README.html#package-derna) 4 | 5 | DERNA is a tool that enables the design of RNA sequences based on protein sequences.
6 | DERNA accepts a protein sequence as input and provides a collection of Pareto optimal solutions consisting of RNA sequences that optimize both minimum free energy and codon adaptation index (CAI). Additionally, DERNA can function as a tool for predicting RNA structures and calculating CAI for given RNA sequences. 7 | 8 | If you find this tool useful in your research, please cite the following paper: 9 | 10 | ```bibtex 11 | @article{gu2024derna, 12 | title={DERNA enables pareto optimal RNA design}, 13 | author={Gu, Xinyu and Qi, Yuanyuan and El-Kebir, Mohammed}, 14 | journal={Journal of Computational Biology}, 15 | volume={31}, 16 | number={3}, 17 | pages={179--196}, 18 | year={2024}, 19 | publisher={Mary Ann Liebert, Inc., publishers 140 Huguenot Street, 3rd Floor New~…}, 20 | doi={10.1089/cmb.2023.0283}, 21 | url={https://doi.org/10.1089/cmb.2023.0283} 22 | } 23 | ``` 24 | 25 | ## Contents 26 | 27 | 1. [Installation](#install) 28 | * [Using conda](#conda) (recommended) 29 | * [Build from source](#compilation) (alternative) 30 | * [Dependencies](#dep) 31 | * [Compilation](#comp) 32 | 2. [Usage instructions](#usage) 33 | * [Examples](#example) 34 | 35 | 36 | 37 | 38 | ## Installation 39 | 40 | 41 | 42 | ### Using conda 43 | 44 | 1. Create a new conda environment named "derna" and install dependencies: 45 | 46 | ```bash 47 | conda create -n derna 48 | ``` 49 | 50 | 2. Then activate the created environment: `conda activate derna`. 51 | 3. Install the package into current environment "derna": 52 | 53 | ```bash 54 | conda install -c bioconda derna 55 | ``` 56 | 57 | 58 | 59 | ### Build from source 60 | 61 | 62 | 63 | #### Dependencies 64 | 65 | * Recent C++ compiler (C++11) 66 | 67 | 68 | #### Compilation 69 | 70 | ``` 71 | mkdir build 72 | cd build 73 | cmake .. 
74 | make 75 | ``` 76 | 77 | 78 | ## Usage instructions 79 | 80 | ``` 81 | -i - input file path 82 | -o - output file path 83 | -m - model <0,1,-1> , 0 for nussinov, 1 for zuker, -1 for eval 84 | -s - mode <1,2,3>, 1 for mfe, 2 for mfe+cai, 3 for sweep 85 | -l - lambda <[0,1]> 86 | -a - sweep increment <(0,1]> 87 | -r - input rna file path for eval model 88 | -O - sweep output csv file name 89 | -g - minimal gap allowed in Nussinov based model <[0,inf)> 90 | -t - threshold tau <(0,1)> 91 | -p - threshold tau2 <(0,1)> 92 | -c - codon usage table csv file 93 | -d - directory containing the energy parameter csv files 94 | ``` 95 | 96 | ``` 97 | ./derna -i input -o output -m model -s mode ... 98 | ``` 99 | 100 | ``` 101 | input: input file path 102 | output: output file path 103 | model: integer 0 for Nussinov based model, 1 for Zuker based model, -1 for eval model 104 | mode: integer 1 for only MFE mode, integer 2 for MFE + CAI mode, integer 3 for lambda sweep mode 105 | lambda: lambda value for MFE + CAI mode or lambda sweep mode 106 | incr: increment interval for lambda sweep mode 107 | sweep: sweep output csv file name 108 | g: minimal gap allowed in Nussinov based model 109 | rna: input rna file path for eval model 110 | ``` 111 | 112 | 113 | ### Examples 114 | 115 | #### Fixed $\lambda$ 116 | 117 | `./derna -i ../data/uniprotSeq/P15421.fasta -o P15421_fixed_lambda.txt -m 1 -s 2 -l 0.5` 118 | 119 | `cat P15421_fixed_lambda.txt` 120 | 121 | ``` 122 | protein sequence: MYGKIIFVLLLSGIVSISASSTTGVAMHTSTSSSVTKSYISSQTNGITLINWWAMARVIFEVMLVVVGMIILISYCIR 123 | 124 | lambda: 0.5 125 | Zuker CAI 126 | Energy: -74.2202 127 | Time taken by DP is : 32sec 128 | lambda: 0.5,O: -7422.02,mfe: -14870,cai: -25.9662,combined: -7422.02 129 | zuker cai bp: (((((((((.....((((((((((.((((((((.(((((((((...))))))))).)))))))).)))))))))))))))))))((((((....((((((((.(((.((((((((((.(((((((((.(((((.((((((((((((((.((((((((((((....)))))).)))))))))))))))))))).))))))))))))))))))))))))))))))))))))))))),size: 234 130 | zuker rna:
AUGUAUGGCXXXXXCAUCUUCGUCUUGCUGCUCUCCGGGAUCGUXUCGAUCUCGGCGAGCAGCACGACGGGGGUGGCCAUGCAUACGAGUXXXXGCAGUAGCXUGAXUAAGAGUUAUXUAUCCUCACXGACCAACGGCAUCACCUUGAXAAAUUGGUGGGCGXXGGCCCGCXUAAUUUUCGAGGUGAUGCUGGUGGUCGUGGGGAUGAUAAUUCUUAUCAGCUACUGCAUUCGU.size: 234 131 | zuker cai rna: AUGUAUGGCAAGAUCAUCUUCGUCUUGCUGCUCUCCGGGAUCGUGUCGAUCUCGGCGAGCAGCACGACGGGGGUGGCCAUGCAUACGAGUACCAGCAGUAGCGUGACUAAGAGUUAUAUAUCCUCACAGACCAACGGCAUCACCUUGAUAAAUUGGUGGGCGAUGGCCCGCGUAAUUUUCGAGGUGAUGCUGGUGGUCGUGGGGAUGAUAAUUCUUAUCAGCUACUGCAUUCGU.size: 234 132 | Codon Adaptation Index: 0.716842 133 | Minimum Free Energy: -148.7 134 | 135 | ``` 136 | 137 | #### Sweep (default thresholds) 138 | 139 | `./derna -i ../data/uniprotSeq/P15421.fasta -o P15421_sweep.txt -O P15421_sweep -m 1 -s 3` 140 | 141 | Estimated time: about 10 minutes. 142 | 143 | #### Evaluate an RNA sequence 144 | 145 | `./derna -i ../data/uniprotSeq/P15421.fasta -o P15421_evaluation.txt -r ../data/RNA/P15421_rna.txt -m -1` 146 | 147 | `cat P15421_evaluation.txt` 148 | 149 | ``` 150 | protein sequence: MYGKIIFVLLLSGIVSISASSTTGVAMHTSTSSSVTKSYISSQTNGITLINWWAMARVIFEVMLVVVGMIILISYCIR 151 | eval MFE: -148.7 152 | eval CAI: -28.3932 153 | eval standard CAI: 0.694881 154 | 155 | ``` 156 | 157 | #### Only consider MFE 158 | 159 | `./derna -i ../data/uniprotSeq/P15421.fasta -o P15421_MFE_only.txt -m 1 -s 1` 160 | 161 | `cat P15421_MFE_only.txt` 162 | 163 | ``` 164 | protein sequence: MYGKIIFVLLLSGIVSISASSTTGVAMHTSTSSSVTKSYISSQTNGITLINWWAMARVIFEVMLVVVGMIILISYCIR 165 | 166 | Zuker 167 | Energy: -148.7 168 | Time taken by DP is : 3sec 169 | Time taken : 3sec 170 | zuker bp:(((((((((.....((((((((((.((((((((.(((((((((...))))))))).)))))))).)))))))))))))))))))((((((....((((((((.(((.((((((((((.(((((((((.(((((.((((((((((((((.((((((((((((....)))))).)))))))))))))))))))).))))))))))))))))))))))))))))))))))))))))), size: 234 171 | zuker 
rna:AUGUAUGGCXXXXXCAUCUUCGUCCUGCUGCUCUCCGGGAUCGUXUCGAUCUCGGCGAGCAGCACGACGGGGGUGGCCAUGCAUACGAGUXXXXGCAGUAGCXUGAXUAAGAGUUAUXUAUCCUCACXGACCAACGGCAUCACCUUGAXAAAUUGGUGGGCGXXGGCCCGCXUAAUUUUCGAGGUGAUGCUGGUGGUCGUGGGGAUGAUAAUUCUUAUCAGCUACUGCAUUCGU, size: 234 172 | zuker rna:AUGUAUGGCAAAAUCAUCUUCGUCCUGCUGCUCUCCGGGAUCGUUUCGAUCUCGGCGAGCAGCACGACGGGGGUGGCCAUGCAUACGAGUACUAGCAGUAGCGUGACUAAGAGUUAUAUAUCCUCACAGACCAACGGCAUCACCUUGAUAAAUUGGUGGGCGAUGGCCCGCGUAAUUUUCGAGGUGAUGCUGGUGGUCGUGGGGAUGAUAAUUCUUAUCAGCUACUGCAUUCGU, size: 234 173 | zuker cai: 0.694881 174 | 175 | ``` 176 | 177 | #### Nussinov based model (Fixed $\lambda$) 178 | 179 | `./derna -i ../data/uniprotSeq/P15421.fasta -o P15421_nussinov.txt -m 0 -s 2 -l 0.5 -g 1` 180 | 181 | #### Specify Codon Usage Table 182 | 183 | `./derna -i ../data/uniprotSeq/P15421.fasta -o P15421_fixed_lambda.txt -m 1 -s 2 -l 0.5 -c ./data/InputFiles/sample_codon_usage.csv` 184 | 185 | #### Specify Energy Parameters 186 | 187 | `./derna -i ../data/uniprotSeq/P15421.fasta -o P15421_fixed_lambda.txt -m 1 -s 2 -l 0.5 -d ./data/InputFiles/` 188 | -------------------------------------------------------------------------------- /src/default.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Summer Gu on 8/16/22. 
3 | // 4 | 5 | #ifndef RNA_DESIGN_DEFAULT_H 6 | #define RNA_DESIGN_DEFAULT_H 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | /* NOTE(review): RESCALE_dG expands its dT argument without parentheses; pass a simple expression or parenthesize the argument at the call site. */ 14 | #include "params/constants.h" 15 | #define RESCALE_dG(dG, dH, dT) ((dH) - ((dH) - (dG)) * dT) 16 | #define PUBLIC 17 | /* One scored candidate: a score, six integer coordinates (a, b, i, j, x, y), a backtrace type tag, separate mfe / cai values and a bt_info vector. Defaults: score = inf, all coordinates = -1, everything else zero/empty. */ 18 | struct BeamEntry { 19 | double score; 20 | int a, b, i, j, x, y; 21 | int backtrace_type; 22 | double mfe, cai; 23 | vector bt_info; 24 | 25 | // BeamEntry() : score(inf), a(-1), b(-1), i(-1), j(-1), x(-1), y(-1), 26 | // backtrace_type(0), mfe(0.0), cai(0.0), bt_info() {} 27 | 28 | BeamEntry(double s = inf, int a_ = -1, int b_ = -1, int i_ = -1, int j_ = -1, int x_ = -1, int y_ = -1, 29 | int bt_type = 0, double mfe_ = 0, double cai_ = 0, vector bt = {}) 30 | : score(s), a(a_), b(b_), i(i_), j(j_), x(x_), y(y_), 31 | backtrace_type(bt_type), mfe(mfe_), cai(cai_), bt_info(std::move(bt)) {} 32 | /* Entries are ordered by score only; all other fields are ignored by the comparison. */ 33 | bool operator<(const BeamEntry &other) const { 34 | return score < other.score; 35 | } 36 | }; 37 | /* Plain aggregate of four doubles. NOTE(review): presumably the per-lambda results (mfe, log-scale cai, standard CAI, objective value) - confirm against the code that fills it. */ 38 | struct LambdaResult { 39 | double mfe_value; 40 | double cai_value; 41 | double CAI_value; 42 | double O_val; 43 | }; 44 | /* Index pair (i, j) plus an `ml` tag whose meaning is not documented in this header. */ 45 | typedef struct st { 46 | int i; // index i 47 | int j; // index j 48 | int ml; //? 49 | } st; 50 | /* Extended record: integer fields a and b, indices i and j, the nucleotides x and y at those indices, an `ml` tag and a `change` value. */ 51 | typedef struct stack_ { 52 | int a; 53 | int b; 54 | int i; // index i 55 | int j; // index j 56 | int x; // nucleotide at i 57 | int y; // nucleotide at j 58 | int ml; //? 
59 | double change; 60 | // vector bi; 61 | } stack_; 62 | /* Pair of integer indices (i, j). */ 63 | typedef struct bond { 64 | int i; 65 | int j; 66 | } bond; 67 | /* Working state for one path: sector_stack, nucle_seq, codon_selection and bp_bond vectors, an accumulated `change` (default 0) and two integer counters t and s. */ 68 | struct Path { 69 | vector sector_stack; 70 | vector nucle_seq; 71 | vector codon_selection; 72 | vector bp_bond; 73 | double change = 0; 74 | int t, s; 75 | 76 | /* Sizes nucle_seq to 3*n and codon_selection to n (both filled with -1), sizes bp_bond to 3*n, sets bp_bond[0].i = 0 and t = s = 0. NOTE(review): with n == 0 bp_bond is empty and bp_bond[0] is out of range; also Path() below leaves t and s uninitialized - confirm both are intended. */ 77 | Path(int n) { 78 | nucle_seq.resize(3*n, -1); 79 | codon_selection.resize(n, -1); 80 | bp_bond.resize(3*n); 81 | // sector_stack.resize(3*n); 82 | bp_bond[0].i = 0; 83 | t = 0, s = 0; 84 | } 85 | 86 | Path() = default; 87 | }; 88 | /* Hash functor for Path: folds every element of nucle_seq and then `change` (scaled by 1e6 and converted to an integer) into one size_t. Other Path members do not contribute. */ 89 | struct PathHash { 90 | size_t operator()(const Path& p) const { 91 | size_t h = 0; 92 | 93 | // Hash nucle_seq 94 | for (int x : p.nucle_seq) { 95 | h ^= std::hash{}(x) + 0x9e3779b9 + (h << 6) + (h >> 2); 96 | } 97 | 98 | // Hash change (scaled by 1e6 and converted to an integer to reduce floating point noise) 99 | long long rounded = static_cast(p.change * 1e6); // adjust precision as needed 100 | h ^= std::hash{}(rounded) + 0x9e3779b9 + (h << 6) + (h >> 2); 101 | 102 | return h; 103 | } 104 | }; 105 | /* Equality functor for Path: same nucle_seq and `change` within 1e-6. NOTE(review): two values within 1e-6 of each other can still convert to different integers in PathHash, so paths that compare equal here may hash differently - confirm this is acceptable for the containers using the pair. */ 106 | struct PathEqual { 107 | bool operator()(const Path& a, const Path& b) const { 108 | return a.nucle_seq == b.nucle_seq && std::abs(a.change - b.change) < 1e-6; 109 | } 110 | }; 111 | /* Ordering key: compares t first, then e1, then e2, treating doubles that differ by at most EPSILON as equal. NOTE(review): tolerance-based comparison is not transitive in general - confirm it is safe for the ordered containers keyed on it. */ 112 | struct OptionKey { 113 | int t; 114 | double e1, e2; 115 | 116 | bool operator<(const OptionKey& other) const { 117 | if (t != other.t) return t < other.t; 118 | if (std::abs(e1 - other.e1) > EPSILON) return e1 < other.e1; 119 | if (std::abs(e2 - other.e2) > EPSILON) return e2 < other.e2; 120 | return false; 121 | } 122 | }; 123 | 124 | extern vector Log; 125 | 126 | 127 | extern double lxc37; /* parameter for logarithmic loop 128 | energy extrapolation */ 129 | 130 | extern int BP_pair[5][5]; 131 | extern int rtype[7]; 132 | extern int nucleotides[20][6][3]; 133 | extern bool codonMatch[20][3][3][4][4]; 134 | extern double codon_cai[20][6]; 135 | extern double codon_cai_s[20][6]; 136 | extern double max_codon_cai[20][6][16]; 137 | extern int max_cai_pos[20]; 138 | extern int 
AU[4][4]; 139 | extern vector to_char; 140 | extern vector n_codon; 141 | extern int internal_loop_map[3][3]; 142 | 143 | 144 | extern double lxc; 145 | extern int stackE[NBPAIRS+1][NBPAIRS+1]; 146 | extern int hairpins[31]; 147 | extern int bulge[31]; 148 | extern int internal_loop[31]; 149 | extern int mismatchI[NBPAIRS+1][5][5]; 150 | extern int mismatch1nI[NBPAIRS+1][5][5]; 151 | extern int mismatch23I[NBPAIRS+1][5][5]; 152 | extern int mismatchH[NBPAIRS+1][5][5]; 153 | extern int int11[NBPAIRS+1][NBPAIRS+1][5][5]; 154 | extern int int21[NBPAIRS+1][NBPAIRS+1][5][5][5]; 155 | extern int int22[NBPAIRS+1][NBPAIRS+1][5][5][5][5]; 156 | extern int ML_BASE; 157 | extern int ML_closing; 158 | extern int ML_intern; 159 | extern int ninio; 160 | extern int TerminalAU; 161 | 162 | extern int stack37[NBPAIRS+1][NBPAIRS+1]; 163 | extern int stackdH[NBPAIRS+1][NBPAIRS+1]; /* stack enthalpies */ 164 | 165 | extern int hairpin37[31]; 166 | extern int hairpindH[31]; 167 | extern int bulge37[31]; 168 | extern int bulgedH[31]; 169 | extern int internal_loop37[31]; 170 | extern int internal_loopdH[31]; 171 | extern int mismatchI37[NBPAIRS+1][5][5]; /* interior loop mismatches */ 172 | extern int mismatchIdH[NBPAIRS+1][5][5]; /* interior loop mismatches */ 173 | extern int mismatch1nI37[NBPAIRS+1][5][5]; /* interior loop mismatches */ 174 | extern int mismatch23I37[NBPAIRS+1][5][5]; /* interior loop mismatches */ 175 | extern int mismatch1nIdH[NBPAIRS+1][5][5]; /* interior loop mismatches */ 176 | extern int mismatch23IdH[NBPAIRS+1][5][5]; /* interior loop mismatches */ 177 | extern int mismatchH37[NBPAIRS+1][5][5]; /* same for hairpins */ 178 | extern int mismatchM37[NBPAIRS+1][5][5]; /* same for multiloops */ 179 | extern int mismatchHdH[NBPAIRS+1][5][5]; /* same for hairpins */ 180 | extern int mismatchMdH[NBPAIRS+1][5][5]; /* same for multiloops */ 181 | extern int mismatchExt37[NBPAIRS+1][5][5]; 182 | extern int mismatchExtdH[NBPAIRS+1][5][5]; 183 | extern int 
interiorLoop[4][4][4][4][4][4][4][4][MAXLOOP][MAXLOOP]; 184 | 185 | extern int dangle5_37[NBPAIRS+1][5]; /* 5' dangle exterior of pair */ 186 | extern int dangle3_37[NBPAIRS+1][5]; /* 3' dangle */ 187 | extern int dangle3_dH[NBPAIRS+1][5]; /* corresponding enthalpies */ 188 | extern int dangle5_dH[NBPAIRS+1][5]; 189 | 190 | extern int int11_37[NBPAIRS+1][NBPAIRS+1][5][5]; /* 1x1 interior loops */ 191 | extern int int11_dH[NBPAIRS+1][NBPAIRS+1][5][5]; 192 | 193 | extern int int21_37[NBPAIRS+1][NBPAIRS+1][5][5][5]; /* 2x1 interior loops */ 194 | extern int int21_dH[NBPAIRS+1][NBPAIRS+1][5][5][5]; 195 | 196 | extern int int22_37[NBPAIRS+1][NBPAIRS+1][5][5][5][5]; /* 2x2 interior loops */ 197 | extern int int22_dH[NBPAIRS+1][NBPAIRS+1][5][5][5][5]; 198 | 199 | /* constants for linearly destabilizing contributions for multi-loops 200 | F = ML_closing + ML_intern*(k-1) + ML_BASE*u */ 201 | 202 | 203 | extern int ML_BASE37; 204 | extern int ML_BASEdH; 205 | 206 | extern int ML_closing37; 207 | extern int ML_closingdH; 208 | 209 | extern int ML_intern37; 210 | extern int ML_interndH; 211 | 212 | //extern int TripleC37; 213 | //extern int TripleCdH; 214 | //extern int MultipleCA37; 215 | //extern int MultipleCAdH; 216 | //extern int MultipleCB37; 217 | //extern int MultipleCBdH; 218 | 219 | /* Ninio-correction for asymmetric internal loops with branches n1 and n2 */ 220 | /* ninio_energy = min{max_ninio, |n1-n2|*F_ninio[min{4.0, n1, n2}] } */ 221 | extern int MAX_NINIO; /* maximum correction */ 222 | 223 | extern int ninio37; 224 | extern int niniodH; 225 | /* penalty for helices terminated by AU (actually not GC) */ 226 | 227 | extern int TerminalAU37; 228 | extern int TerminalAUdH; 229 | /* penalty for forming bi-molecular duplex */ 230 | //extern int DuplexInit37; 231 | //extern int DuplexInitdH; 232 | /* stabilizing contribution due to special hairpins of size 4 (tetraloops) */ 233 | extern vector TetraloopSeq; /* string containing the special tetraloops */ 234 | extern 
int Tetraloop37[16]; /* Bonus energy for special tetraloops */ 235 | extern int TetraloopdH[16]; 236 | extern vector TriloopSeq; /* string containing the special triloops */ 237 | extern int Triloop37[2]; /* Bonus energy for special Triloops */ 238 | extern int TriloopdH[2]; /* Bonus energy for special Triloops */ 239 | extern vector HexaloopSeq; /* string containing the special hexaloops */ 240 | extern int Hexaloop37[4]; /* Bonus energy for special Hexaloops */ 241 | extern int HexaloopdH[4]; /* Bonus energy for special Hexaloops */ 242 | 243 | extern int Tetraloop[16]; 244 | extern int Triloop[2]; 245 | extern int Hexaloop[4]; 246 | extern unordered_map hairpinE; 247 | extern unordered_map max_cai_map; 248 | extern unordered_map pair2pos; 249 | /* NOTE(review): the element/key types of the vector and unordered_map declarations above are not visible in this copy - confirm them against the repository. */ 250 | 251 | extern double Tmeasure; /* temperature of param measurements */ 252 | /* Parameter loaders. Each takes a file name and a field delimiter (default ','); definitions live outside this header. NOTE(review): the meaning of `data` is not visible here - presumably it selects the destination table; confirm. scale_params takes two optional paths and a temperature that defaults to 37. */ 253 | void fill_stack(const string &filename, int data, char delimeter = ','); 254 | void fill_mismatch(const string &filename, int data, char delimeter = ','); 255 | void fill_intl11(const string &filename, int data, char delimeter = ','); 256 | void fill_intl21(const string &filename, int data, char delimeter = ','); 257 | void fill_intl22(const string &filename, int data, char delimeter = ','); 258 | void fill_codon(const string &filename, char delimeter = ','); 259 | void fill_miscellaneous(const string &filename, char delimeter = ','); 260 | void scale_params(const string &file = {}, const string &paramspath = {} ,double temp = 37); 261 | /* Returns true when a file named `name` can be opened for reading (stream is good() right after construction). */ 262 | inline bool exists (const std::string& name) { 263 | ifstream f(name.c_str()); 264 | return f.good(); 265 | } 266 | 267 | 268 | #endif //RNA_DESIGN_DEFAULT_H 269 | -------------------------------------------------------------------------------- /data/InputFiles/intl11.csv: -------------------------------------------------------------------------------- 1 | Pairs,N,A,C,G,U 2 | NP,100000,100000,100000,100000,100000 3 | NP,100000,100000,100000,100000,100000 4 | NP,100000,100000,100000,100000,100000 5 | 
NP,100000,100000,100000,100000,100000 6 | NP,100000,100000,100000,100000,100000 7 | NP,100000,100000,100000,100000,100000 8 | NP,100000,100000,100000,100000,100000 9 | NP,100000,100000,100000,100000,100000 10 | NP,100000,100000,100000,100000,100000 11 | NP,100000,100000,100000,100000,100000 12 | NP,100000,100000,100000,100000,100000 13 | NP,100000,100000,100000,100000,100000 14 | NP,100000,100000,100000,100000,100000 15 | NP,100000,100000,100000,100000,100000 16 | NP,100000,100000,100000,100000,100000 17 | NP,100000,100000,100000,100000,100000 18 | NP,100000,100000,100000,100000,100000 19 | NP,100000,100000,100000,100000,100000 20 | NP,100000,100000,100000,100000,100000 21 | NP,100000,100000,100000,100000,100000 22 | NP,100000,100000,100000,100000,100000 23 | NP,100000,100000,100000,100000,100000 24 | NP,100000,100000,100000,100000,100000 25 | NP,100000,100000,100000,100000,100000 26 | NP,100000,100000,100000,100000,100000 27 | NP,100000,100000,100000,100000,100000 28 | NP,100000,100000,100000,100000,100000 29 | NP,100000,100000,100000,100000,100000 30 | NP,100000,100000,100000,100000,100000 31 | NP,100000,100000,100000,100000,100000 32 | NP,100000,100000,100000,100000,100000 33 | NP,100000,100000,100000,100000,100000 34 | NP,100000,100000,100000,100000,100000 35 | NP,100000,100000,100000,100000,100000 36 | NP,100000,100000,100000,100000,100000 37 | NP,100000,100000,100000,100000,100000 38 | NP,100000,100000,100000,100000,100000 39 | NP,100000,100000,100000,100000,100000 40 | NP,100000,100000,100000,100000,100000 41 | NP,100000,100000,100000,100000,100000 42 | CG,100000,100000,100000,100000,100000 43 | CG,100000,100000,100000,100000,100000 44 | CG,100000,100000,100000,100000,100000 45 | CG,100000,100000,100000,100000,100000 46 | CG,100000,100000,100000,100000,100000 47 | CG,90,90,50,50,50 48 | CG,90,90,50,50,50 49 | CG,50,50,50,50,50 50 | CG,50,50,50,-140,50 51 | CG,50,50,50,50,40 52 | CG,90,90,50,50,60 53 | CG,90,90,-40,50,50 54 | CG,60,30,50,50,60 55 | 
CG,50,-10,50,-220,50 56 | CG,50,50,0,50,-10 57 | CG,120,120,120,120,120 58 | CG,120,60,50,120,120 59 | CG,120,120,120,120,120 60 | CG,120,-20,120,-140,120 61 | CG,120,120,100,120,110 62 | CG,220,220,170,120,120 63 | CG,220,220,130,120,120 64 | CG,170,120,170,120,120 65 | CG,120,120,120,-140,120 66 | CG,120,120,120,120,110 67 | CG,120,120,120,120,120 68 | CG,120,120,120,120,120 69 | CG,120,120,120,120,120 70 | CG,120,120,120,-140,120 71 | CG,120,120,120,120,80 72 | CG,120,120,120,120,120 73 | CG,120,120,120,120,120 74 | CG,120,120,120,120,120 75 | CG,120,120,120,-140,120 76 | CG,120,120,120,120,120 77 | CG,220,220,170,120,120 78 | CG,220,220,130,120,120 79 | CG,170,120,170,120,120 80 | CG,120,120,120,-140,120 81 | CG,120,120,120,120,120 82 | GC,100000,100000,100000,100000,100000 83 | GC,100000,100000,100000,100000,100000 84 | GC,100000,100000,100000,100000,100000 85 | GC,100000,100000,100000,100000,100000 86 | GC,100000,100000,100000,100000,100000 87 | GC,90,90,60,50,50 88 | GC,90,90,30,-10,50 89 | GC,50,-40,50,50,0 90 | GC,50,50,50,-220,50 91 | GC,60,50,60,50,-10 92 | GC,80,80,50,50,50 93 | GC,80,80,50,50,50 94 | GC,50,50,50,50,50 95 | GC,50,50,50,-230,50 96 | GC,50,50,50,50,-60 97 | GC,190,190,120,150,150 98 | GC,190,190,120,150,120 99 | GC,120,120,120,120,120 100 | GC,120,120,120,-140,120 101 | GC,150,120,120,120,150 102 | GC,160,160,120,120,120 103 | GC,160,160,120,100,120 104 | GC,120,120,120,120,120 105 | GC,120,120,120,-140,120 106 | GC,120,120,120,120,70 107 | GC,120,120,120,120,120 108 | GC,120,120,120,120,120 109 | GC,120,120,120,120,120 110 | GC,120,120,120,-140,120 111 | GC,120,120,120,120,80 112 | GC,120,120,120,120,120 113 | GC,120,120,120,120,120 114 | GC,120,120,120,120,120 115 | GC,120,120,120,-140,120 116 | GC,120,120,120,120,120 117 | GC,190,190,120,150,150 118 | GC,190,190,120,150,120 119 | GC,120,120,120,120,120 120 | GC,120,120,120,-140,120 121 | GC,150,120,120,120,150 122 | GU,100000,100000,100000,100000,100000 123 | 
GU,100000,100000,100000,100000,100000 124 | GU,100000,100000,100000,100000,100000 125 | GU,100000,100000,100000,100000,100000 126 | GU,100000,100000,100000,100000,100000 127 | GU,120,120,120,120,120 128 | GU,120,60,120,-20,120 129 | GU,120,50,120,120,100 130 | GU,120,120,120,-140,120 131 | GU,120,120,120,120,110 132 | GU,190,190,120,120,150 133 | GU,190,190,120,120,120 134 | GU,120,120,120,120,120 135 | GU,150,150,120,-140,120 136 | GU,150,120,120,120,150 137 | GU,190,190,190,190,190 138 | GU,190,190,190,190,190 139 | GU,190,190,190,190,190 140 | GU,190,190,190,-70,190 141 | GU,190,190,190,190,120 142 | GU,190,190,190,190,190 143 | GU,190,190,190,190,190 144 | GU,190,190,190,190,190 145 | GU,190,190,190,-70,190 146 | GU,190,190,190,190,160 147 | GU,190,190,190,190,190 148 | GU,190,190,190,190,190 149 | GU,190,190,190,190,190 150 | GU,190,190,190,-70,190 151 | GU,190,190,190,190,120 152 | GU,190,190,190,190,190 153 | GU,190,190,190,190,190 154 | GU,190,190,190,190,190 155 | GU,190,190,190,-70,190 156 | GU,190,190,190,190,160 157 | GU,190,190,190,190,190 158 | GU,190,190,190,190,190 159 | GU,190,190,190,190,190 160 | GU,190,190,190,-70,190 161 | GU,190,190,190,190,160 162 | UG,100000,100000,100000,100000,100000 163 | UG,100000,100000,100000,100000,100000 164 | UG,100000,100000,100000,100000,100000 165 | UG,100000,100000,100000,100000,100000 166 | UG,100000,100000,100000,100000,100000 167 | UG,220,220,170,120,120 168 | UG,220,220,120,120,120 169 | UG,170,130,170,120,120 170 | UG,120,120,120,-140,120 171 | UG,120,120,120,120,110 172 | UG,160,160,120,120,120 173 | UG,160,160,120,120,120 174 | UG,120,120,120,120,120 175 | UG,120,100,120,-140,120 176 | UG,120,120,120,120,70 177 | UG,190,190,190,190,190 178 | UG,190,190,190,190,190 179 | UG,190,190,190,190,190 180 | UG,190,190,190,-70,190 181 | UG,190,190,190,190,160 182 | UG,190,190,190,190,190 183 | UG,190,190,190,190,190 184 | UG,190,190,190,190,190 185 | UG,190,190,190,-70,190 186 | UG,190,190,190,190,190 187 | 
UG,190,190,190,190,190 188 | UG,190,190,190,190,190 189 | UG,190,190,190,190,190 190 | UG,190,190,190,-70,190 191 | UG,190,190,190,190,160 192 | UG,190,190,190,190,190 193 | UG,190,190,190,190,190 194 | UG,190,190,190,190,190 195 | UG,190,190,190,-70,190 196 | UG,190,190,190,190,190 197 | UG,220,220,190,190,190 198 | UG,220,220,190,190,190 199 | UG,190,190,190,190,190 200 | UG,190,190,190,-70,190 201 | UG,190,190,190,190,190 202 | AU,100000,100000,100000,100000,100000 203 | AU,100000,100000,100000,100000,100000 204 | AU,100000,100000,100000,100000,100000 205 | AU,100000,100000,100000,100000,100000 206 | AU,100000,100000,100000,100000,100000 207 | AU,120,120,120,120,120 208 | AU,120,120,120,120,120 209 | AU,120,120,120,120,120 210 | AU,120,120,120,-140,120 211 | AU,120,120,120,120,80 212 | AU,120,120,120,120,120 213 | AU,120,120,120,120,120 214 | AU,120,120,120,120,120 215 | AU,120,120,120,-140,120 216 | AU,120,120,120,120,80 217 | AU,190,190,190,190,190 218 | AU,190,190,190,190,190 219 | AU,190,190,190,190,190 220 | AU,190,190,190,-70,190 221 | AU,190,190,190,190,120 222 | AU,190,190,190,190,190 223 | AU,190,190,190,190,190 224 | AU,190,190,190,190,190 225 | AU,190,190,190,-70,190 226 | AU,190,190,190,190,160 227 | AU,190,190,190,190,190 228 | AU,190,190,190,190,190 229 | AU,190,190,190,190,190 230 | AU,190,190,190,-70,190 231 | AU,190,190,190,190,120 232 | AU,190,190,190,190,190 233 | AU,190,190,190,190,190 234 | AU,190,190,190,190,190 235 | AU,190,190,190,-70,190 236 | AU,190,190,190,190,150 237 | AU,190,190,190,190,190 238 | AU,190,190,190,190,190 239 | AU,190,190,190,190,190 240 | AU,190,190,190,-70,190 241 | AU,190,190,190,190,160 242 | UA,100000,100000,100000,100000,100000 243 | UA,100000,100000,100000,100000,100000 244 | UA,100000,100000,100000,100000,100000 245 | UA,100000,100000,100000,100000,100000 246 | UA,100000,100000,100000,100000,100000 247 | UA,120,120,120,120,120 248 | UA,120,120,120,120,120 249 | UA,120,120,120,120,120 250 | 
UA,120,120,120,-140,120 251 | UA,120,120,120,120,120 252 | UA,120,120,120,120,120 253 | UA,120,120,120,120,120 254 | UA,120,120,120,120,120 255 | UA,120,120,120,-140,120 256 | UA,120,120,120,120,120 257 | UA,190,190,190,190,190 258 | UA,190,190,190,190,190 259 | UA,190,190,190,190,190 260 | UA,190,190,190,-70,190 261 | UA,190,190,190,190,160 262 | UA,190,190,190,190,190 263 | UA,190,190,190,190,190 264 | UA,190,190,190,190,190 265 | UA,190,190,190,-70,190 266 | UA,190,190,190,190,190 267 | UA,190,190,190,190,190 268 | UA,190,190,190,190,190 269 | UA,190,190,190,190,190 270 | UA,190,190,190,-70,190 271 | UA,190,190,190,190,150 272 | UA,190,190,190,190,190 273 | UA,190,190,190,190,190 274 | UA,190,190,190,190,190 275 | UA,190,190,190,-70,190 276 | UA,190,190,190,190,170 277 | UA,190,190,190,190,190 278 | UA,190,190,190,190,190 279 | UA,190,190,190,190,190 280 | UA,190,190,190,-70,190 281 | UA,190,190,190,190,190 282 | NN,100000,100000,100000,100000,100000 283 | NN,100000,100000,100000,100000,100000 284 | NN,100000,100000,100000,100000,100000 285 | NN,100000,100000,100000,100000,100000 286 | NN,100000,100000,100000,100000,100000 287 | NN,220,220,170,120,120 288 | NN,220,220,120,120,120 289 | NN,170,130,170,120,120 290 | NN,120,120,120,-140,120 291 | NN,120,120,120,120,120 292 | NN,190,190,120,120,150 293 | NN,190,190,120,120,120 294 | NN,120,120,120,120,120 295 | NN,150,150,120,-140,120 296 | NN,150,120,120,120,150 297 | NN,190,190,190,190,190 298 | NN,190,190,190,190,190 299 | NN,190,190,190,190,190 300 | NN,190,190,190,-70,190 301 | NN,190,190,190,190,160 302 | NN,220,220,190,190,190 303 | NN,220,220,190,190,190 304 | NN,190,190,190,190,190 305 | NN,190,190,190,-70,190 306 | NN,190,190,190,190,190 307 | NN,190,190,190,190,190 308 | NN,190,190,190,190,190 309 | NN,190,190,190,190,190 310 | NN,190,190,190,-70,190 311 | NN,190,190,190,190,160 312 | NN,190,190,190,190,190 313 | NN,190,190,190,190,190 314 | NN,190,190,190,190,190 315 | NN,190,190,190,-70,190 316 | 
NN,190,190,190,190,190 317 | NN,220,220,190,190,190 318 | NN,220,220,190,190,190 319 | NN,190,190,190,190,190 320 | NN,190,190,190,-70,190 321 | NN,190,190,190,190,190 322 | -------------------------------------------------------------------------------- /data/InputFiles/intl11_H.csv: -------------------------------------------------------------------------------- 1 | Pairs,N,A,C,G,U 2 | NP,100000,100000,100000,100000,100000 3 | NP,100000,100000,100000,100000,100000 4 | NP,100000,100000,100000,100000,100000 5 | NP,100000,100000,100000,100000,100000 6 | NP,100000,100000,100000,100000,100000 7 | NP,100000,100000,100000,100000,100000 8 | NP,100000,100000,100000,100000,100000 9 | NP,100000,100000,100000,100000,100000 10 | NP,100000,100000,100000,100000,100000 11 | NP,100000,100000,100000,100000,100000 12 | NP,100000,100000,100000,100000,100000 13 | NP,100000,100000,100000,100000,100000 14 | NP,100000,100000,100000,100000,100000 15 | NP,100000,100000,100000,100000,100000 16 | NP,100000,100000,100000,100000,100000 17 | NP,100000,100000,100000,100000,100000 18 | NP,100000,100000,100000,100000,100000 19 | NP,100000,100000,100000,100000,100000 20 | NP,100000,100000,100000,100000,100000 21 | NP,100000,100000,100000,100000,100000 22 | NP,100000,100000,100000,100000,100000 23 | NP,100000,100000,100000,100000,100000 24 | NP,100000,100000,100000,100000,100000 25 | NP,100000,100000,100000,100000,100000 26 | NP,100000,100000,100000,100000,100000 27 | NP,100000,100000,100000,100000,100000 28 | NP,100000,100000,100000,100000,100000 29 | NP,100000,100000,100000,100000,100000 30 | NP,100000,100000,100000,100000,100000 31 | NP,100000,100000,100000,100000,100000 32 | NP,100000,100000,100000,100000,100000 33 | NP,100000,100000,100000,100000,100000 34 | NP,100000,100000,100000,100000,100000 35 | NP,100000,100000,100000,100000,100000 36 | NP,100000,100000,100000,100000,100000 37 | NP,100000,100000,100000,100000,100000 38 | NP,100000,100000,100000,100000,100000 39 | 
NP,100000,100000,100000,100000,100000 40 | NP,100000,100000,100000,100000,100000 41 | NP,100000,100000,100000,100000,100000 42 | CG,100000,100000,100000,100000,100000 43 | CG,100000,100000,100000,100000,100000 44 | CG,100000,100000,100000,100000,100000 45 | CG,100000,100000,100000,100000,100000 46 | CG,100000,100000,100000,100000,100000 47 | CG,-1050,-1050,-1050,-1050,-1050 48 | CG,-1050,-1050,-1050,-1050,-1050 49 | CG,-1050,-1050,-1050,-1050,-1050 50 | CG,-1050,-1050,-1050,-1840,-1050 51 | CG,-1050,-1050,-1050,-1050,-1050 52 | CG,-1050,-1050,-1050,-1050,-1050 53 | CG,-1050,-1050,-1050,-1050,-1050 54 | CG,-1050,-1050,-1050,-1050,-1050 55 | CG,-1050,-1050,-1050,-1840,-1050 56 | CG,-1050,-1050,-1050,-1050,-1390 57 | CG,-550,-550,-550,-550,-550 58 | CG,-550,-550,-550,-550,-550 59 | CG,-550,-550,-550,-550,-550 60 | CG,-550,-550,-550,-1340,-550 61 | CG,-550,-550,-550,-550,-890 62 | CG,-550,-550,-550,-550,-550 63 | CG,-550,-550,-550,-550,-550 64 | CG,-550,-550,-550,-550,-550 65 | CG,-550,-550,-550,-1340,-550 66 | CG,-550,-550,-550,-550,-550 67 | CG,-550,-550,-550,-550,-550 68 | CG,-550,-550,-550,-550,-550 69 | CG,-550,-550,-550,-550,-550 70 | CG,-550,-550,-550,-1340,-550 71 | CG,-550,-550,-550,-550,-890 72 | CG,-550,-550,-550,-550,-550 73 | CG,-550,-550,-550,-550,-550 74 | CG,-550,-550,-550,-550,-550 75 | CG,-550,-550,-550,-1340,-550 76 | CG,-550,-550,-550,-550,-550 77 | CG,-550,-550,-550,-550,-550 78 | CG,-550,-550,-550,-550,-550 79 | CG,-550,-550,-550,-550,-550 80 | CG,-550,-550,-550,-1340,-550 81 | CG,-550,-550,-550,-550,-550 82 | GC,100000,100000,100000,100000,100000 83 | GC,100000,100000,100000,100000,100000 84 | GC,100000,100000,100000,100000,100000 85 | GC,100000,100000,100000,100000,100000 86 | GC,100000,100000,100000,100000,100000 87 | GC,-1050,-1050,-1050,-1050,-1050 88 | GC,-1050,-1050,-1050,-1050,-1050 89 | GC,-1050,-1050,-1050,-1050,-1050 90 | GC,-1050,-1050,-1050,-1840,-1050 91 | GC,-1050,-1050,-1050,-1050,-1390 92 | GC,-1050,-1050,-1050,-1050,-1050 93 | 
GC,-1050,-1050,-1050,-1050,-1050 94 | GC,-1050,-1050,-1050,-1050,-1050 95 | GC,-1050,-1050,-1050,-1840,-1050 96 | GC,-1050,-1050,-1050,-1050,-1730 97 | GC,-550,-550,-550,-550,-550 98 | GC,-550,-550,-550,-550,-550 99 | GC,-550,-550,-550,-550,-550 100 | GC,-550,-550,-550,-1340,-550 101 | GC,-550,-550,-550,-550,-1230 102 | GC,-550,-550,-550,-550,-550 103 | GC,-550,-550,-550,-550,-550 104 | GC,-550,-550,-550,-550,-550 105 | GC,-550,-550,-550,-1340,-550 106 | GC,-550,-550,-550,-550,-890 107 | GC,-550,-550,-550,-550,-550 108 | GC,-550,-550,-550,-550,-550 109 | GC,-550,-550,-550,-550,-550 110 | GC,-550,-550,-550,-1340,-550 111 | GC,-550,-550,-550,-550,-1230 112 | GC,-550,-550,-550,-550,-550 113 | GC,-550,-550,-550,-550,-550 114 | GC,-550,-550,-550,-550,-550 115 | GC,-550,-550,-550,-1340,-550 116 | GC,-550,-550,-550,-550,-890 117 | GC,-550,-550,-550,-550,-550 118 | GC,-550,-550,-550,-550,-550 119 | GC,-550,-550,-550,-550,-550 120 | GC,-550,-550,-550,-1340,-550 121 | GC,-550,-550,-550,-550,-890 122 | GU,100000,100000,100000,100000,100000 123 | GU,100000,100000,100000,100000,100000 124 | GU,100000,100000,100000,100000,100000 125 | GU,100000,100000,100000,100000,100000 126 | GU,100000,100000,100000,100000,100000 127 | GU,-550,-550,-550,-550,-550 128 | GU,-550,-550,-550,-550,-550 129 | GU,-550,-550,-550,-550,-550 130 | GU,-550,-550,-550,-1340,-550 131 | GU,-550,-550,-550,-550,-890 132 | GU,-550,-550,-550,-550,-550 133 | GU,-550,-550,-550,-550,-550 134 | GU,-550,-550,-550,-550,-550 135 | GU,-550,-550,-550,-1340,-550 136 | GU,-550,-550,-550,-550,-1230 137 | GU,-50,-50,-50,-50,-50 138 | GU,-50,-50,-50,-50,-50 139 | GU,-50,-50,-50,-50,-50 140 | GU,-50,-50,-50,-830,-50 141 | GU,-50,-50,-50,-50,-730 142 | GU,-50,-50,-50,-50,-50 143 | GU,-50,-50,-50,-50,-50 144 | GU,-50,-50,-50,-50,-50 145 | GU,-50,-50,-50,-830,-50 146 | GU,-50,-50,-50,-50,-390 147 | GU,-50,-50,-50,-50,-50 148 | GU,-50,-50,-50,-50,-50 149 | GU,-50,-50,-50,-50,-50 150 | GU,-50,-50,-50,-830,-50 151 | 
GU,-50,-50,-50,-50,-730 152 | GU,-50,-50,-50,-50,-50 153 | GU,-50,-50,-50,-50,-50 154 | GU,-50,-50,-50,-50,-50 155 | GU,-50,-50,-50,-830,-50 156 | GU,-50,-50,-50,-50,-390 157 | GU,-50,-50,-50,-50,-50 158 | GU,-50,-50,-50,-50,-50 159 | GU,-50,-50,-50,-50,-50 160 | GU,-50,-50,-50,-830,-50 161 | GU,-50,-50,-50,-50,-390 162 | UG,100000,100000,100000,100000,100000 163 | UG,100000,100000,100000,100000,100000 164 | UG,100000,100000,100000,100000,100000 165 | UG,100000,100000,100000,100000,100000 166 | UG,100000,100000,100000,100000,100000 167 | UG,-550,-550,-550,-550,-550 168 | UG,-550,-550,-550,-550,-550 169 | UG,-550,-550,-550,-550,-550 170 | UG,-550,-550,-550,-1340,-550 171 | UG,-550,-550,-550,-550,-550 172 | UG,-550,-550,-550,-550,-550 173 | UG,-550,-550,-550,-550,-550 174 | UG,-550,-550,-550,-550,-550 175 | UG,-550,-550,-550,-1340,-550 176 | UG,-550,-550,-550,-550,-890 177 | UG,-50,-50,-50,-50,-50 178 | UG,-50,-50,-50,-50,-50 179 | UG,-50,-50,-50,-50,-50 180 | UG,-50,-50,-50,-830,-50 181 | UG,-50,-50,-50,-50,-390 182 | UG,-50,-50,-50,-50,-50 183 | UG,-50,-50,-50,-50,-50 184 | UG,-50,-50,-50,-50,-50 185 | UG,-50,-50,-50,-830,-50 186 | UG,-50,-50,-50,-50,-50 187 | UG,-50,-50,-50,-50,-50 188 | UG,-50,-50,-50,-50,-50 189 | UG,-50,-50,-50,-50,-50 190 | UG,-50,-50,-50,-830,-50 191 | UG,-50,-50,-50,-50,-390 192 | UG,-50,-50,-50,-50,-50 193 | UG,-50,-50,-50,-50,-50 194 | UG,-50,-50,-50,-50,-50 195 | UG,-50,-50,-50,-830,-50 196 | UG,-50,-50,-50,-50,-50 197 | UG,-50,-50,-50,-50,-50 198 | UG,-50,-50,-50,-50,-50 199 | UG,-50,-50,-50,-50,-50 200 | UG,-50,-50,-50,-830,-50 201 | UG,-50,-50,-50,-50,-50 202 | AU,100000,100000,100000,100000,100000 203 | AU,100000,100000,100000,100000,100000 204 | AU,100000,100000,100000,100000,100000 205 | AU,100000,100000,100000,100000,100000 206 | AU,100000,100000,100000,100000,100000 207 | AU,-550,-550,-550,-550,-550 208 | AU,-550,-550,-550,-550,-550 209 | AU,-550,-550,-550,-550,-550 210 | AU,-550,-550,-550,-1340,-550 211 | 
AU,-550,-550,-550,-550,-890 212 | AU,-550,-550,-550,-550,-550 213 | AU,-550,-550,-550,-550,-550 214 | AU,-550,-550,-550,-550,-550 215 | AU,-550,-550,-550,-1340,-550 216 | AU,-550,-550,-550,-550,-1230 217 | AU,-50,-50,-50,-50,-50 218 | AU,-50,-50,-50,-50,-50 219 | AU,-50,-50,-50,-50,-50 220 | AU,-50,-50,-50,-830,-50 221 | AU,-50,-50,-50,-50,-730 222 | AU,-50,-50,-50,-50,-50 223 | AU,-50,-50,-50,-50,-50 224 | AU,-50,-50,-50,-50,-50 225 | AU,-50,-50,-50,-830,-50 226 | AU,-50,-50,-50,-50,-390 227 | AU,-50,-50,-50,-50,-50 228 | AU,-50,-50,-50,-50,-50 229 | AU,-50,-50,-50,-50,-50 230 | AU,-50,-50,-50,-830,-50 231 | AU,-50,-50,-50,-50,-730 232 | AU,-50,-50,-50,-50,-50 233 | AU,-50,-50,-50,-50,-50 234 | AU,-50,-50,-50,-50,-50 235 | AU,-50,-50,-50,-830,-50 236 | AU,-50,-50,-50,-50,-390 237 | AU,-50,-50,-50,-50,-50 238 | AU,-50,-50,-50,-50,-50 239 | AU,-50,-50,-50,-50,-50 240 | AU,-50,-50,-50,-830,-50 241 | AU,-50,-50,-50,-50,-390 242 | UA,100000,100000,100000,100000,100000 243 | UA,100000,100000,100000,100000,100000 244 | UA,100000,100000,100000,100000,100000 245 | UA,100000,100000,100000,100000,100000 246 | UA,100000,100000,100000,100000,100000 247 | UA,-550,-550,-550,-550,-550 248 | UA,-550,-550,-550,-550,-550 249 | UA,-550,-550,-550,-550,-550 250 | UA,-550,-550,-550,-1340,-550 251 | UA,-550,-550,-550,-550,-550 252 | UA,-550,-550,-550,-550,-550 253 | UA,-550,-550,-550,-550,-550 254 | UA,-550,-550,-550,-550,-550 255 | UA,-550,-550,-550,-1340,-550 256 | UA,-550,-550,-550,-550,-890 257 | UA,-50,-50,-50,-50,-50 258 | UA,-50,-50,-50,-50,-50 259 | UA,-50,-50,-50,-50,-50 260 | UA,-50,-50,-50,-830,-50 261 | UA,-50,-50,-50,-50,-390 262 | UA,-50,-50,-50,-50,-50 263 | UA,-50,-50,-50,-50,-50 264 | UA,-50,-50,-50,-50,-50 265 | UA,-50,-50,-50,-830,-50 266 | UA,-50,-50,-50,-50,-50 267 | UA,-50,-50,-50,-50,-50 268 | UA,-50,-50,-50,-50,-50 269 | UA,-50,-50,-50,-50,-50 270 | UA,-50,-50,-50,-830,-50 271 | UA,-50,-50,-50,-50,-390 272 | UA,-50,-50,-50,-50,-50 273 | UA,-50,-50,-50,-50,-50 274 
| UA,-50,-50,-50,-50,-50 275 | UA,-50,-50,-50,-830,-50 276 | UA,-50,-50,-50,-50,-50 277 | UA,-50,-50,-50,-50,-50 278 | UA,-50,-50,-50,-50,-50 279 | UA,-50,-50,-50,-50,-50 280 | UA,-50,-50,-50,-830,-50 281 | UA,-50,-50,-50,-50,-50 282 | NN,100000,100000,100000,100000,100000 283 | NN,100000,100000,100000,100000,100000 284 | NN,100000,100000,100000,100000,100000 285 | NN,100000,100000,100000,100000,100000 286 | NN,100000,100000,100000,100000,100000 287 | NN,-550,-550,-550,-550,-550 288 | NN,-550,-550,-550,-550,-550 289 | NN,-550,-550,-550,-550,-550 290 | NN,-550,-550,-550,-1340,-550 291 | NN,-550,-550,-550,-550,-550 292 | NN,-550,-550,-550,-550,-550 293 | NN,-550,-550,-550,-550,-550 294 | NN,-550,-550,-550,-550,-550 295 | NN,-550,-550,-550,-1340,-550 296 | NN,-550,-550,-550,-550,-890 297 | NN,-50,-50,-50,-50,-50 298 | NN,-50,-50,-50,-50,-50 299 | NN,-50,-50,-50,-50,-50 300 | NN,-50,-50,-50,-830,-50 301 | NN,-50,-50,-50,-50,-390 302 | NN,-50,-50,-50,-50,-50 303 | NN,-50,-50,-50,-50,-50 304 | NN,-50,-50,-50,-50,-50 305 | NN,-50,-50,-50,-830,-50 306 | NN,-50,-50,-50,-50,-50 307 | NN,-50,-50,-50,-50,-50 308 | NN,-50,-50,-50,-50,-50 309 | NN,-50,-50,-50,-50,-50 310 | NN,-50,-50,-50,-830,-50 311 | NN,-50,-50,-50,-50,-390 312 | NN,-50,-50,-50,-50,-50 313 | NN,-50,-50,-50,-50,-50 314 | NN,-50,-50,-50,-50,-50 315 | NN,-50,-50,-50,-830,-50 316 | NN,-50,-50,-50,-50,-50 317 | NN,-50,-50,-50,-50,-50 318 | NN,-50,-50,-50,-50,-50 319 | NN,-50,-50,-50,-50,-50 320 | NN,-50,-50,-50,-830,-50 321 | NN,-50,-50,-50,-50,-50 322 | -------------------------------------------------------------------------------- /src/BeamZuker.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Summer Gu on 6/4/25. 
3 | // 4 | 5 | #include "Zuker.h" 6 | #include "BeamZuker.h" 7 | #include "utils.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "params/constants.h" 15 | 16 | using namespace std; 17 | 18 | BeamZuker::BeamZuker(int n, vector & protein, int k):protein(protein),n(n),k(k) { 19 | codon_selection.resize(n,-1); 20 | minX = -1, minY = -1; 21 | init_values(); 22 | start_index.resize(n*n, 0); 23 | index_offset.resize(n*n, 0); 24 | int idx = 0; 25 | last_idx = 0; 26 | // cnt = 0; 27 | for (int a = 0; a < n; a++) { 28 | for (int b = 0; b < n; b++) { 29 | idx = n*a+b; 30 | start_index[idx] = last_idx; 31 | index_offset[idx] = n_codon[protein[a]] * n_codon[protein[b]]; 32 | last_idx += index_offset[idx] * 9; 33 | } 34 | } 35 | } 36 | 37 | //((X == 0 && Y == 3) || (X == 3 && Y == 0) || (X == 1 && Y == 2) || (X == 2 && Y == 1) || (X == 2 && Y == 3) || (X == 3 && Y == 2)) 38 | void BeamZuker::init_values() { 39 | basepair.resize(16,0); 40 | basepair[(0<<2)+3] = 1; 41 | basepair[(3<<2)+0] = 1; 42 | basepair[(1<<2)+2] = 1; 43 | basepair[(2<<2)+1] = 1; 44 | basepair[(2<<2)+3] = 1; 45 | basepair[(3<<2)+2] = 1; 46 | nucle_seq.resize(3*n,-1); 47 | sector.resize(3*n); 48 | bp_bond.resize(3*n); 49 | ava_nucle_p.resize(3*6*n, 0); 50 | ava_nucle_m.resize(3*6*n, 0); 51 | for (int a = 0; a < n; ++a) { 52 | for (int x = 0; x < n_codon[protein[a]]; ++x) { 53 | for (int i = 0; i < 3; ++i) { 54 | int idx = index(a,x,i); 55 | if (a < n-1) ava_nucle_p[idx] = ava_nucleotides_int(a,x,i,1); 56 | if (a > 0) ava_nucle_m[idx] = ava_nucleotides_int(a,x,i,-1); 57 | } 58 | } 59 | } 60 | } 61 | 62 | double BeamZuker::calculate_CAI_O(std::ostream & fout, double lambda) { 63 | auto start = std::chrono::high_resolution_clock::now(); 64 | fout << "Zuker CAI" << std::endl; 65 | 66 | int idx; 67 | priority_queue prev_beam, curr_beam; 68 | unordered_map> pq; 69 | 70 | int a = 0; 71 | int pa = protein[a]; 72 | const int n_codon_a = n_codon[pa]; 73 | for (int i = 0; 
i < 3; ++i) { 74 | for (int j = i; j < 3; ++j) { 75 | for (int x = 0; x < n_codon_a; ++x) { 76 | double mfe = 0.0; 77 | double cai = (lambda - 1) * codon_cai[pa][x]; 78 | double score = mfe + cai; 79 | BeamEntry en(score, a, a, i, j, x, x, 0, mfe, cai); 80 | prev_beam.push(en); 81 | if ((int)prev_beam.size() > k) prev_beam.pop(); 82 | 83 | idx = index(a,a,i,j,x,x); 84 | O[idx] = en; 85 | } 86 | } 87 | } 88 | 89 | calculate_CAI_E(lambda); 90 | cout << "E done" << endl; 91 | 92 | int nuc_len = 3*n; 93 | // int la = sigma(a,i); 94 | 95 | for (int len = 1; len < nuc_len; ++len) { 96 | 97 | auto temp = prev_beam; 98 | 99 | for (const auto& prev : prev_beam) { 100 | a = prev.a; 101 | int b = prev.b, i = prev.i, j = prev.j; 102 | int x = prev.x, y = prev.y; 103 | idx = index(a,b,i,j,x,y); 104 | int pb = protein[b]; 105 | 106 | { 107 | int xi = nucleotides[protein[a]][x][i]; 108 | int yj = nucleotides[protein[b]][y][j]; 109 | int type = BP_pair[xi+1][yj+1]; 110 | 111 | if (type > 0) { 112 | double mfe = E[idx].mfe + lambda * AU[xi][yj]; 113 | double cai = E[idx].cai; 114 | double energy = mfe + cai; 115 | 116 | BeamEntry prev_entry = O[idx]; 117 | BeamEntry en(energy, a, b, i, j, x, y, -1, mfe, cai, 118 | {}); 119 | next_pq.push(en); 120 | if ((int)next_pq.size() > k) next_pq.pop(); 121 | 122 | if (O.count(idx) == 0 || en.score < O[idx].score) { 123 | O[idx] = en; 124 | } 125 | 126 | } 127 | 128 | } 129 | 130 | if (j < 2) { 131 | int new_j = j + 1; 132 | // int xi = nucleotides[protein[a]][x][i]; 133 | // int yj = nucleotides[protein[b]][y][new_j]; 134 | 135 | BeamEntry prev_entry = O[idx]; 136 | BeamEntry en(prev_entry.score, a, b, i, new_j, x, y, -2, prev_entry.mfe, prev_entry.cai, 137 | {}); 138 | next_pq.push(en); 139 | if ((int)next_pq.size() > k) next_pq.pop(); 140 | 141 | int new_idx = index(a,b,i,new_j,x,y); 142 | if (O.count(new_idx) == 0 || en.score < O[new_idx].score) { 143 | O[new_idx] = en; 144 | } 145 | } 146 | 147 | if (j == 2 && b < n-1) { 148 | int new_b = 
b + 1; 149 | int new_j = 0; 150 | 151 | BeamEntry prev_entry = O[idx]; 152 | double mfe = prev_entry.mfe; 153 | double cai = prev_entry.cai + (lambda - 1) * codon_cai[pb][y]; 154 | for (int ky = 0; ky < n_codon[protein[b+1]]; ++ky) { 155 | BeamEntry en(prev_entry.score, a, b, i, new_j, x, ky, -3, mfe, cai, 156 | {ky}); 157 | next_pq.push(en); 158 | if ((int)next_pq.size() > k) next_pq.pop(); 159 | 160 | int new_idx = index(a,new_b,i,new_j,x,ky); 161 | if (O.count(new_idx) == 0 || en.score < O[new_idx].score) { 162 | O[new_idx] = en; 163 | } 164 | } 165 | 166 | } 167 | 168 | { 169 | int la = sigma(a,i); 170 | int lb = sigma(b,j); 171 | 172 | for (int lc = la+1; lc <= lb-4; lc++) { 173 | int c = lc / 3; 174 | int i1 = lc % 3; 175 | 176 | int pc = protein[c]; 177 | int n_codon_c = n_codon[pc]; 178 | 179 | for (int hx = 0; hx < n_codon_c; ++hx) { 180 | if (c == a && x != hx) continue; 181 | int left_idx; 182 | if (i1 >= 1) { 183 | left_idx = index(a, c, i, i1 - 1, x, hx); 184 | } else { 185 | int n_codon_cp = n_codon[protein[c-1]]; 186 | for (int ky = 0; ky < n_codon_cp; ++ky) { 187 | 188 | } 189 | } 190 | 191 | } 192 | 193 | 194 | } 195 | 196 | } 197 | 198 | 199 | 200 | 201 | } 202 | 203 | } 204 | 205 | } 206 | 207 | void BeamZuker::calculate_CAI_E(double lambda) { 208 | unordered_map> beams_by_len; // len -> BeamEntries 209 | unordered_map beam_map; // index -> best BeamEntry 210 | vector candidates; 211 | 212 | int nuc_len = 3 * n; 213 | 214 | // === Step 1: Initialization for len = 4 === 215 | for (int a = 0; a < n - 1; ++a) { 216 | for (int i = 0; i < 3; ++i) { 217 | int b = a + 1; 218 | for (int j = 0; j < 3; ++j) { 219 | int pa = protein[a], pb = protein[b]; 220 | for (int x = 0; x < n_codon[pa]; ++x) { 221 | for (int y = 0; y < n_codon[pb]; ++y) { 222 | int xi = nucleotides[pa][x][i]; 223 | int yj = nucleotides[pb][y][j]; 224 | if (BP_pair[xi+1][yj+1] == 0) continue; 225 | int la = sigma(a,i), lb = sigma(b,j); 226 | if (lb - la != 4) continue; 227 | 228 | 
double mfe = hairpin_CAI(lambda, 4, a, b, pa, pb, 229 | protein[a+1], protein[b-1], 230 | n_codon[protein[a+1]], 231 | n_codon[protein[b-1]], 232 | xi, yj, i, j, x, y); 233 | double cai = (lambda - 1) * (codon_cai[pa][x] + codon_cai[pb][y]); 234 | double score = mfe + cai; 235 | BeamEntry en(score, a, b, i, j, x, y, -1, mfe, cai); 236 | 237 | int idx = index(a,b,i,j,x,y); 238 | beams_by_len[4].push_back(en); 239 | beam_map[idx] = en; 240 | Access_E1(a,b,i,j,x,y) = mfe; 241 | Access_EB(a,b,i,j,x,y) = -1; 242 | E_bt[idx] = {}; 243 | } 244 | } 245 | } 246 | } 247 | } 248 | 249 | for (int len = 5; len < nuc_len; ++len) { 250 | cout << "\rE/M Beam -- Completed: " << (len * 100.0 / nuc_len) << "%" << flush; 251 | candidates.clear(); 252 | 253 | for (auto &[prev_len, beam] : beams_by_len) { 254 | for (const auto &left : beam) { 255 | for (const auto &right : beam) { 256 | if (left.b >= right.a) continue; 257 | 258 | int la = sigma(left.a, left.i); 259 | int lb = sigma(left.b, left.j); 260 | int lc = sigma(right.a, right.i); 261 | int ld = sigma(right.b, right.j); 262 | if (lb < lc || ld - la != len) continue; 263 | 264 | int a = left.a, b = right.b; 265 | int i = left.i, j = right.j; 266 | int x = left.x, y = right.y; 267 | int pa = protein[a], pb = protein[b]; 268 | int xi = nucleotides[pa][x][i]; 269 | int yj = nucleotides[pb][y][j]; 270 | 271 | // try all 3: hairpin, internal, multi 272 | double mfe1 = hairpin_CAI(lambda, len, a, b, pa, pb, 273 | protein[a+1], protein[b-1], 274 | n_codon[protein[a+1]], n_codon[protein[b-1]], 275 | xi, yj, i, j, x, y); 276 | double mfe2 = internal_CAI(lambda, a, b, i, j, x, y, la, ld, xi, yj); 277 | double mfe3 = multi_loop_CAI(lambda, a, b, i, j, x, y, 278 | pa, pb, n_codon[protein[a+1]], n_codon[protein[b-1]]); 279 | double mfe = min({mfe1, mfe2, mfe3}); 280 | double cai = (lambda - 1) * (codon_cai[pa][x] + codon_cai[pb][y]); 281 | double score = mfe + cai; 282 | 283 | BeamEntry en(score, a, b, i, j, x, y, -4, mfe, cai); 284 | 
candidates.push_back(en); 285 | } 286 | } 287 | } 288 | 289 | // Top-k pruning 290 | sort(candidates.begin(), candidates.end()); 291 | if ((int)candidates.size() > k) candidates.resize(k); 292 | 293 | for (auto &entry : candidates) { 294 | int idx = index(entry.a, entry.b, entry.i, entry.j, entry.x, entry.y); 295 | beams_by_len[len].push_back(entry); 296 | beam_map[idx] = entry; 297 | 298 | Access_E1(entry.a, entry.b, entry.i, entry.j, entry.x, entry.y) = entry.mfe; 299 | Access_EB(entry.a, entry.b, entry.i, entry.j, entry.x, entry.y) = entry.backtrace_type; 300 | E_bt[idx] = entry.bt_info; 301 | } 302 | } 303 | 304 | cout << "\nE/M Beam search complete with CAI" << endl; 305 | 306 | 307 | } 308 | 309 | void BeamZuker::calculate_CAI_M(int a, int b, int i, int j, int x, int y, double lambda) { 310 | 311 | } -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Nussinov.h" 4 | #include "Zuker.h" 5 | #include "default.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "utils.h" 11 | #include "params/constants.h" 12 | 13 | using namespace std; 14 | 15 | int main(int argc, char *argv[]) { 16 | 17 | int n;//len of protein 18 | string input = "../data/uniprotSeq/P15421.fasta"; 19 | string output = "output.txt"; 20 | string rna_file,swipe_output; 21 | string codon_file = {}; 22 | string param_path = {}; 23 | int model = 1, mode = 1; 24 | double incr = inf, lambda = inf, threshold = 0.0025, threshold2 = 0.00075; 25 | int g = inf; 26 | 27 | if (argc < 2) { 28 | help(); 29 | } 30 | 31 | try { 32 | size_t i = 1; 33 | while ((int)i+1 <= argc) { 34 | string param = argv[i]; 35 | if (argv[i][0] == '-') { 36 | switch (argv[i][1]) { 37 | case 'i': 38 | input = argv[i+1]; 39 | break; 40 | case 'o': 41 | output = argv[i+1]; 42 | break; 43 | case 'm': 44 | model = std::stoi(argv[i+1]); 45 | break; 46 | case 's': 47 
| mode = std::stoi(argv[i+1]); 48 | break; 49 | case 'g': 50 | g = std::stoi(argv[i+1]); 51 | break; 52 | case 'l': 53 | lambda = std::stod(argv[i+1]); 54 | break; 55 | case 'a': 56 | incr = std::stod(argv[i+1]); 57 | break; 58 | case 'r': 59 | rna_file = argv[i+1]; 60 | break; 61 | case 'O': 62 | swipe_output = argv[i+1]; 63 | break; 64 | case 'c': 65 | codon_file = argv[i+1]; 66 | break; 67 | case 'd': 68 | param_path = argv[i+1]; 69 | break; 70 | case 't': 71 | threshold = stod(argv[i+1]); 72 | break; 73 | case 'p': 74 | threshold2 = stod(argv[i+1]); 75 | break; 76 | default: 77 | help(); 78 | return(0); 79 | } 80 | } 81 | i += 2; 82 | } 83 | 84 | } catch (const std::exception& e) { 85 | std::cout << "Exception!" << std::endl; 86 | help(); 87 | return -1; 88 | } 89 | 90 | bool nussinov = false, zuker = false, test = false; 91 | bool subopt = false, subopt_all = false; 92 | switch (model) { 93 | case 0: 94 | nussinov = true; 95 | break; 96 | case 1: 97 | zuker = true; 98 | break; 99 | case 2: 100 | zuker = true; 101 | // subopt = true; 102 | break; 103 | case 3: 104 | zuker = true; 105 | // subopt_all = true; 106 | break; 107 | case -1: 108 | test = true; 109 | break; 110 | default: 111 | throw invalid_argument("Invalid Input for Model"); 112 | } 113 | 114 | if (output.empty()) throw invalid_argument("Output File Needed"); 115 | ofstream fout(output); 116 | scale_params(codon_file, param_path); //"../python/pfizer_codon_usage.csv" 117 | 118 | 119 | if (test) { 120 | if (rna_file.empty()) throw invalid_argument("RNA Input File Needed in Test Mode"); 121 | if (input.empty()) throw invalid_argument("Protein Input File Needed in Test Mode"); 122 | vector protein = read_fasta(input, fout); 123 | 124 | vector rna = read_rna(rna_file); 125 | string bp(rna.size(), '.'); 126 | 127 | double cai = getCAI(rna, protein); 128 | double CAI = evaluate_CAI(rna, protein); 129 | double MFE = evaluate_MFE(rna, bp); 130 | 131 | fout << "secondary structure: " << bp << endl; 132 | 
fout << "eval MFE: " << MFE/100 << endl; 133 | fout << "eval CAI: " << cai << endl; 134 | fout << "eval standard CAI: " << CAI << endl; 135 | return 0; 136 | } 137 | 138 | bool mfe = false; 139 | bool mfe_cai = false; 140 | bool lambda_sweep = false; 141 | bool lambda_sweep2 = false; 142 | switch (mode) { 143 | case 1: 144 | mfe = true; 145 | break; 146 | case 2: 147 | mfe_cai = true; 148 | break; 149 | case 3: 150 | lambda_sweep = true; 151 | break; 152 | case 4: 153 | lambda_sweep2 = true; 154 | break; 155 | default: 156 | throw invalid_argument("Invalid Input for Mode"); 157 | } 158 | 159 | 160 | 161 | if (input.empty()) throw invalid_argument("Input File Needed"); 162 | 163 | vector protein = read_fasta(input, fout); 164 | n = int(protein.size()); 165 | fout << endl; 166 | double n_res = 0; 167 | string rna; 168 | 169 | 170 | if (nussinov && mfe) { 171 | if (g == inf) throw invalid_argument("Invalid Value of g"); 172 | Nussinov N = Nussinov(protein, n, g); 173 | tuple temp = N.nussinov(fout); 174 | n_res = get<0>(temp); 175 | rna = get<1>(temp); 176 | int bp = evaluate_BP_N(rna,g); 177 | fout << "nussinov bp count: " << bp << endl; 178 | } 179 | 180 | if (nussinov && mfe_cai) { 181 | if (g == inf) throw invalid_argument("Invalid Value of g"); 182 | Nussinov N = Nussinov(protein, n, g); 183 | tuple temp = N.nussinov_CAI(lambda, fout); 184 | n_res = get<0>(temp); 185 | rna = get<1>(temp); 186 | int bp = evaluate_BP_N(rna,g); 187 | int type = 0; 188 | double CAI = evaluate_CAI_N(rna,protein,type); 189 | fout << "lambda: " << lambda << endl; 190 | fout << "integrated energy: " << n_res << endl; 191 | fout << "CAI: " << CAI << endl; 192 | fout << "nussinov: " << bp << endl; 193 | } 194 | 195 | if (nussinov && lambda_sweep) { 196 | Nussinov N = Nussinov(protein, n, g); 197 | N.lambda_sweep(incr,fout, swipe_output); 198 | } 199 | 200 | 201 | if (zuker && mfe) { 202 | auto start = chrono::high_resolution_clock::now(); 203 | Zuker Z = Zuker(n,mode,protein); 204 | 
Z.calculate_Z(fout); 205 | Z.traceback_B(); 206 | auto end = chrono::high_resolution_clock::now(); 207 | long time_take = chrono::duration_cast(end - start).count(); 208 | fout << "Time taken : " << time_take; 209 | fout << "sec" << endl; 210 | string zuker_bp(3*n,'.'), zuker_rna(3*n,'.'), zuker_rna_X(3*n,'.');// zuker_bp2(3*n,'.'),zuker_bp1(3*n,'.'); 211 | vector rna_array(n, zuker_rna); 212 | Z.get_bp(zuker_bp); 213 | Z.get_rna(zuker_rna); 214 | Z.get_rna_X(zuker_rna_X); 215 | double cai = evaluate_CAI(zuker_rna, protein, 0); 216 | 217 | 218 | fout << "zuker bp:" << zuker_bp << ", size: " << zuker_bp.size() << endl; 219 | fout << "zuker rna:" << zuker_rna_X << ", size: " << zuker_rna.size() << endl; 220 | fout << "zuker rna:" << zuker_rna << ", size: " << zuker_rna.size() << endl; 221 | fout << "zuker cai: " << cai << endl; 222 | fout << "other rna: " << endl; 223 | } 224 | 225 | if (zuker && mfe_cai) { 226 | if (lambda == inf) throw invalid_argument("Invalid Value of lambda"); 227 | Zuker Z = Zuker(n,mode,protein); 228 | fout << "lambda: " << lambda << endl; 229 | double energy_cai = Z.calculate_CAI_O(fout, lambda); 230 | 231 | Z.traceback_B2(lambda); 232 | string zuker_cai_rna(3*n,'.'), zuker_cai_bp(3*n,'.'); 233 | string zuker_cai_rna_X(3*n, '.'); 234 | Z.get_rna_X(zuker_cai_rna_X); 235 | Z.get_rna_cai(zuker_cai_rna); 236 | Z.get_bp(zuker_cai_bp); 237 | Z.save_all_vectors("./"); 238 | 239 | int type = 0; 240 | 241 | double CAI_s = evaluate_CAI(zuker_cai_rna,protein,type); 242 | double CAI = evaluate_CAI(zuker_cai_rna,protein,1); 243 | double MFE = evaluate_MFE(zuker_cai_rna); 244 | 245 | cout << "lambda: " << lambda << ",O: " << energy_cai << ",cai: " << CAI << ",cai_s: " << CAI_s << ",mfe: " << MFE << ",combined: " << lambda*MFE+(lambda-1)*CAI << endl; 246 | fout << "zuker cai bp: " << zuker_cai_bp << ",size: " << zuker_cai_bp.size() << endl; 247 | fout << "zuker rna: " << zuker_cai_rna_X << ".size: " << zuker_cai_rna.size() << endl; 248 | fout << "zuker cai 
rna: " << zuker_cai_rna << ".size: " << zuker_cai_rna.size() << endl; 249 | 250 | fout << "Codon Adaptation Index: " << CAI_s << endl; 251 | fout << "Free Energy: " << MFE/100 << endl; 252 | 253 | 254 | 255 | 256 | if (subopt) { 257 | mt19937 rng(60); 258 | double i = 1; 259 | 260 | // Open CSV file: 261 | ofstream csv_fout("zuker_subopt_sample.csv"); 262 | csv_fout << "gamma,sequence,bp,MFE,CAI,combined,optimal,count\n"; 263 | 264 | 265 | while (i >= 0.99) { 266 | cout << "gamma: " << i << endl; 267 | 268 | Z.traceback_suboptimal(lambda, i, rng); 269 | 270 | string subopt_cai_rna(3*n,'.'), subopt_cai_bp(3*n,'.'); 271 | string subopt_cai_rna_X(3*n, '.'); 272 | 273 | Z.get_rna_X(subopt_cai_rna_X); 274 | Z.get_rna_cai(subopt_cai_rna); 275 | Z.get_bp(subopt_cai_bp); 276 | 277 | CAI_s = evaluate_CAI(subopt_cai_rna,protein,type); 278 | CAI = evaluate_CAI(subopt_cai_rna,protein,1); 279 | MFE = evaluate_MFE(subopt_cai_rna); 280 | 281 | double combined = lambda*MFE + (lambda-1)*CAI; 282 | int is_optimal = (combined == energy_cai ? 
1 : 0); // Always 0 for subopt paths in this block 283 | 284 | // vector all_paths; 285 | // Z.traceback_enumerate_dfs(lambda, i, 20000, all_paths); 286 | size_t count = Z.traceback_count_dfs(lambda, i); 287 | 288 | // Console output 289 | cout << "lambda: " << lambda << ",O: " << energy_cai << ",cai: " << CAI 290 | << ",cai_s: " << CAI_s << ",mfe: " << MFE 291 | << ",combined: " << combined << ",count: " << count << endl; 292 | 293 | fout << "zuker cai bp: " << subopt_cai_bp << ",size: " << subopt_cai_bp.size() << endl; 294 | fout << "zuker rna: " << subopt_cai_rna_X << ".size: " << subopt_cai_rna.size() << endl; 295 | fout << "zuker cai rna: " << subopt_cai_rna << ".size: " << subopt_cai_rna.size() << endl; 296 | 297 | fout << "Codon Adaptation Index: " << CAI_s << endl; 298 | fout << "Free Energy: " << MFE/100 << endl; 299 | fout << "---------" << endl; 300 | 301 | // CSV output 302 | csv_fout << i << "," // gamma 303 | << subopt_cai_rna << "," 304 | << subopt_cai_bp << "," 305 | << MFE/100 << "," 306 | << CAI_s << "," 307 | << combined << "," 308 | << is_optimal << "," 309 | << count << "\n"; 310 | 311 | i -= 0.001; 312 | } 313 | 314 | 315 | 316 | csv_fout.close(); 317 | } 318 | 319 | double gamma = 0.99; 320 | if (subopt_all) { 321 | vector all_paths; 322 | Z.traceback_enumerate_dfs(lambda, gamma, inf, all_paths); // e.g. 
1000 paths 323 | 324 | cout << "Total paths: " << all_paths.size() << endl; 325 | 326 | ofstream csv_fout("zuker_subopt_paths_0.99.csv"); 327 | csv_fout << "sequence,bp,MFE,CAI,combined,optimal,path\n"; 328 | 329 | csv_fout << zuker_cai_rna << "," 330 | << zuker_cai_bp << "," 331 | << MFE/100 << "," // as you output it in your console 332 | << CAI_s << "," // you said use CAI_s here 333 | << energy_cai << "," 334 | << 1 << "," 335 | << energy_cai << "\n"; 336 | 337 | for (size_t p = 0; p < all_paths.size(); ++p) { 338 | const Path& path = all_paths[p]; 339 | 340 | // Assign path data into Z internal state: 341 | Z.load_path(path); 342 | 343 | // Prepare output strings: 344 | string subopt_cai_rna(3*n, '.'); 345 | string subopt_cai_bp(3*n, '.'); 346 | string subopt_cai_rna_X(3*n, '.'); 347 | 348 | // Call your usual functions: 349 | Z.get_rna_X(subopt_cai_rna_X); 350 | Z.get_rna_cai(subopt_cai_rna); 351 | Z.get_bp(subopt_cai_bp); 352 | 353 | CAI_s = evaluate_CAI(subopt_cai_rna, protein, type); 354 | CAI = evaluate_CAI(subopt_cai_rna, protein, 1); 355 | MFE = evaluate_MFE(subopt_cai_rna); 356 | 357 | double combined = lambda*MFE + (lambda-1)*CAI; 358 | int is_optimal = (combined == energy_cai ? 
1 : 0); 359 | 360 | // Console output 361 | cout << "Path " << p+1 << " / " << all_paths.size() << endl; 362 | cout << "RNA_X : " << subopt_cai_rna_X << endl; 363 | cout << "RNA : " << subopt_cai_rna << endl; 364 | cout << "BP : " << subopt_cai_bp << endl; 365 | cout << "lambda: " << lambda << ",O: " << energy_cai << ",cai: " << CAI << ",cai_s: " << CAI_s << ",mfe: " << MFE << ",combined: " << combined << ",path sum: " << path.change << endl; 366 | cout << "---------" << endl; 367 | 368 | // CSV output 369 | csv_fout << subopt_cai_rna << "," 370 | << subopt_cai_bp << "," 371 | << MFE/100 << "," // as you output it in your console 372 | << CAI_s << "," // you said use CAI_s here 373 | << combined << "," 374 | << is_optimal << "," 375 | << path.change << "\n"; 376 | } 377 | csv_fout.close(); 378 | } 379 | } 380 | 381 | if (zuker && lambda_sweep) { 382 | Zuker Z = Zuker(n,mode,protein); 383 | Z.lambda_sweep_2(threshold,threshold2, fout,swipe_output); 384 | } 385 | 386 | if (zuker && lambda_sweep2) { 387 | if (lambda == inf) throw invalid_argument("Invalid Value of lambda"); 388 | if (incr == inf) throw invalid_argument("Invalid increment"); 389 | Zuker Z = Zuker(n,mode,protein); 390 | Z.lambda_sweep(incr,fout,swipe_output); 391 | } 392 | 393 | return 0; 394 | } 395 | 396 | 397 | 398 | 399 | 400 | -------------------------------------------------------------------------------- /src/utils.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by xinyu on 3/23/2022. 
3 | // 4 | 5 | #include "utils.h" 6 | #include "ZukerAlgorithm.h" 7 | #include "NussinovAlgorithm.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | int getxPos(int, const string &); 19 | 20 | struct CodonTable { 21 | int aa; 22 | vector nucleotides; 23 | vector codon_usages; 24 | }; 25 | 26 | typedef struct CodonTable CodonTable; 27 | 28 | // amino acid A = 0, R = 1, N = 2, D = 3, C = 4, Q = 5, E = 6, G = 7, H = 8, M = 9, I = 10, L = 11, K = 12, F = 13, P = 14 29 | // S = 15, T = 16, W = 17, Y = 18, V = 19 30 | // nucleotide A = 0, C = 1, G = 2, U = 3 31 | 32 | 33 | 34 | // amino acid A = 0, R = 1, N = 2, D = 3, C = 4, Q = 5, E = 6, G = 7, H = 8, M = 9, I = 10, L = 11, K = 12, F = 13, P = 14 35 | // S = 15, T = 16, W = 17, Y = 18, V = 19 36 | int aa_index(char aa) { 37 | switch (aa) { 38 | case 'A': return 0; 39 | case 'R': return 1; 40 | case 'N': return 2; 41 | case 'D': return 3; 42 | case 'C': return 4; 43 | case 'Q': return 5; 44 | case 'E': return 6; 45 | case 'G': return 7; 46 | case 'H': return 8; 47 | case 'M': return 9; 48 | case 'I': return 10; 49 | case 'L': return 11; 50 | case 'K': return 12; 51 | case 'F': return 13; 52 | case 'P': return 14; 53 | case 'S': return 15; 54 | case 'T': return 16; 55 | case 'W': return 17; 56 | case 'Y': return 18; 57 | case 'V': return 19; 58 | default: 59 | cout << (int)aa << " " << (char)aa << endl; 60 | throw invalid_argument("no char found"); 61 | } 62 | } 63 | 64 | int n_index(char n) { 65 | switch (n) { 66 | case 'A': return 0; 67 | case 'C': return 1; 68 | case 'G': return 2; 69 | case 'U': return 3; 70 | default: throw invalid_argument("invalid input"); 71 | } 72 | } 73 | 74 | // AU, UA, GC, CG, GU, UG 75 | // nucleotide A = 0, C = 1, G = 2, U = 3 76 | int n_index2(int n1, int n2) { 77 | switch (n1-n2) { 78 | case -3: return 0; 79 | case 1: 80 | switch (n1) { 81 | case 2: return 2; 82 | default: return 5; 83 | } 84 
| case -1: 85 | switch (n1) { 86 | case 2: return 4; 87 | default: return abs(n1-n2); 88 | } 89 | default: 90 | return abs(n1-n2); 91 | } 92 | } 93 | 94 | 95 | // nucleotide A = 0, C = 1, G = 2, U = 3 96 | bool complementary(int X, int Y) 97 | { 98 | return ((X == 0 && Y == 3) || (X == 3 && Y == 0) || (X == 1 && Y == 2) || (X == 2 && Y == 1)); 99 | } 100 | 101 | int sigma(int a, int i) { 102 | return 3*a+i; 103 | } 104 | 105 | int m(int n, int a, int b, int i, int j, int x) { // 106 | switch (x) { 107 | case -1: 108 | return 9*(n-1)*(b-a)+9*(2*b-a)+(6-3*i+j); 109 | default: 110 | return 6*(9*(n-1)*(b-a)+9*(2*b-a)+(6-3*i+j))+x; 111 | } 112 | 113 | } 114 | 115 | void transform2num(vector & target, string s) { 116 | for (int i = 0; i < (int)s.size(); i++) { 117 | target[i] = n_index(s[i]); 118 | } 119 | } 120 | 121 | string num2String(vector & num) { 122 | int size = (int)num.size(); 123 | string s(size,'.'); 124 | for (int i = 0; i < size; i++) { 125 | s[i] = to_char[num[i]]; 126 | } 127 | return s; 128 | } 129 | 130 | double getCAI(const vector & rna, const vector & protein) { 131 | int n = (int)protein.size(); 132 | 133 | double CAI_ans = 0; 134 | for (int i = 0; i < n; ++i) { 135 | vector codon(3); 136 | int p = protein[i]; 137 | for (int j = 0; j <= 2; j++) { 138 | codon[j] = rna[3*i+j]; 139 | } 140 | 141 | int x = getxPos(p, codon); 142 | 143 | CAI_ans += codon_cai[p][x]; 144 | 145 | } 146 | return CAI_ans; 147 | } 148 | 149 | double stand_getCAI(const vector & rna, const vector & protein) { 150 | int n = (int)protein.size(); 151 | double CAI_ans = 0; 152 | for (int i = 0; i < n; ++i) { 153 | vector codon(3); 154 | int p = protein[i]; 155 | for (int j = 0; j <= 2; j++) { 156 | codon[j] = rna[3*i+j]; 157 | } 158 | // cout << i << endl; 159 | int x = getxPos(p, codon); 160 | CAI_ans += codon_cai[p][x]; 161 | } 162 | cout << CAI_ans/n << endl; 163 | return exp(CAI_ans/n); 164 | } 165 | 166 | double getCAI_s(const vector & rna, const vector & protein) { 167 | int n 
= (int)protein.size(); 168 | double CAI_ans = 0; 169 | for (int i = 0; i < n; ++i) { 170 | vector codon(3); 171 | 172 | int p = protein[i]; 173 | for (int j = 0; j <= 2; j++) { 174 | codon[j] = rna[3*i+j]; 175 | } 176 | // cout << i << endl; 177 | int x = getxPos(p, codon); 178 | CAI_ans += codon_cai_s[p][x]; 179 | } 180 | return CAI_ans; 181 | } 182 | 183 | double stand_getCAI_s(const vector & rna, const vector & protein) { 184 | // cout << "standard" << endl; 185 | int n = (int)protein.size(); 186 | double CAI_ans = 0; 187 | for (int i = 0; i < n; ++i) { 188 | vector codon(3); 189 | 190 | int p = protein[i]; 191 | for (int j = 0; j <= 2; j++) { 192 | codon[j] = rna[3*i+j]; 193 | } 194 | // cout << "index: " << i << ", protein: " << p << endl; 195 | int x = getxPos(p, codon); 196 | CAI_ans += codon_cai_s[p][x]; 197 | } 198 | 199 | return exp(CAI_ans/n); 200 | } 201 | 202 | int getxPos(int p, vector & codon) { 203 | for (int i = 0; i < 6; ++i) { 204 | vector temp(begin(nucleotides[p][i]), end(nucleotides[p][i])); 205 | if (temp == codon) { 206 | return i; 207 | } 208 | } 209 | cout << p << endl; 210 | for (int i = 0; i < 3; ++i) { 211 | cout << codon[i]; 212 | } 213 | cout << endl; 214 | throw invalid_argument("match not found"); 215 | } 216 | 217 | 218 | // nucleotide A = 0, C = 1, G = 2, U = 3 219 | int to_int(char a) { 220 | switch (a) { 221 | case 'A': return 0; 222 | case 'C': return 1; 223 | case 'G': return 2; 224 | case 'U': return 3; 225 | default: 226 | cout << "char: " << char(a) << endl; 227 | throw invalid_argument("invalid argument to convert to number"); 228 | } 229 | } 230 | 231 | void char2num(vector& target, string & s) { 232 | for (int i = 0; i < (int)s.size(); ++i) { 233 | target[i] = to_int(s[i]); 234 | } 235 | } 236 | 237 | 238 | void write_csv(string filename, const vector>> & dataset) { 239 | ofstream output(filename); 240 | for (int i = 0; i < (int)dataset.size(); i++) { 241 | output << dataset[i].first; 242 | if (i != (int)dataset.size() - 
1) output << ","; 243 | } 244 | output << "\n"; 245 | for (int i = 0; i < (int)dataset[0].second.size(); i++) { 246 | for (int j = 0; j < (int)dataset.size(); j++) { 247 | output << dataset[j].second[i]; 248 | if(j != (int)dataset.size() - 1) output << ","; 249 | } 250 | output << "\n"; 251 | } 252 | output.close(); 253 | } 254 | 255 | 256 | bool compare(double x, double y, double epsilon) { 257 | if(fabs(x - y) < epsilon) return true; 258 | return false; 259 | } 260 | 261 | bool greaterThan(double a, double b) 262 | { 263 | return a > b && !compare(a, b); 264 | } 265 | 266 | 267 | // nucleotide A = 0, C = 1, G = 2, U = 3 268 | bool basepair(int X, int Y) 269 | { 270 | return ((X == 0 && Y == 3) || (X == 3 && Y == 0) || (X == 1 && Y == 2) || (X == 2 && Y == 1) || (X == 2 && Y == 3) || (X == 3 && Y == 2)); 271 | } 272 | 273 | 274 | bool add_auterminal(int a, int b) { 275 | return (a == 0 && b == 3) || (a == 3 && b == 0) || (a == 2 && b == 3) || (a == 3 && b == 2); 276 | } 277 | 278 | 279 | // nucleotide A = 0, C = 1, G = 2, U = 3 280 | bool add_ggmm(int a, int b) { 281 | return (a == 2 && b == 2); 282 | } 283 | 284 | 285 | // nucleotide A = 0, C = 1, G = 2, U = 3 286 | bool add_uugamm(int a, int b) { 287 | return (a == 3 && b == 3) || (a == 2 && b == 0); 288 | } 289 | 290 | 291 | int l(int a, int i, int b, int j) { 292 | return sigma(b,j) - sigma(a,i) - 1; 293 | } 294 | 295 | 296 | int get_index(vector & seqs, string & seq) { 297 | auto index = find(seqs.begin(), seqs.end(), seq); 298 | 299 | if (index != seqs.end()) { 300 | return index - seqs.begin(); 301 | } 302 | return -1; 303 | } 304 | 305 | 306 | void help() 307 | { 308 | printf("Usage:\n"); 309 | printf(" -i -- input file path\n"); 310 | printf(" -o -- output file path\n"); 311 | printf(" -m -- model <0,1,-1> , 0 for Nussinov-based model, 1 for Zuker-based model, -1 for Evaluation\n"); 312 | printf(" -s -- mode <1,2,3>, 1 for MFE only, 2 for balancing MFE and CAI at fixed lambda, 3 for lambda sweep\n"); 313 
| printf(" -l -- lambda <[0,1]>\n"); 314 | printf(" -a -- sweep increment <(0,1]>\n"); 315 | printf(" -r -- input rna file path\n"); 316 | printf(" -O -- sweep output csv file name\n"); 317 | printf(" -g -- minimum gap allowed in Nussinov <[0,inf)>\n"); 318 | printf(" -t -- threshold tau1 <(0,1)>\n"); 319 | printf(" -p -- threshold tau2 <(0,1)>\n"); 320 | printf(" -c -- codon usage table file path\n"); 321 | printf(" -d -- directory to energy parameters\n"); 322 | printf(" ...\n"); 323 | } 324 |

// NOTE(review): this region was recovered from a line-collapsed dump in which
// template argument lists had been stripped (e.g. `vector` instead of
// `vector<int>`). The arguments below are reconstructed from how each value is
// used in the function bodies -- confirm them against the declarations in
// utils.h before relying on them.

/**
 * Read a nucleotide sequence from a text file.
 *
 * Every byte of the file except '\n' and '\r' is passed through to_int() and
 * appended to the result, in file order.
 *
 * @param input path of the file to read
 * @return the converted sequence
 *
 * Prints a message to stdout and calls exit(1) if the file cannot be opened.
 */
vector<int> read_rna(string & input) {
    ifstream fin(input);
    if (!fin.is_open()) {
        cout << "Could not open the RNA file - '" << input << "'" << endl;
        exit(1);
    }

    vector<int> rna;
    char byte;
    while (fin.get(byte)) {
        if (byte == '\n' || byte == '\r') continue;  // skip line terminators
        rna.push_back(to_int(byte));
    }
    return rna;
}


/**
 * Read a protein sequence from a FASTA file.
 *
 * The first line of the file is read and discarded. Every following byte
 * except '\n' and '\r' is echoed to `fout` and appended to the result after
 * conversion with aa_index().
 *
 * @param input path of the FASTA file
 * @param fout  stream that receives "protein sequence: <residues>\n"
 *              (or the error message if the file cannot be opened)
 * @return the converted sequence
 *         NOTE(review): element type assumed to be int -- confirm the return
 *         type of aa_index().
 *
 * Calls exit(1) if the file cannot be opened.
 */
vector<int> read_fasta(string & input, ostream& fout) {
    ifstream fin(input);
    if (!fin.is_open()) {
        fout << "Could not open the FASTA file - '" << input << "'" << endl;
        exit(1);
    }

    string header;
    getline(fin, header);  // first line is consumed and not used

    fout << "protein sequence: ";
    vector<int> protein;
    char byte;
    while (fin.get(byte)) {
        if (byte == '\n' || byte == '\r') continue;  // skip line terminators
        fout << byte;
        protein.push_back(aa_index(byte));
    }
    fout << endl;
    return protein;
}

/**
 * Evaluate the CAI of `rna` against `protein`.
 *
 * The string is converted with char2num(); `type == 1` evaluates with
 * getCAI(), any other value with stand_getCAI_s().
 */
double evaluate_CAI(string & rna, vector<int> & protein, int type) {
    int l = int(rna.size());
    vector<int> seq(l);
    char2num(seq, rna);

    if (type == 1) {
        return getCAI(seq, protein);
    }
    return stand_getCAI_s(seq, protein);
}

/**
 * Look up a codon in the `nucleotides` table.
 *
 * Scans up to 6 codon slots for each of the 20 amino-acid rows; a slot whose
 * first entry is -1 ends the scan of that row.
 *
 * @param codon three numeric nucleotides
 * @return {amino acid index, codon index} of the first match, or {-1, -1}
 *         when the codon is not present in the table
 * @throws std::invalid_argument if `codon` does not have exactly 3 elements
 */
pair<int, int> find_amino_acid_and_codon_index(const vector<int>& codon) {
    if (codon.size() != 3) {
        throw std::invalid_argument("Codon must have exactly 3 elements");
    }

    for (int aa = 0; aa < 20; ++aa) {
        for (int k = 0; k < 6; ++k) {
            const int* triplet = nucleotides[aa][k];
            if (triplet[0] == -1) break;  // End of valid codons for this aa

            if (triplet[0] == codon[0] &&
                triplet[1] == codon[1] &&
                triplet[2] == codon[2]) {
                return {aa, k};  // Found: (amino acid index, codon index)
            }
        }
    }

    return {-1, -1};  // Not found
}

/**
 * Evaluate the CAI of `rna` without a reference protein.
 *
 * The sequence is converted with char2num() and split into floor(size / 3)
 * consecutive codons; trailing nucleotides that do not fill a codon are
 * ignored. Each codon is looked up with find_amino_acid_and_codon_index() and
 * its table entry is accumulated:
 *   - type == 0 : entries of codon_cai_s
 *   - otherwise : entries of codon_cai
 *
 * @return the raw sum when type == 1; exp(sum / codon count) for every other
 *         type. With fewer than 3 nucleotides there are no codons: type 1
 *         returns 0 and other types return NaN (0/0), as before.
 * @throws std::invalid_argument if a codon is not present in the codon table.
 *         Previously the {-1, -1} "not found" result was used directly as an
 *         array index, reading outside the tables.
 */
double evaluate_CAI(string & rna, int type) {
    int l = int(rna.size());
    vector<int> seq(l);
    char2num(seq, rna);

    int n = l / 3;
    double CAI_ans = 0;
    vector<int> codon(3);  // reused for every codon

    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < 3; ++j) {
            codon[j] = seq[3*i + j];
        }

        int p, x;
        tie(p, x) = find_amino_acid_and_codon_index(codon);
        if (p < 0 || x < 0) {
            // Never index the weight tables with the "not found" sentinel.
            throw std::invalid_argument(
                "evaluate_CAI: codon " + std::to_string(i) +
                " is not present in the codon table");
        }

        if (type == 0) {
            CAI_ans += codon_cai_s[p][x];
        } else {
            CAI_ans += codon_cai[p][x];
        }
    }

    if (type == 1) return CAI_ans;

    return exp(CAI_ans / n);
}


/**
 * Evaluate the CAI of an already-numeric sequence with stand_getCAI_s().
 */
double evaluate_CAI(vector<int> & rna, vector<int> & protein) {
    return stand_getCAI_s(rna, protein);
}

/**
 * Evaluate the MFE of `rna`: convert it with char2num(), build a
 * ZukerAlgorithm over the result and return calculate_W().
 */
double evaluate_MFE(string & rna) {
    int l = int(rna.size());
    vector<int> seq(l);
    char2num(seq, rna);

    ZukerAlgorithm Zu = ZukerAlgorithm(seq, l);
    return Zu.calculate_W();
}

/**
 * Keep the candidates that carry the required character at every required
 * position.
 *
 * @param candidates strings to test
 * @param letter_map position -> required character; a candidate is rejected
 *                   if a position is past its end or holds another character
 *                   NOTE(review): key type assumed to be int -- confirm.
 * @return the matching candidates, in their original order
 */
vector<string> filterCandidates(
        const vector<string>& candidates,
        const unordered_map<int, char>& letter_map) {

    vector<string> filtered;

    for (const auto& s : candidates) {
        bool match = true;
        for (const auto& [idx, ch] : letter_map) {
            if (idx >= s.size() || s[idx] != ch) {
                match = false;
                break;
            }
        }
        if (match) {
            filtered.push_back(s);
        }
    }

    return filtered;
}


/**
 * Evaluate the MFE of a numeric sequence and optionally retrieve a structure.
 *
 * @param rna numeric sequence
 * @param bp  in/out: traceback_2() and get_bp(bp) are called only when `bp`
 *            is non-empty on entry; an empty `bp` is left untouched.
 *            NOTE(review): confirm callers pre-fill `bp` when they want the
 *            structure.
 * @return the value of calculate_W()
 */
double evaluate_MFE(vector<int> & rna, string & bp) {
    int l = int(rna.size());
    ZukerAlgorithm Zu = ZukerAlgorithm(rna, l);
    double mfe = Zu.calculate_W();

    if (!bp.empty()) {
        Zu.traceback_2();
        Zu.get_bp(bp);
    }
    return mfe;
}

/**
 * Evaluate the CAI of `rna` after converting it with transform2num().
 * `type == 1` evaluates with getCAI_s(), any other value with
 * stand_getCAI_s().
 */
double evaluate_CAI_N(string & rna, vector<int> & protein, int type) {
    int l = int(rna.size());
    vector<int> seq(l);
    transform2num(seq, rna);

    if (type == 1) {
        return getCAI_s(seq, protein);
    }
    return stand_getCAI_s(seq, protein);
}

/**
 * Run NussinovAlgorithm over `rna` (converted with transform2num()) with
 * gap parameter `g` and return nussinov(0, size - 1).
 */
int evaluate_BP_N(string & rna, int g) {
    int l = int(rna.size());
    vector<int> seq(l);
    transform2num(seq, rna);

    NussinovAlgorithm F = NussinovAlgorithm(seq, l, g);
    return F.nussinov(0, l - 1);
}

/** A path is complete when its sector stack is empty. */
bool is_complete_path(const Path& path) {
    return path.sector_stack.empty();
}

-------------------------------------------------------------------------------- /src/params/intl11.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Summer Gu on 8/16/22. 
3 | // 4 | 5 | #ifndef RNA_DESIGN_INTL11_H 6 | #define RNA_DESIGN_INTL11_H 7 | PUBLIC int int11_37[NBPAIRS+1][NBPAIRS+1][5][5] = 8 | {{{{ INF, INF, INF, INF, INF} 9 | ,{ INF, INF, INF, INF, INF} 10 | ,{ INF, INF, INF, INF, INF} 11 | ,{ INF, INF, INF, INF, INF} 12 | ,{ INF, INF, INF, INF, INF} 13 | } 14 | ,{{ INF, INF, INF, INF, INF} 15 | ,{ INF, INF, INF, INF, INF} 16 | ,{ INF, INF, INF, INF, INF} 17 | ,{ INF, INF, INF, INF, INF} 18 | ,{ INF, INF, INF, INF, INF} 19 | } 20 | ,{{ INF, INF, INF, INF, INF} 21 | ,{ INF, INF, INF, INF, INF} 22 | ,{ INF, INF, INF, INF, INF} 23 | ,{ INF, INF, INF, INF, INF} 24 | ,{ INF, INF, INF, INF, INF} 25 | } 26 | ,{{ INF, INF, INF, INF, INF} 27 | ,{ INF, INF, INF, INF, INF} 28 | ,{ INF, INF, INF, INF, INF} 29 | ,{ INF, INF, INF, INF, INF} 30 | ,{ INF, INF, INF, INF, INF} 31 | } 32 | ,{{ INF, INF, INF, INF, INF} 33 | ,{ INF, INF, INF, INF, INF} 34 | ,{ INF, INF, INF, INF, INF} 35 | ,{ INF, INF, INF, INF, INF} 36 | ,{ INF, INF, INF, INF, INF} 37 | } 38 | ,{{ INF, INF, INF, INF, INF} 39 | ,{ INF, INF, INF, INF, INF} 40 | ,{ INF, INF, INF, INF, INF} 41 | ,{ INF, INF, INF, INF, INF} 42 | ,{ INF, INF, INF, INF, INF} 43 | } 44 | ,{{ INF, INF, INF, INF, INF} 45 | ,{ INF, INF, INF, INF, INF} 46 | ,{ INF, INF, INF, INF, INF} 47 | ,{ INF, INF, INF, INF, INF} 48 | ,{ INF, INF, INF, INF, INF} 49 | } 50 | ,{{ INF, INF, INF, INF, INF} 51 | ,{ INF, INF, INF, INF, INF} 52 | ,{ INF, INF, INF, INF, INF} 53 | ,{ INF, INF, INF, INF, INF} 54 | ,{ INF, INF, INF, INF, INF} 55 | } 56 | } 57 | ,{{{ INF, INF, INF, INF, INF} 58 | ,{ INF, INF, INF, INF, INF} 59 | ,{ INF, INF, INF, INF, INF} 60 | ,{ INF, INF, INF, INF, INF} 61 | ,{ INF, INF, INF, INF, INF} 62 | } 63 | ,{{ 90, 90, 50, 50, 50} 64 | ,{ 90, 90, 50, 50, 50} 65 | ,{ 50, 50, 50, 50, 50} 66 | ,{ 50, 50, 50, -140, 50} 67 | ,{ 50, 50, 50, 50, 40} 68 | } 69 | ,{{ 90, 90, 50, 50, 60} 70 | ,{ 90, 90, -40, 50, 50} 71 | ,{ 60, 30, 50, 50, 60} 72 | ,{ 50, -10, 50, -220, 50} 73 | ,{ 50, 50, 0, 50, -10} 74 | } 75 | 
,{{ 120, 120, 120, 120, 120} 76 | ,{ 120, 60, 50, 120, 120} 77 | ,{ 120, 120, 120, 120, 120} 78 | ,{ 120, -20, 120, -140, 120} 79 | ,{ 120, 120, 100, 120, 110} 80 | } 81 | ,{{ 220, 220, 170, 120, 120} 82 | ,{ 220, 220, 130, 120, 120} 83 | ,{ 170, 120, 170, 120, 120} 84 | ,{ 120, 120, 120, -140, 120} 85 | ,{ 120, 120, 120, 120, 110} 86 | } 87 | ,{{ 120, 120, 120, 120, 120} 88 | ,{ 120, 120, 120, 120, 120} 89 | ,{ 120, 120, 120, 120, 120} 90 | ,{ 120, 120, 120, -140, 120} 91 | ,{ 120, 120, 120, 120, 80} 92 | } 93 | ,{{ 120, 120, 120, 120, 120} 94 | ,{ 120, 120, 120, 120, 120} 95 | ,{ 120, 120, 120, 120, 120} 96 | ,{ 120, 120, 120, -140, 120} 97 | ,{ 120, 120, 120, 120, 120} 98 | } 99 | ,{{ 220, 220, 170, 120, 120} 100 | ,{ 220, 220, 130, 120, 120} 101 | ,{ 170, 120, 170, 120, 120} 102 | ,{ 120, 120, 120, -140, 120} 103 | ,{ 120, 120, 120, 120, 120} 104 | } 105 | } 106 | ,{{{ INF, INF, INF, INF, INF} 107 | ,{ INF, INF, INF, INF, INF} 108 | ,{ INF, INF, INF, INF, INF} 109 | ,{ INF, INF, INF, INF, INF} 110 | ,{ INF, INF, INF, INF, INF} 111 | } 112 | ,{{ 90, 90, 60, 50, 50} 113 | ,{ 90, 90, 30, -10, 50} 114 | ,{ 50, -40, 50, 50, 0} 115 | ,{ 50, 50, 50, -220, 50} 116 | ,{ 60, 50, 60, 50, -10} 117 | } 118 | ,{{ 80, 80, 50, 50, 50} 119 | ,{ 80, 80, 50, 50, 50} 120 | ,{ 50, 50, 50, 50, 50} 121 | ,{ 50, 50, 50, -230, 50} 122 | ,{ 50, 50, 50, 50, -60} 123 | } 124 | ,{{ 190, 190, 120, 150, 150} 125 | ,{ 190, 190, 120, 150, 120} 126 | ,{ 120, 120, 120, 120, 120} 127 | ,{ 120, 120, 120, -140, 120} 128 | ,{ 150, 120, 120, 120, 150} 129 | } 130 | ,{{ 160, 160, 120, 120, 120} 131 | ,{ 160, 160, 120, 100, 120} 132 | ,{ 120, 120, 120, 120, 120} 133 | ,{ 120, 120, 120, -140, 120} 134 | ,{ 120, 120, 120, 120, 70} 135 | } 136 | ,{{ 120, 120, 120, 120, 120} 137 | ,{ 120, 120, 120, 120, 120} 138 | ,{ 120, 120, 120, 120, 120} 139 | ,{ 120, 120, 120, -140, 120} 140 | ,{ 120, 120, 120, 120, 80} 141 | } 142 | ,{{ 120, 120, 120, 120, 120} 143 | ,{ 120, 120, 120, 120, 120} 144 | ,{ 120, 120, 
120, 120, 120} 145 | ,{ 120, 120, 120, -140, 120} 146 | ,{ 120, 120, 120, 120, 120} 147 | } 148 | ,{{ 190, 190, 120, 150, 150} 149 | ,{ 190, 190, 120, 150, 120} 150 | ,{ 120, 120, 120, 120, 120} 151 | ,{ 120, 120, 120, -140, 120} 152 | ,{ 150, 120, 120, 120, 150} 153 | } 154 | } 155 | ,{{{ INF, INF, INF, INF, INF} 156 | ,{ INF, INF, INF, INF, INF} 157 | ,{ INF, INF, INF, INF, INF} 158 | ,{ INF, INF, INF, INF, INF} 159 | ,{ INF, INF, INF, INF, INF} 160 | } 161 | ,{{ 120, 120, 120, 120, 120} 162 | ,{ 120, 60, 120, -20, 120} 163 | ,{ 120, 50, 120, 120, 100} 164 | ,{ 120, 120, 120, -140, 120} 165 | ,{ 120, 120, 120, 120, 110} 166 | } 167 | ,{{ 190, 190, 120, 120, 150} 168 | ,{ 190, 190, 120, 120, 120} 169 | ,{ 120, 120, 120, 120, 120} 170 | ,{ 150, 150, 120, -140, 120} 171 | ,{ 150, 120, 120, 120, 150} 172 | } 173 | ,{{ 190, 190, 190, 190, 190} 174 | ,{ 190, 190, 190, 190, 190} 175 | ,{ 190, 190, 190, 190, 190} 176 | ,{ 190, 190, 190, -70, 190} 177 | ,{ 190, 190, 190, 190, 120} 178 | } 179 | ,{{ 190, 190, 190, 190, 190} 180 | ,{ 190, 190, 190, 190, 190} 181 | ,{ 190, 190, 190, 190, 190} 182 | ,{ 190, 190, 190, -70, 190} 183 | ,{ 190, 190, 190, 190, 160} 184 | } 185 | ,{{ 190, 190, 190, 190, 190} 186 | ,{ 190, 190, 190, 190, 190} 187 | ,{ 190, 190, 190, 190, 190} 188 | ,{ 190, 190, 190, -70, 190} 189 | ,{ 190, 190, 190, 190, 120} 190 | } 191 | ,{{ 190, 190, 190, 190, 190} 192 | ,{ 190, 190, 190, 190, 190} 193 | ,{ 190, 190, 190, 190, 190} 194 | ,{ 190, 190, 190, -70, 190} 195 | ,{ 190, 190, 190, 190, 160} 196 | } 197 | ,{{ 190, 190, 190, 190, 190} 198 | ,{ 190, 190, 190, 190, 190} 199 | ,{ 190, 190, 190, 190, 190} 200 | ,{ 190, 190, 190, -70, 190} 201 | ,{ 190, 190, 190, 190, 160} 202 | } 203 | } 204 | ,{{{ INF, INF, INF, INF, INF} 205 | ,{ INF, INF, INF, INF, INF} 206 | ,{ INF, INF, INF, INF, INF} 207 | ,{ INF, INF, INF, INF, INF} 208 | ,{ INF, INF, INF, INF, INF} 209 | } 210 | ,{{ 220, 220, 170, 120, 120} 211 | ,{ 220, 220, 120, 120, 120} 212 | ,{ 170, 130, 170, 120, 
120} 213 | ,{ 120, 120, 120, -140, 120} 214 | ,{ 120, 120, 120, 120, 110} 215 | } 216 | ,{{ 160, 160, 120, 120, 120} 217 | ,{ 160, 160, 120, 120, 120} 218 | ,{ 120, 120, 120, 120, 120} 219 | ,{ 120, 100, 120, -140, 120} 220 | ,{ 120, 120, 120, 120, 70} 221 | } 222 | ,{{ 190, 190, 190, 190, 190} 223 | ,{ 190, 190, 190, 190, 190} 224 | ,{ 190, 190, 190, 190, 190} 225 | ,{ 190, 190, 190, -70, 190} 226 | ,{ 190, 190, 190, 190, 160} 227 | } 228 | ,{{ 190, 190, 190, 190, 190} 229 | ,{ 190, 190, 190, 190, 190} 230 | ,{ 190, 190, 190, 190, 190} 231 | ,{ 190, 190, 190, -70, 190} 232 | ,{ 190, 190, 190, 190, 190} 233 | } 234 | ,{{ 190, 190, 190, 190, 190} 235 | ,{ 190, 190, 190, 190, 190} 236 | ,{ 190, 190, 190, 190, 190} 237 | ,{ 190, 190, 190, -70, 190} 238 | ,{ 190, 190, 190, 190, 160} 239 | } 240 | ,{{ 190, 190, 190, 190, 190} 241 | ,{ 190, 190, 190, 190, 190} 242 | ,{ 190, 190, 190, 190, 190} 243 | ,{ 190, 190, 190, -70, 190} 244 | ,{ 190, 190, 190, 190, 190} 245 | } 246 | ,{{ 220, 220, 190, 190, 190} 247 | ,{ 220, 220, 190, 190, 190} 248 | ,{ 190, 190, 190, 190, 190} 249 | ,{ 190, 190, 190, -70, 190} 250 | ,{ 190, 190, 190, 190, 190} 251 | } 252 | } 253 | ,{{{ INF, INF, INF, INF, INF} 254 | ,{ INF, INF, INF, INF, INF} 255 | ,{ INF, INF, INF, INF, INF} 256 | ,{ INF, INF, INF, INF, INF} 257 | ,{ INF, INF, INF, INF, INF} 258 | } 259 | ,{{ 120, 120, 120, 120, 120} 260 | ,{ 120, 120, 120, 120, 120} 261 | ,{ 120, 120, 120, 120, 120} 262 | ,{ 120, 120, 120, -140, 120} 263 | ,{ 120, 120, 120, 120, 80} 264 | } 265 | ,{{ 120, 120, 120, 120, 120} 266 | ,{ 120, 120, 120, 120, 120} 267 | ,{ 120, 120, 120, 120, 120} 268 | ,{ 120, 120, 120, -140, 120} 269 | ,{ 120, 120, 120, 120, 80} 270 | } 271 | ,{{ 190, 190, 190, 190, 190} 272 | ,{ 190, 190, 190, 190, 190} 273 | ,{ 190, 190, 190, 190, 190} 274 | ,{ 190, 190, 190, -70, 190} 275 | ,{ 190, 190, 190, 190, 120} 276 | } 277 | ,{{ 190, 190, 190, 190, 190} 278 | ,{ 190, 190, 190, 190, 190} 279 | ,{ 190, 190, 190, 190, 190} 280 | ,{ 190, 
190, 190, -70, 190} 281 | ,{ 190, 190, 190, 190, 160} 282 | } 283 | ,{{ 190, 190, 190, 190, 190} 284 | ,{ 190, 190, 190, 190, 190} 285 | ,{ 190, 190, 190, 190, 190} 286 | ,{ 190, 190, 190, -70, 190} 287 | ,{ 190, 190, 190, 190, 120} 288 | } 289 | ,{{ 190, 190, 190, 190, 190} 290 | ,{ 190, 190, 190, 190, 190} 291 | ,{ 190, 190, 190, 190, 190} 292 | ,{ 190, 190, 190, -70, 190} 293 | ,{ 190, 190, 190, 190, 150} 294 | } 295 | ,{{ 190, 190, 190, 190, 190} 296 | ,{ 190, 190, 190, 190, 190} 297 | ,{ 190, 190, 190, 190, 190} 298 | ,{ 190, 190, 190, -70, 190} 299 | ,{ 190, 190, 190, 190, 160} 300 | } 301 | } 302 | ,{{{ INF, INF, INF, INF, INF} 303 | ,{ INF, INF, INF, INF, INF} 304 | ,{ INF, INF, INF, INF, INF} 305 | ,{ INF, INF, INF, INF, INF} 306 | ,{ INF, INF, INF, INF, INF} 307 | } 308 | ,{{ 120, 120, 120, 120, 120} 309 | ,{ 120, 120, 120, 120, 120} 310 | ,{ 120, 120, 120, 120, 120} 311 | ,{ 120, 120, 120, -140, 120} 312 | ,{ 120, 120, 120, 120, 120} 313 | } 314 | ,{{ 120, 120, 120, 120, 120} 315 | ,{ 120, 120, 120, 120, 120} 316 | ,{ 120, 120, 120, 120, 120} 317 | ,{ 120, 120, 120, -140, 120} 318 | ,{ 120, 120, 120, 120, 120} 319 | } 320 | ,{{ 190, 190, 190, 190, 190} 321 | ,{ 190, 190, 190, 190, 190} 322 | ,{ 190, 190, 190, 190, 190} 323 | ,{ 190, 190, 190, -70, 190} 324 | ,{ 190, 190, 190, 190, 160} 325 | } 326 | ,{{ 190, 190, 190, 190, 190} 327 | ,{ 190, 190, 190, 190, 190} 328 | ,{ 190, 190, 190, 190, 190} 329 | ,{ 190, 190, 190, -70, 190} 330 | ,{ 190, 190, 190, 190, 190} 331 | } 332 | ,{{ 190, 190, 190, 190, 190} 333 | ,{ 190, 190, 190, 190, 190} 334 | ,{ 190, 190, 190, 190, 190} 335 | ,{ 190, 190, 190, -70, 190} 336 | ,{ 190, 190, 190, 190, 150} 337 | } 338 | ,{{ 190, 190, 190, 190, 190} 339 | ,{ 190, 190, 190, 190, 190} 340 | ,{ 190, 190, 190, 190, 190} 341 | ,{ 190, 190, 190, -70, 190} 342 | ,{ 190, 190, 190, 190, 170} 343 | } 344 | ,{{ 190, 190, 190, 190, 190} 345 | ,{ 190, 190, 190, 190, 190} 346 | ,{ 190, 190, 190, 190, 190} 347 | ,{ 190, 190, 190, -70, 190} 
348 | ,{ 190, 190, 190, 190, 190} 349 | } 350 | } 351 | ,{{{ INF, INF, INF, INF, INF} 352 | ,{ INF, INF, INF, INF, INF} 353 | ,{ INF, INF, INF, INF, INF} 354 | ,{ INF, INF, INF, INF, INF} 355 | ,{ INF, INF, INF, INF, INF} 356 | } 357 | ,{{ 220, 220, 170, 120, 120} 358 | ,{ 220, 220, 120, 120, 120} 359 | ,{ 170, 130, 170, 120, 120} 360 | ,{ 120, 120, 120, -140, 120} 361 | ,{ 120, 120, 120, 120, 120} 362 | } 363 | ,{{ 190, 190, 120, 120, 150} 364 | ,{ 190, 190, 120, 120, 120} 365 | ,{ 120, 120, 120, 120, 120} 366 | ,{ 150, 150, 120, -140, 120} 367 | ,{ 150, 120, 120, 120, 150} 368 | } 369 | ,{{ 190, 190, 190, 190, 190} 370 | ,{ 190, 190, 190, 190, 190} 371 | ,{ 190, 190, 190, 190, 190} 372 | ,{ 190, 190, 190, -70, 190} 373 | ,{ 190, 190, 190, 190, 160} 374 | } 375 | ,{{ 220, 220, 190, 190, 190} 376 | ,{ 220, 220, 190, 190, 190} 377 | ,{ 190, 190, 190, 190, 190} 378 | ,{ 190, 190, 190, -70, 190} 379 | ,{ 190, 190, 190, 190, 190} 380 | } 381 | ,{{ 190, 190, 190, 190, 190} 382 | ,{ 190, 190, 190, 190, 190} 383 | ,{ 190, 190, 190, 190, 190} 384 | ,{ 190, 190, 190, -70, 190} 385 | ,{ 190, 190, 190, 190, 160} 386 | } 387 | ,{{ 190, 190, 190, 190, 190} 388 | ,{ 190, 190, 190, 190, 190} 389 | ,{ 190, 190, 190, 190, 190} 390 | ,{ 190, 190, 190, -70, 190} 391 | ,{ 190, 190, 190, 190, 190} 392 | } 393 | ,{{ 220, 220, 190, 190, 190} 394 | ,{ 220, 220, 190, 190, 190} 395 | ,{ 190, 190, 190, 190, 190} 396 | ,{ 190, 190, 190, -70, 190} 397 | ,{ 190, 190, 190, 190, 190} 398 | } 399 | }}; 400 | 401 | #endif //RNA_DESIGN_INTL11_H 402 | --------------------------------------------------------------------------------