├── Body ├── Images-chap4 │ ├── digits.tex │ ├── fig1.pdf │ ├── fig2.pdf │ ├── fig3.pdf │ ├── hiv1p.png │ ├── mydoc.pdf │ ├── tree.pdf │ ├── voice.pdf │ ├── digits.pdf │ ├── pockets.png │ ├── barChart.pdf │ ├── flowcyto.pdf │ ├── mutstats.pdf │ ├── entropy-new.pdf │ ├── fixedDeltas.pdf │ ├── henikoff-repo.pdf │ ├── finalBbsWithInset.pdf │ ├── finalCleanedWithInset.png │ ├── table_results1.tex │ ├── table_results2.tex │ ├── foo.py │ ├── makeHisto.py │ ├── fig_voiceAlign.tex │ ├── table_matrix.tex │ ├── fig_pda.tex │ └── fig_pdaAlign.tex ├── appb │ ├── align_8c__incl.md5 │ ├── bitSet_8c__incl.md5 │ ├── bitSet_8h__incl.md5 │ ├── convll_8c__incl.md5 │ ├── convll_8h__incl.md5 │ ├── gemoda-r_8c__incl.md5 │ ├── gemoda-s_8c__incl.md5 │ ├── matrices_8c__incl.md5 │ ├── newConv_8c__incl.md5 │ ├── patStats_8c__incl.md5 │ ├── patStats_8h__incl.md5 │ ├── realIo_8c__incl.md5 │ ├── realIo_8h__incl.md5 │ ├── words_8c__incl.md5 │ ├── bitSet_8h__dep__incl.md5 │ ├── convll_8h__dep__incl.md5 │ ├── fastaSeqIO_8c__incl.md5 │ ├── fastaSeqIO_8h__incl.md5 │ ├── matdata_8h__dep__incl.md5 │ ├── matrices_8h__dep__incl.md5 │ ├── matrixmap_8h__incl.md5 │ ├── patStats_8h__dep__incl.md5 │ ├── realCompare_8c__incl.md5 │ ├── realCompare_8h__incl.md5 │ ├── realIo_8h__dep__incl.md5 │ ├── spat_8h__dep__incl.md5 │ ├── fastaSeqIO_8h__dep__incl.md5 │ ├── matrixmap_8h__dep__incl.md5 │ ├── realCompare_8h__dep__incl.md5 │ ├── structcnode__coll__graph.md5 │ ├── structmnode__coll__graph.md5 │ ├── structsHash__t__coll__graph.md5 │ ├── structsPat__t__coll__graph.md5 │ ├── structbitGraph__t__coll__graph.md5 │ ├── refman.pdf │ ├── Helvetica.ttf │ ├── align_8c__incl.pdf │ ├── bitSet_8c__incl.pdf │ ├── bitSet_8h__incl.pdf │ ├── convll_8c__incl.pdf │ ├── convll_8h__incl.pdf │ ├── dir_000000_dep.pdf │ ├── realIo_8c__incl.pdf │ ├── realIo_8h__incl.pdf │ ├── words_8c__incl.pdf │ ├── gemoda-r_8c__incl.pdf │ ├── gemoda-s_8c__incl.pdf │ ├── matrices_8c__incl.pdf │ ├── matrixmap_8h__incl.pdf │ ├── newConv_8c__incl.pdf │ ├── 
patStats_8c__incl.pdf │ ├── patStats_8h__incl.pdf │ ├── spat_8h__dep__incl.pdf │ ├── bitSet_8h__dep__incl.pdf │ ├── convll_8h__dep__incl.pdf │ ├── fastaSeqIO_8c__incl.pdf │ ├── fastaSeqIO_8h__incl.pdf │ ├── realCompare_8c__incl.pdf │ ├── realCompare_8h__incl.pdf │ ├── realIo_8h__dep__incl.pdf │ ├── matdata_8h__dep__incl.pdf │ ├── matrices_8h__dep__incl.pdf │ ├── matrixmap_8h__dep__incl.pdf │ ├── patStats_8h__dep__incl.pdf │ ├── fastaSeqIO_8h__dep__incl.pdf │ ├── realCompare_8h__dep__incl.pdf │ ├── structcnode__coll__graph.pdf │ ├── structmnode__coll__graph.pdf │ ├── structsHash__t__coll__graph.pdf │ ├── structsPat__t__coll__graph.pdf │ ├── structbitGraph__t__coll__graph.pdf │ ├── dirs.tex │ ├── refman.ilg │ ├── Makefile │ ├── dir_000000.tex │ ├── spat_8h.tex │ ├── matdata_8h.tex │ ├── annotated.tex │ ├── structfSeq__t.tex │ ├── structsSize__t.tex │ ├── structmnode.tex │ ├── files.tex │ ├── refman.tex │ ├── doxygen.sty │ ├── matrixmap_8h.tex │ ├── structcSet__t.tex │ ├── refman.out │ ├── structsOffset__t.tex │ ├── structbitGraph__t.tex │ ├── structsHashEntry__t.tex │ ├── matrices_8c.tex │ ├── structsPat__t.tex │ ├── structsHash__t.tex │ ├── structbitSet__t.tex │ ├── structcnode.tex │ ├── appb.tex │ ├── patStats_8h.tex │ ├── structrdh__t.tex │ └── fastaSeqIO_8h.tex ├── .DS_Store ├── chap4.tex ├── Images-appa │ ├── aas.pdf │ ├── bases.pdf │ ├── synth1-spectra.pdf │ ├── pwm-run.txt │ └── splicing.py ├── Images-chap1 │ ├── gibbs.pdf │ ├── moore.pdf │ ├── omes.pdf │ ├── yeast.pdf │ ├── genbank.pdf │ ├── hairpin.pdf │ ├── pwmHits.pdf │ ├── gaussian.pdf │ ├── hematopo.pdf │ ├── dependencies.pdf │ ├── yeast-logo1.pdf │ ├── yeast-logo2.pdf │ ├── 20060328-093630.pdf │ ├── genbank-record.pdf │ ├── table_breast.tex │ ├── pwms.tex │ ├── table_gene_similarity.tex │ ├── pwm.tex │ └── regexs.tex ├── Images-chap2 │ ├── dot.pdf │ ├── dot2.pdf │ ├── ibm.png │ ├── space.pdf │ ├── space.png │ ├── growth.pdf │ ├── 1vm5-balls.png │ ├── alignment.pdf │ ├── amp-align.pdf │ ├── bootstrap.pdf 
│ ├── evolution.pdf │ ├── frog-amps.pdf │ ├── hemolysis.pdf │ ├── membrane.pdf │ ├── pepwheel.pdf │ ├── 1vm5-ribbon.png │ ├── aurein.fa │ ├── barActivity.pdf │ ├── control-gel.pdf │ ├── detailed-graph.pdf │ ├── supplement table.csv │ ├── mic-results.tex │ ├── chrisResults2.tex │ ├── peptides.tex │ ├── antimicrobialnames.tex │ ├── motif-conservation.tex │ └── chrisResults1.tex ├── Images-chap3 │ ├── hmm.png │ ├── hmmer.png │ ├── lexa.pdf │ ├── lexa1.pdf │ ├── lexa2.pdf │ ├── rmsd.pdf │ ├── spot.pdf │ ├── galt-hit.png │ ├── hmm-graph.pdf │ ├── hmm-graph.png │ ├── spot-logo.pdf │ ├── gemoda_fig1.pdf │ ├── gemoda_fig2.pdf │ ├── gemoda_fig3.pdf │ ├── gemoda_fig4.pdf │ ├── gemoda_fig5.pdf │ ├── naturalexample.pdf │ ├── responseexample.pdf │ ├── lexa.fa │ ├── hmm-graph.svg │ └── spot.fa ├── index.tex ├── contents.tex ├── biblio.tex ├── chap5.tex ├── appa.tex ├── abstract.tex └── header.tex ├── hypernat.sty ├── Makefile ├── README.markdown ├── main.tex ├── caslon.sty ├── patchcmd.sty ├── nfssext.sty ├── import.sty └── xkeyval.sty /Body/Images-chap4/digits.tex: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Body/appb/align_8c__incl.md5: -------------------------------------------------------------------------------- 1 | 26ca21c3273f76b797a6583398f2d5b7 -------------------------------------------------------------------------------- /Body/appb/bitSet_8c__incl.md5: -------------------------------------------------------------------------------- 1 | cbba908ab7480b76bfc7f8682e4ea0a2 -------------------------------------------------------------------------------- /Body/appb/bitSet_8h__incl.md5: -------------------------------------------------------------------------------- 1 | 705fe0d3ea06927b7bf30e45367830f6 -------------------------------------------------------------------------------- /Body/appb/convll_8c__incl.md5: 
-------------------------------------------------------------------------------- 1 | bf04321fa4930008d54664f011b9e1ba -------------------------------------------------------------------------------- /Body/appb/convll_8h__incl.md5: -------------------------------------------------------------------------------- 1 | 5d56a587a052907e945962b37d4461b2 -------------------------------------------------------------------------------- /Body/appb/gemoda-r_8c__incl.md5: -------------------------------------------------------------------------------- 1 | 55b39bde17383a8e6e569b43fbc6ad8f -------------------------------------------------------------------------------- /Body/appb/gemoda-s_8c__incl.md5: -------------------------------------------------------------------------------- 1 | 4b0862961cfa5d9977bdc97644a28e82 -------------------------------------------------------------------------------- /Body/appb/matrices_8c__incl.md5: -------------------------------------------------------------------------------- 1 | 54220a469b03ecab4742241bc5b3162c -------------------------------------------------------------------------------- /Body/appb/newConv_8c__incl.md5: -------------------------------------------------------------------------------- 1 | b112d3b87c8f9e67556bf55907540c4c -------------------------------------------------------------------------------- /Body/appb/patStats_8c__incl.md5: -------------------------------------------------------------------------------- 1 | c801017eec84f3d57a9c8621465ad99f -------------------------------------------------------------------------------- /Body/appb/patStats_8h__incl.md5: -------------------------------------------------------------------------------- 1 | eb8cf1fd866a801503115a2ac6c8323c -------------------------------------------------------------------------------- /Body/appb/realIo_8c__incl.md5: -------------------------------------------------------------------------------- 1 | 13078b0de4fe413c755b3c82f5d50996 
-------------------------------------------------------------------------------- /Body/appb/realIo_8h__incl.md5: -------------------------------------------------------------------------------- 1 | ce36f6c7882d578cc55b26cd3e7127d0 -------------------------------------------------------------------------------- /Body/appb/words_8c__incl.md5: -------------------------------------------------------------------------------- 1 | 15deb756a35c878438ef773f8c5de03a -------------------------------------------------------------------------------- /Body/appb/bitSet_8h__dep__incl.md5: -------------------------------------------------------------------------------- 1 | e603122ecf64338d69663fdf7d9368f0 -------------------------------------------------------------------------------- /Body/appb/convll_8h__dep__incl.md5: -------------------------------------------------------------------------------- 1 | c1bc52d2efec6b823441a080ea210190 -------------------------------------------------------------------------------- /Body/appb/fastaSeqIO_8c__incl.md5: -------------------------------------------------------------------------------- 1 | d192be00e3f999824bee010b6e9cb925 -------------------------------------------------------------------------------- /Body/appb/fastaSeqIO_8h__incl.md5: -------------------------------------------------------------------------------- 1 | bac0d000e5cb998bf0360c9d9c1d5a6a -------------------------------------------------------------------------------- /Body/appb/matdata_8h__dep__incl.md5: -------------------------------------------------------------------------------- 1 | 63e1462325f87980179c4863d4a132f5 -------------------------------------------------------------------------------- /Body/appb/matrices_8h__dep__incl.md5: -------------------------------------------------------------------------------- 1 | 784662357f6e472d9d6548852e20229e -------------------------------------------------------------------------------- /Body/appb/matrixmap_8h__incl.md5: 
-------------------------------------------------------------------------------- 1 | ed7559ba0cb74e5f701acbe6ba59e6c1 -------------------------------------------------------------------------------- /Body/appb/patStats_8h__dep__incl.md5: -------------------------------------------------------------------------------- 1 | b41dea82dc817a6523a74b0ba7a20ca8 -------------------------------------------------------------------------------- /Body/appb/realCompare_8c__incl.md5: -------------------------------------------------------------------------------- 1 | d835df36927ec33ae76e2cbbb803ea79 -------------------------------------------------------------------------------- /Body/appb/realCompare_8h__incl.md5: -------------------------------------------------------------------------------- 1 | 66710967416cc9a04b3f634cceb54e64 -------------------------------------------------------------------------------- /Body/appb/realIo_8h__dep__incl.md5: -------------------------------------------------------------------------------- 1 | be4c6afb86ff393c2e97afc84a7ca673 -------------------------------------------------------------------------------- /Body/appb/spat_8h__dep__incl.md5: -------------------------------------------------------------------------------- 1 | 95894656e0b98d55f0e630a695e9cedc -------------------------------------------------------------------------------- /Body/appb/fastaSeqIO_8h__dep__incl.md5: -------------------------------------------------------------------------------- 1 | 2f5f8f32244315492abf972397b123d4 -------------------------------------------------------------------------------- /Body/appb/matrixmap_8h__dep__incl.md5: -------------------------------------------------------------------------------- 1 | 1914bd7c853aa40dca17ddecb6e0a17a -------------------------------------------------------------------------------- /Body/appb/realCompare_8h__dep__incl.md5: -------------------------------------------------------------------------------- 1 | 
ca447a21c49eaacbb0cf7795f87523fd -------------------------------------------------------------------------------- /Body/appb/structcnode__coll__graph.md5: -------------------------------------------------------------------------------- 1 | cd2e08000b2a03fac28866489985f7df -------------------------------------------------------------------------------- /Body/appb/structmnode__coll__graph.md5: -------------------------------------------------------------------------------- 1 | aef9ecc2403e5a1ec59608bd4c10d9af -------------------------------------------------------------------------------- /Body/appb/structsHash__t__coll__graph.md5: -------------------------------------------------------------------------------- 1 | d7ea06dbdc950592a94841a2d5c278c9 -------------------------------------------------------------------------------- /Body/appb/structsPat__t__coll__graph.md5: -------------------------------------------------------------------------------- 1 | 88394fa1c861513de74e7391cc1185a4 -------------------------------------------------------------------------------- /Body/appb/structbitGraph__t__coll__graph.md5: -------------------------------------------------------------------------------- 1 | cc8933b828d2e59015c4562cb09ebd0e -------------------------------------------------------------------------------- /hypernat.sty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/hypernat.sty -------------------------------------------------------------------------------- /Body/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/.DS_Store -------------------------------------------------------------------------------- /Body/chap4.tex: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/chap4.tex -------------------------------------------------------------------------------- /Body/appb/refman.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/refman.pdf -------------------------------------------------------------------------------- /Body/Images-appa/aas.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-appa/aas.pdf -------------------------------------------------------------------------------- /Body/appb/Helvetica.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/Helvetica.ttf -------------------------------------------------------------------------------- /Body/Images-appa/bases.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-appa/bases.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/gibbs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/gibbs.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/moore.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/moore.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/omes.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/omes.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/yeast.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/yeast.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/dot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/dot.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/dot2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/dot2.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/ibm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/ibm.png -------------------------------------------------------------------------------- /Body/Images-chap2/space.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/space.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/space.png -------------------------------------------------------------------------------- /Body/Images-chap3/hmm.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/hmm.png -------------------------------------------------------------------------------- /Body/Images-chap3/hmmer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/hmmer.png -------------------------------------------------------------------------------- /Body/Images-chap3/lexa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/lexa.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/lexa1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/lexa1.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/lexa2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/lexa2.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/rmsd.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/rmsd.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/spot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/spot.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/fig1.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/fig1.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/fig2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/fig2.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/fig3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/fig3.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/hiv1p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/hiv1p.png -------------------------------------------------------------------------------- /Body/Images-chap4/mydoc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/mydoc.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/tree.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/tree.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/voice.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/voice.pdf -------------------------------------------------------------------------------- /Body/index.tex: -------------------------------------------------------------------------------- 1 | 2 | 
%\index{structure!protein|see{protein, structure}} 3 | 4 | \clearpage 5 | \printindex 6 | -------------------------------------------------------------------------------- /Body/Images-chap1/genbank.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/genbank.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/hairpin.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/hairpin.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/pwmHits.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/pwmHits.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/growth.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/growth.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/digits.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/digits.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/pockets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/pockets.png -------------------------------------------------------------------------------- /Body/appb/align_8c__incl.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/align_8c__incl.pdf -------------------------------------------------------------------------------- /Body/appb/bitSet_8c__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/bitSet_8c__incl.pdf -------------------------------------------------------------------------------- /Body/appb/bitSet_8h__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/bitSet_8h__incl.pdf -------------------------------------------------------------------------------- /Body/appb/convll_8c__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/convll_8c__incl.pdf -------------------------------------------------------------------------------- /Body/appb/convll_8h__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/convll_8h__incl.pdf -------------------------------------------------------------------------------- /Body/appb/dir_000000_dep.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/dir_000000_dep.pdf -------------------------------------------------------------------------------- /Body/appb/realIo_8c__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/realIo_8c__incl.pdf -------------------------------------------------------------------------------- 
/Body/appb/realIo_8h__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/realIo_8h__incl.pdf -------------------------------------------------------------------------------- /Body/appb/words_8c__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/words_8c__incl.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/gaussian.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/gaussian.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/hematopo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/hematopo.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/1vm5-balls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/1vm5-balls.png -------------------------------------------------------------------------------- /Body/Images-chap2/alignment.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/alignment.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/amp-align.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/amp-align.pdf 
-------------------------------------------------------------------------------- /Body/Images-chap2/bootstrap.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/bootstrap.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/evolution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/evolution.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/frog-amps.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/frog-amps.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/hemolysis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/hemolysis.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/membrane.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/membrane.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/pepwheel.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/pepwheel.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/galt-hit.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/galt-hit.png -------------------------------------------------------------------------------- /Body/Images-chap3/hmm-graph.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/hmm-graph.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/hmm-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/hmm-graph.png -------------------------------------------------------------------------------- /Body/Images-chap3/spot-logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/spot-logo.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/barChart.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/barChart.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/flowcyto.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/flowcyto.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/mutstats.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/mutstats.pdf -------------------------------------------------------------------------------- /Body/appb/gemoda-r_8c__incl.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/gemoda-r_8c__incl.pdf -------------------------------------------------------------------------------- /Body/appb/gemoda-s_8c__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/gemoda-s_8c__incl.pdf -------------------------------------------------------------------------------- /Body/appb/matrices_8c__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/matrices_8c__incl.pdf -------------------------------------------------------------------------------- /Body/appb/matrixmap_8h__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/matrixmap_8h__incl.pdf -------------------------------------------------------------------------------- /Body/appb/newConv_8c__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/newConv_8c__incl.pdf -------------------------------------------------------------------------------- /Body/appb/patStats_8c__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/patStats_8c__incl.pdf -------------------------------------------------------------------------------- /Body/appb/patStats_8h__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/patStats_8h__incl.pdf 
-------------------------------------------------------------------------------- /Body/appb/spat_8h__dep__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/spat_8h__dep__incl.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/dependencies.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/dependencies.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/yeast-logo1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/yeast-logo1.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/yeast-logo2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/yeast-logo2.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/1vm5-ribbon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/1vm5-ribbon.png -------------------------------------------------------------------------------- /Body/Images-chap2/aurein.fa: -------------------------------------------------------------------------------- 1 | >sp|P82387|AUR12_LITRA Aurein-1.2 - Litoria raniformis (Southern bell frog). 
2 | GLFDIIKKIAESF 3 | -------------------------------------------------------------------------------- /Body/Images-chap2/barActivity.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/barActivity.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/control-gel.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/control-gel.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/gemoda_fig1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/gemoda_fig1.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/gemoda_fig2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/gemoda_fig2.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/gemoda_fig3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/gemoda_fig3.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/gemoda_fig4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/gemoda_fig4.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/gemoda_fig5.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/gemoda_fig5.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/entropy-new.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/entropy-new.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/fixedDeltas.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/fixedDeltas.pdf -------------------------------------------------------------------------------- /Body/appb/bitSet_8h__dep__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/bitSet_8h__dep__incl.pdf -------------------------------------------------------------------------------- /Body/appb/convll_8h__dep__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/convll_8h__dep__incl.pdf -------------------------------------------------------------------------------- /Body/appb/fastaSeqIO_8c__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/fastaSeqIO_8c__incl.pdf -------------------------------------------------------------------------------- /Body/appb/fastaSeqIO_8h__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/fastaSeqIO_8h__incl.pdf -------------------------------------------------------------------------------- /Body/appb/realCompare_8c__incl.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/realCompare_8c__incl.pdf -------------------------------------------------------------------------------- /Body/appb/realCompare_8h__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/realCompare_8h__incl.pdf -------------------------------------------------------------------------------- /Body/appb/realIo_8h__dep__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/realIo_8h__dep__incl.pdf -------------------------------------------------------------------------------- /Body/Images-appa/synth1-spectra.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-appa/synth1-spectra.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/20060328-093630.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/20060328-093630.pdf -------------------------------------------------------------------------------- /Body/Images-chap1/genbank-record.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap1/genbank-record.pdf -------------------------------------------------------------------------------- /Body/Images-chap2/detailed-graph.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap2/detailed-graph.pdf 
-------------------------------------------------------------------------------- /Body/Images-chap3/naturalexample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/naturalexample.pdf -------------------------------------------------------------------------------- /Body/Images-chap3/responseexample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap3/responseexample.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/henikoff-repo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/henikoff-repo.pdf -------------------------------------------------------------------------------- /Body/appb/matdata_8h__dep__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/matdata_8h__dep__incl.pdf -------------------------------------------------------------------------------- /Body/appb/matrices_8h__dep__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/matrices_8h__dep__incl.pdf -------------------------------------------------------------------------------- /Body/appb/matrixmap_8h__dep__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/matrixmap_8h__dep__incl.pdf -------------------------------------------------------------------------------- /Body/appb/patStats_8h__dep__incl.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/patStats_8h__dep__incl.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/finalBbsWithInset.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/finalBbsWithInset.pdf -------------------------------------------------------------------------------- /Body/appb/fastaSeqIO_8h__dep__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/fastaSeqIO_8h__dep__incl.pdf -------------------------------------------------------------------------------- /Body/appb/realCompare_8h__dep__incl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/realCompare_8h__dep__incl.pdf -------------------------------------------------------------------------------- /Body/appb/structcnode__coll__graph.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/structcnode__coll__graph.pdf -------------------------------------------------------------------------------- /Body/appb/structmnode__coll__graph.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/structmnode__coll__graph.pdf -------------------------------------------------------------------------------- /Body/appb/structsHash__t__coll__graph.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/structsHash__t__coll__graph.pdf -------------------------------------------------------------------------------- /Body/appb/structsPat__t__coll__graph.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/structsPat__t__coll__graph.pdf -------------------------------------------------------------------------------- /Body/Images-chap4/finalCleanedWithInset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/Images-chap4/finalCleanedWithInset.png -------------------------------------------------------------------------------- /Body/appb/structbitGraph__t__coll__graph.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kljensen/mit-phd-thesis/HEAD/Body/appb/structbitGraph__t__coll__graph.pdf -------------------------------------------------------------------------------- /Body/appb/dirs.tex: -------------------------------------------------------------------------------- 1 | \section{gemoda Directories} 2 | This directory hierarchy is sorted roughly, but not completely, alphabetically:\begin{CompactList} 3 | \item \contentsline{section}{Fasta\-Seq\-IO}{\pageref{dir_000000}}{} 4 | \end{CompactList} 5 | -------------------------------------------------------------------------------- /Body/Images-chap3/lexa.fa: -------------------------------------------------------------------------------- 1 | >1 2 | TGCTGTATATACTCACAGCA 3 | >2 4 | AACTGTATATACACCCAGGG 5 | >3 6 | TACTGTATGAGCATACAGTA 7 | >4 8 | ACCTGAATGAATATACAGTA 9 | >5 10 | TACTGTACATCCATACAGTA 11 | >6 12 | TACTGTATATTCATTCAGGT 13 | >7 14 | AACTGTTTTTTTATCCAGTA 15 | >8 16 | ATCTGTATATATACCCAGCT 17 | >9 18 | TACTGTATATAAAAACAGTA 19 | 
-------------------------------------------------------------------------------- /Body/Images-chap4/table_results1.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{tabular}{ccc}\hline\hline 3 | Experiment & Classification & \parbox{4.8cm}{\centering \vspace{1mm}Classification in\\ 4 | Alimoglu \& Alpaydin, 1996\vspace{1mm}} \\ \hline 5 | 1 & 97.34\% & 97.80\% \\ 6 | 2 & 99.64\% & n/a \\ 7 | %3 & 0.46\% & n/a \\ 8 | \hline\hline 9 | \end{tabular} 10 | -------------------------------------------------------------------------------- /Body/appb/refman.ilg: -------------------------------------------------------------------------------- 1 | This is makeindex, version 2.14 [02-Oct-2002] (kpathsea + Thai support). 2 | Scanning input file refman.idx....done (789 entries accepted, 0 rejected). 3 | Sorting entries.........done (8008 comparisons). 4 | Generating output file refman.ind....done (1194 lines written, 0 warnings). 5 | Output written in refman.ind. 6 | Transcript written in refman.ilg. 7 | -------------------------------------------------------------------------------- /Body/contents.tex: -------------------------------------------------------------------------------- 1 | % -*- Mode:TeX -*- 2 | %% This file simply contains the commands that actually generate the table of 3 | %% contents and lists of figures and tables. You can omit any or all of 4 | %% these files by simply taking out the appropriate command. For more 5 | %% information on these files, see appendix C.3.3 of the LaTeX manual. 
6 | \tableofcontents 7 | \newpage 8 | \listoffigures 9 | \newpage 10 | \listoftables 11 | 12 | -------------------------------------------------------------------------------- /Body/Images-chap4/table_results2.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{tabular}{cccc}\hline\hline 3 | Experiment & Classification & \parbox{2.5cm}{\vspace{1mm}Classification\\ with clustering\vspace{1mm}} & \parbox{4.5cm}{\centering \vspace{1mm}Classification in\\ 4 | Dietterich \& Bakiri, 1995\vspace{1mm}} \\ \hline 5 | 1 & 93.84\% & 98.91\% & 96.73\% \\ 6 | 2 & 92.61\% & 98.61\% & n/a \\ 7 | %3 & 5.11\% & 0.71\% & n/a \\ 8 | \hline\hline 9 | \end{tabular} 10 | -------------------------------------------------------------------------------- /Body/biblio.tex: -------------------------------------------------------------------------------- 1 | %% This defines the bibliography file (main.bib) and the bibliography style. 2 | %% If you want to create a bibliography file by hand, change the contents of 3 | %% this file to a `thebibliography' environment. For more information 4 | %% see section 4.3 of the LaTeX manual. 5 | %\bibliographystyle{plainnat} 6 | \bibliographystyle{abbrvnat} 7 | 8 | \clearpage 9 | %\addcontentsline{toc}{chapter}{Bibliography} 10 | \bibliography{References/research-new} 11 | -------------------------------------------------------------------------------- /Body/appb/Makefile: -------------------------------------------------------------------------------- 1 | all clean: refman.pdf 2 | 3 | refman.pdf: refman.tex 4 | pdflatex refman.tex 5 | makeindex refman.idx 6 | pdflatex refman.tex 7 | 8 | latex_count=5 ; \ 9 | while egrep -s 'Rerun (LaTeX|to get cross-references right)' refman.log && [ $$latex_count -gt 0 ] ;\ 10 | do \ 11 | echo "Rerunning latex...." 
;\ 12 | pdflatex refman.tex ;\ 13 | latex_count=`expr $$latex_count - 1` ;\ 14 | done 15 | 16 | 17 | clean: 18 | rm -f *.ps *.dvi *.aux *.toc *.idx *.ind *.ilg *.log *.out refman.pdf 19 | -------------------------------------------------------------------------------- /Body/appb/dir_000000.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{dir_000000}{ 2 | \section{Fasta\-Seq\-IO/ Directory Reference} 3 | \label{dir_000000}\index{FastaSeqIO/ Directory Reference@{FastaSeqIO/ Directory Reference}} 4 | } 5 | 6 | 7 | \begin{figure}[H] 8 | \begin{center} 9 | \leavevmode 10 | \includegraphics[width=53pt]{dir_000000_dep} 11 | \end{center} 12 | \end{figure} 13 | \subsection*{Files} 14 | \begin{CompactItemize} 15 | \item 16 | file \hyperlink{fastaSeqIO_8c}{fasta\-Seq\-IO.c} 17 | \item 18 | file \hyperlink{fastaSeqIO_8h}{fasta\-Seq\-IO.h} 19 | \end{CompactItemize} 20 | -------------------------------------------------------------------------------- /Body/appb/spat_8h.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{spat_8h}{ 2 | \section{spat.h File Reference} 3 | \label{spat_8h}\index{spat.h@{spat.h}} 4 | } 5 | 6 | 7 | This graph shows which files directly or indirectly include this file:\begin{figure}[H] 8 | \begin{center} 9 | \leavevmode 10 | \includegraphics[width=93pt]{spat_8h__dep__incl} 11 | \end{center} 12 | \end{figure} 13 | \subsection*{Data Structures} 14 | \begin{CompactItemize} 15 | \item 16 | struct \hyperlink{structsOffset__t}{s\-Offset\_\-t} 17 | \item 18 | struct \hyperlink{structsPat__t}{s\-Pat\_\-t} 19 | \end{CompactItemize} 20 | -------------------------------------------------------------------------------- /Body/Images-chap2/supplement table.csv: -------------------------------------------------------------------------------- 1 | DESIGNED PEPTIDES SHUFFLED PEPTIDES 2 | Peptide number Sequence S. aureus B. anthracis Peptide number Sequence S. 
aureus B. anthracis 3 | 28 FLGVVFKLASKVFPAVFGKV 8 16 28 GVSVAGAKKVKVLFVFPFLF + + 4 | 51 FLFRVASKVFPALIGKFKKK 16 16 51 RKVAPALIKSFVFLFKFKKG 128 256 5 | 22 LGALFRVASKVFPAVISMVK 64 64 22 SVPSVGAVLFFKRAAVMKLI + + 6 | 63 LPVLFKLASKVFPAVFSSLK 128 128 63 VSVKKVLPFAPLKSLLSFAF + + 7 | 5 FLFGLASKVFPAVYCKVTRK 256 128 5 FLPVLVKVFRYSKKTAAGCF + + 8 | 43 SFVFKLASKVVPSVFSALTR 256 128 43 ASPTVFRSSVFLSLFVVAKK + + 9 | 35 LPVVFRVASKVFPALISKLT 256 128 35 KVFIATLVVSSFLLAKPPRV + + 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for LaTeX documents 2 | # 3 | TARGET = main.pdf 4 | TOUCH=touch 5 | PS = dvips 6 | PDFLATEX = pdflatex 7 | LATEX = $(PDFLATEX) 8 | BIBTEX = bibtex 9 | PS2PDF= ps2pdf 10 | DELETE = *.aux *.log *.ps *.dvi *.bbl *.blg *~ $(TARGET) *.brf *.idx *.ilg *.ind *.lof *.lot *.out *.toc 11 | DVIPSFLAGS= -Pcmz -Pamz -Ppdf -G0 -tletter 12 | TOUCH=touch 13 | MAKE=make 14 | MAKEIDX=makeindex 15 | 16 | .SUFFIXES: .pdf .tex .ps 17 | 18 | all: $(TARGET) 19 | $(MAKE) $(TARGET) 20 | 21 | $(TARGET): 22 | 23 | .ps.pdf: 24 | $(PS2PDF) $< 25 | 26 | .dvi.ps: 27 | $(PS) $(DVIPSFLAGS) -t letter -o $@ $< 28 | 29 | .tex.pdf: 30 | $(LATEX) $< 31 | $(LATEX) $< 32 | $(BIBTEX) $* 33 | # $(MAKEIDX) $* 34 | $(LATEX) $< 35 | $(BIBTEX) $* 36 | $(LATEX) $< 37 | 38 | clean: 39 | rm -f $(DELETE) 40 | -------------------------------------------------------------------------------- /Body/Images-chap4/foo.py: -------------------------------------------------------------------------------- 1 | # a bar plot with errorbars 2 | # a bar plot with errorbars 3 | from pylab import * 4 | 5 | N = 5 6 | menMeans = (20, 35, 30, 35, 27) 7 | menStd = ( 2, 3, 4, 1, 2) 8 | 9 | ind = arange(N) # the x locations for the groups 10 | width = 0.35 # the width of the bars 11 | #width = 0.55 # the width of the bars 12 | p1 = bar(ind, menMeans, width, color='r', yerr=menStd) 13 | print 
ind 14 | 15 | womenMeans = (25, 32, 34, 20, 25) 16 | womenStd = ( 3, 5, 2, 3, 3) 17 | p2 = bar(ind+width, womenMeans, width, color='y', yerr=womenStd) 18 | p3 = bar(ind+2*width, womenMeans, width, color='b', yerr=womenStd) 19 | p4 = bar(ind+3*width, womenMeans, width, color='g', yerr=womenStd) 20 | 21 | ylabel('Scores') 22 | title('Scores by group and gender') 23 | xticks(ind+width, ('G1', 'G2', 'G3', 'G4', 'G5') ) 24 | xlim(-width,len(ind)) 25 | yticks(arange(0,41,10)) 26 | 27 | legend( (p1[0], p2[0]), ('Men', 'Women'), shadow=True) 28 | show() 29 | 30 | -------------------------------------------------------------------------------- /Body/Images-chap4/makeHisto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # a grouped bar plot with errorbars 3 | from pylab import * 4 | 5 | fullData=(85.94, 92.59, 88.83, 90.39, 91, 93.73) 6 | fullData_std=(4, 3.04, 3.68, 3.33, 3.11, 2.52) 7 | 8 | cfsData=(86.78, 89.14, 84.77, 90.26, 89.25, 85.98) 9 | cfsData_std=(3.62, 3.6, 4.04, 3.28, 3.36, 4.03) 10 | 11 | pcaData=(82.48, 92.11, 88.67, 89.13, 93, 89.94) 12 | pcaData_std=(4.32, 3.2, 3.88, 3.81, 2.82, 3.53) 13 | 14 | ind = arange(len(fullData)) # the x locations for the groups 15 | width = 0.25 # the width of the bars: can also be len(x) sequence 16 | 17 | p1 = bar(ind, fullData, width, color='r', yerr=fullData_std) 18 | p2 = bar(ind+width, cfsData, width, color='b', yerr=cfsData_std) 19 | p3 = bar(ind+2*width, pcaData, width, color='g', yerr=pcaData_std) 20 | 21 | ylabel('Percent correct classification') 22 | title('Results by model and representation') 23 | xticks(ind+width, ('DT', 'LR',\ 24 | 'NB', 'BN', 25 | 'SVM', 'SVM-rbf') ) 26 | yticks(arange(50,100,10)) 27 | legend( (p1[0], p2[0], p3[0]), ('FULL', 'CFS', 'PCA') ) 28 | xlim(-width,len(ind)) 29 | ylim(50,100) 30 | 31 | show() 32 | -------------------------------------------------------------------------------- /README.markdown:
-------------------------------------------------------------------------------- 1 | # MIT Ph.D. Thesis template in LaTeX 2 | 3 | > [!CAUTION] 4 | > The [MIT thesis format](https://web.mit.edu/thesis/tex/) is different now. I believe you can use [this LaTeX package](https://ctan.org/pkg/mitthesis) 5 | 6 | This is a [LaTeX](http://www.latex-project.org/) template for a Ph.D. thesis at [MIT](http://web.mit.edu). As far as I know, it conformed to all the [requirements](http://libraries.mit.edu/archives/thesis-specs/) in 2006 when I submitted it. There are a few things to note: 7 | 8 | * The final output looks something like the PDF here: [Kyle Jensen's MIT Ph.D. thesis](https://github.com/kljensen/mit-phd-thesis/raw/6bdb27e0d7650af6e52ee2e73ddbcb7d90f9229d/thesis.pdf) 9 | * I'm sure successfully compiling the LaTeX document will require some debugging. 10 | * There are a number of dependencies including tetex, pdflatex, and the [Adobe Caslon Pro](http://en.wikipedia.org/wiki/Caslon) font by default. 11 | * The main LaTeX file is just a skeleton that includes chapters in the Body/ directory using the LaTeX "\input" command. 12 | * You can turn off the 2-sided mode in main.tex. 13 | 14 | Kyle 15 | 16 | -------------------------------------------------------------------------------- /Body/Images-appa/pwm-run.txt: -------------------------------------------------------------------------------- 1 | >cat motifs.txt 2 | AGCTGAC 3 | GACTAAT 4 | GGCTAAT 5 | TTCTAAC 6 | ....edited for space... 
7 | TACTAAC 8 | TACTAAC 9 | TTTTAAC 10 | 11 | >motif.py motifs.txt 12 | Frequency Matrix: 13 | A 0.067 0.627 0.000 0.000 0.893 1.000 0.000 14 | T 0.773 0.240 0.120 1.000 0.027 0.000 0.133 15 | G 0.093 0.120 0.000 0.000 0.080 0.000 0.000 16 | C 0.067 0.013 0.880 0.000 0.000 0.000 0.867 17 | 18 | Position 1: entropy = 1.127, information = 0.873 19 | Position 2: entropy = 1.367, information = 0.633 20 | Position 3: entropy = 0.529, information = 1.471 21 | Position 4: entropy = 0.000, information = 2.000 22 | Position 5: entropy = 0.576, information = 1.424 23 | Position 6: entropy = 0.000, information = 2.000 24 | Position 7: entropy = 0.567, information = 1.433 25 | 26 | Total Information Content = 9.834 27 | 28 | Sequence AGCTGAC has score s = 2.999 29 | Sequence GACTAAT has score s = 6.650 30 | Sequence GGCTAAT has score s = 4.266 31 | Sequence CATTAAC has score s = 5.991 32 | ...edited for space... 33 | Sequence TACTAAC has score s = 12.401 34 | Sequence TACTAAC has score s = 12.401 35 | Sequence TTTTAAC has score s = 8.142 36 | Mean score = 9.834 37 | -------------------------------------------------------------------------------- /Body/Images-chap1/table_breast.tex: -------------------------------------------------------------------------------- 1 | \vspace{0.2cm} 2 | \begin{tabular}{ccccccccccc}\rule{0pt}{15mm}% vertical placement p 324 latex companion} 3 | \begin{rotate}{45}recurrent?\end{rotate} & \begin{rotate}{45}radius\end{rotate} & \begin{rotate}{45}texture\end{rotate} & 4 | \begin{rotate}{45}perimeter\end{rotate} & \begin{rotate}{45}area\end{rotate} & \begin{rotate}{45}smoothness\end{rotate} & \begin{rotate}{45}compactness\end{rotate} & 5 | \begin{rotate}{45}concavity\end{rotate} & \begin{rotate}{45}concave points\end{rotate} & \begin{rotate}{45}symmetry\end{rotate} & 6 | \begin{rotate}{45}fractal dimension\end{rotate}\\\hline\hline 7 | no & 3 & 2 & 2 & 2 & 0 & 1 & 6 & 2 & 3 & 4 \\ 8 | no & 3 & 6 & 6 & 6 & 3 & 4 & 0 & 5 & 9 & 2 \\ 9 | no & 5 & 2 & 2 & 2 
& 2 & 4 & 1 & 2 & 6 & 2 \\ 10 | no & 0 & 5 & 10 & 1 & 5 & 9 & 2 & 9 & 8 & 1 \\ 11 | yes & 4 & 4 & 1 & 3 & 1 & 1 & 0 & 3 & 5 & 3 \\ 12 | yes & 0 & 3 & 4 & 1 & 1 & 1 & 1 & 6 & 5 & 2 \\ 13 | no & 3 & 2 & 1 & 2 & 1 & 1 & 2 & 2 & 6 & 1 \\ 14 | yes & 1 & 1 & 5 & 2 & 1 & 1 & 3 & 5 & 4 & 3 \\ 15 | no & 0 & 4 & 5 & 1 & 2 & 2 & 4 & 6 & 6 & 1 \\ 16 | no & 0 & 5 & 6 & 0 & 5 & 1 & 7 & 7 & 7 & 5 \\ 17 | no & 2 & 0 & 1 & 1 & 0 & 1 & 5 & 2 & 2 & 1 \\\hline\hline 18 | \end{tabular} 19 | -------------------------------------------------------------------------------- /Body/chap5.tex: -------------------------------------------------------------------------------- 1 | \chapter{Conclusions} 2 | We feel that the novelty in the work lies principally 3 | in two features: 1) the combination of an exhaustive 4 | method with a flexible motif representation; and, 5 | 2) the extension of this algorithm to problems 6 | of a generic nature. As the reviewer notes, 7 | the ideas behind these features are drawn 8 | from a variety of sources --- specifically, 9 | Teiresias~\citep{rigoutsos1998combinatorial}, 10 | Winnower~\citep{pevzner2000combinatorial}, the 11 | algorithm by~\cite{mancheron2003pattern}; 12 | and algorithms by~\cite{zaki2000scalable},~\cite{zaki1998theoretical}, 13 | and~\cite{mancheron2003pattern}. We feel that, 14 | in addition, there are ideas 15 | that are introduced for the first time in this 16 | manuscript. For example, the convolution algorithm presented 17 | differs from that described by~\cite{rigoutsos1998combinatorial} 18 | in that it utilizes the offsets of $L$--length windows rather 19 | than fixed strings. This slight difference makes 20 | convolution significantly more difficult and is one 21 | of the features that allow the Gemoda algorithm to 22 | discover motifs that cannot be well--represented using 23 | regular expressions.
24 | -------------------------------------------------------------------------------- /Body/appb/matdata_8h.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{matdata_8h}{ 2 | \section{matdata.h File Reference} 3 | \label{matdata_8h}\index{matdata.h@{matdata.h}} 4 | } 5 | 6 | 7 | This graph shows which files directly or indirectly include this file:\begin{figure}[H] 8 | \begin{center} 9 | \leavevmode 10 | \includegraphics[width=153pt]{matdata_8h__dep__incl} 11 | \end{center} 12 | \end{figure} 13 | \subsection*{Defines} 14 | \begin{CompactItemize} 15 | \item 16 | \#define \hyperlink{matdata_8h_a0}{MATRIX\_\-SIZE}~23 17 | \end{CompactItemize} 18 | 19 | 20 | \subsection*{Detailed Description} 21 | This file defines the size of the scoring matrices so that we don't have to pound-include the whole \hyperlink{matrices_8h}{matrices.h} file due to worries about incompatibilities with earlier extern variable declarations. 22 | 23 | Definition in file \hyperlink{matdata_8h-source}{matdata.h}. 24 | 25 | \subsection*{Define Documentation} 26 | \hypertarget{matdata_8h_a0}{ 27 | \index{matdata.h@{matdata.h}!MATRIX_SIZE@{MATRIX\_\-SIZE}} 28 | \index{MATRIX_SIZE@{MATRIX\_\-SIZE}!matdata.h@{matdata.h}} 29 | \subsubsection[MATRIX\_\-SIZE]{\setlength{\rightskip}{0pt plus 5cm}\#define MATRIX\_\-SIZE~23}} 30 | \label{matdata_8h_a0} 31 | 32 | 33 | 34 | 35 | Definition at line 10 of file matdata.h. 36 | 37 | Referenced by main(). 
38 | -------------------------------------------------------------------------------- /Body/appb/annotated.tex: -------------------------------------------------------------------------------- 1 | \section{gemoda Data Structures} 2 | Here are the data structures with brief descriptions:\begin{CompactList} 3 | \item\contentsline{section}{\hyperlink{structbitGraph__t}{bit\-Graph\_\-t} }{\pageref{structbitGraph__t}}{} 4 | \item\contentsline{section}{\hyperlink{structbitSet__t}{bit\-Set\_\-t} }{\pageref{structbitSet__t}}{} 5 | \item\contentsline{section}{\hyperlink{structcnode}{cnode} }{\pageref{structcnode}}{} 6 | \item\contentsline{section}{\hyperlink{structcSet__t}{c\-Set\_\-t} }{\pageref{structcSet__t}}{} 7 | \item\contentsline{section}{\hyperlink{structfSeq__t}{f\-Seq\_\-t} }{\pageref{structfSeq__t}}{} 8 | \item\contentsline{section}{\hyperlink{structmnode}{mnode} }{\pageref{structmnode}}{} 9 | \item\contentsline{section}{\hyperlink{structrdh__t}{rdh\_\-t} }{\pageref{structrdh__t}}{} 10 | \item\contentsline{section}{\hyperlink{structsHash__t}{s\-Hash\_\-t} }{\pageref{structsHash__t}}{} 11 | \item\contentsline{section}{\hyperlink{structsHashEntry__t}{s\-Hash\-Entry\_\-t} }{\pageref{structsHashEntry__t}}{} 12 | \item\contentsline{section}{\hyperlink{structsOffset__t}{s\-Offset\_\-t} }{\pageref{structsOffset__t}}{} 13 | \item\contentsline{section}{\hyperlink{structsPat__t}{s\-Pat\_\-t} }{\pageref{structsPat__t}}{} 14 | \item\contentsline{section}{\hyperlink{structsSize__t}{s\-Size\_\-t} }{\pageref{structsSize__t}}{} 15 | \end{CompactList} 16 | -------------------------------------------------------------------------------- /Body/Images-chap2/mic-results.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{table}[ptbh] 3 | \caption[Minimum inhibitory concentration of the preliminary design synthetic AmPs against a variety of bacteria]{Minimum inhibitory concentration of the preliminary design synthetic AmPs against a 
variety of bacteria. 4 | In the table MIC$_{50}$ (MIC$_{80}$) is the concentration of peptide, in 5 | $\mu$g/mL, required to inhibit 50\% (80\%) of the bacterial growth. 6 | A ``-'' indicates that the MIC is greater than 15 $\mu$g/mL. 7 | }\label{table:mic} 8 | \begin{center} 9 | \footnotesize 10 | 11 | \begin{tabular}{lcccccc}% \hline \hline 12 | % first row 13 | & \multicolumn{2}{c}{ \underline{synth--1} } 14 | & \multicolumn{2}{c}{ \underline{synth--2} } 15 | & \multicolumn{2}{c}{ \underline{synth--3} } \\ 16 | % second row 17 | & \scriptsize MIC$_{50}$ & \scriptsize MIC$_{80}$ 18 | & \scriptsize MIC$_{50}$ & \scriptsize MIC$_{80}$ 19 | & \scriptsize MIC$_{50}$ & \scriptsize MIC$_{80}$ \\ %\hline 20 | % 21 | \multicolumn{2}{l}{\bfseries Gram--positive:}\\ 22 | % third row 23 | \emph{~~B. subtilis} 24 | & 7.5 25 | & 10 26 | & 3.5 27 | & 6 28 | & 4.5 29 | & 8.5\\ 30 | % fourth row 31 | \emph{~~C. glutamicum} 32 | & 4 33 | & 13.5 34 | & 3.5 35 | & 11 36 | & 4 37 | & 10\\ 38 | % 39 | \multicolumn{2}{l}{\bfseries Gram--negative:}\\ 40 | % 41 | \emph{~~E. coli} 42 | & - 43 | & - 44 | & - 45 | & - 46 | & - 47 | & -\\ 48 | % 49 | \emph{~~C. rodentium} 50 | & - 51 | & - 52 | & - 53 | & - 54 | & 12 55 | & 15\\ 56 | %\hline\hline 57 | \end{tabular} 58 | \end{center} 59 | 60 | \end{table} 61 | -------------------------------------------------------------------------------- /Body/appb/structfSeq__t.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structfSeq__t}{ 2 | \section{f\-Seq\_\-t Struct Reference} 3 | \label{structfSeq__t}\index{fSeq_t@{fSeq\_\-t}} 4 | } 5 | {\tt \#include $<$fasta\-Seq\-IO.h$>$} 6 | 7 | \subsection*{Data Fields} 8 | \begin{CompactItemize} 9 | \item 10 | char $\ast$ \hyperlink{structfSeq__t_o0}{seq} 11 | \item 12 | char $\ast$ \hyperlink{structfSeq__t_o1}{label} 13 | \end{CompactItemize} 14 | 15 | 16 | \subsection*{Detailed Description} 17 | 18 | 19 | 20 | 21 | Definition at line 12 of file fasta\-Seq\-IO.h.
22 | 23 | \subsection*{Field Documentation} 24 | \hypertarget{structfSeq__t_o1}{ 25 | \index{fSeq_t@{f\-Seq\_\-t}!label@{label}} 26 | \index{label@{label}!fSeq_t@{f\-Seq\_\-t}} 27 | \subsubsection[label]{\setlength{\rightskip}{0pt plus 5cm}char$\ast$ \hyperlink{structfSeq__t_o1}{f\-Seq\_\-t::label}}} 28 | \label{structfSeq__t_o1} 29 | 30 | 31 | 32 | 33 | Definition at line 14 of file fasta\-Seq\-IO.h. 34 | 35 | Referenced by Free\-FSeqs(), init\-Aof\-FSeqs(), and Read\-FSeqs().\hypertarget{structfSeq__t_o0}{ 36 | \index{fSeq_t@{f\-Seq\_\-t}!seq@{seq}} 37 | \index{seq@{seq}!fSeq_t@{f\-Seq\_\-t}} 38 | \subsubsection[seq]{\setlength{\rightskip}{0pt plus 5cm}char$\ast$ \hyperlink{structfSeq__t_o0}{f\-Seq\_\-t::seq}}} 39 | \label{structfSeq__t_o0} 40 | 41 | 42 | 43 | 44 | Definition at line 13 of file fasta\-Seq\-IO.h. 45 | 46 | Referenced by Free\-FSeqs(), init\-Aof\-FSeqs(), print\-FSeq\-Sub\-Seq(), Read\-FSeqs(), and Read\-Txt\-Seqs(). 47 | 48 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 49 | \item 50 | Fasta\-Seq\-IO/\hyperlink{fastaSeqIO_8h}{fasta\-Seq\-IO.h}\end{CompactItemize} 51 | -------------------------------------------------------------------------------- /Body/Images-chap2/chrisResults2.tex: -------------------------------------------------------------------------------- 1 | \begin{table}[ptbh] 2 | \caption[Antimicrobial activity of rationally designed and shuffled peptides against 3 | \emph{S. aureus} and \emph{B. anthracis}]{Antimicrobial activity of rationally designed and shuffled peptides against 4 | \emph{S. aureus} and \emph{B. anthracis}. 5 | Each entry shows the minimum inhibitory concentration in $\mu$g/mL\@. ``+'' = MIC greater than 256 $\mu$g/mL. ``++'' = MIC 6 | greater than 128 $\mu$g/mL, not sufficiently soluble to test at 256 7 | $\mu$g/mL.} 8 | \label{table:chrisResults2} 9 | \centering \scriptsize 10 | \begin{tabular}{llcclcc} \hline\hline 11 | Peptide & Sequence & \emph{S. 
aureus} & \emph{B. anthracis} & Shuffled Sequence & \emph{S. aureus} & \emph{B. anthracis} \\ 12 | \rowcolor[gray]{0.9} 13 | 28 & \texttt{FLGVVFKLASKVFPAVFGKV} & 8 & 16 & \texttt{GVSVAGAKKVKVLFVFPFLF} & + & + \\ 14 | 51 & \texttt{FLFRVASKVFPALIGKFKKK} & 16 & 16 & \texttt{RKVAPALIKSFVFLFKFKKG} & 128 & 256 \\ 15 | \rowcolor[gray]{0.9} 16 | 22 & \texttt{LGALFRVASKVFPAVISMVK} & 64 & 64 & \texttt{SVPSVGAVLFFKRAAVMKLI} & + & + \\ 17 | 63 & \texttt{LPVLFKLASKVFPAVFSSLK} & 128 & 128 & \texttt{VSVKKVLPFAPLKSLLSFAF} & + & + \\ 18 | \rowcolor[gray]{0.9} 19 | 5 & \texttt{FLFGLASKVFPAVYCKVTRK} & 256 & 128 & \texttt{FLPVLVKVFRYSKKTAAGCF} & + & + \\ 20 | 43 & \texttt{SFVFKLASKVVPSVFSALTR} & 256 & 128 & \texttt{ASPTVFRSSVFLSLFVVAKK} & + & + \\ 21 | \rowcolor[gray]{0.9} 22 | 35 & \texttt{LPVVFRVASKVFPALISKLT} & 256 & 128 & \texttt{KVFIATLVVSSFLLAKPPRV} & + & + \\ 23 | \hline\hline 24 | \end{tabular} 25 | \end{table} 26 | -------------------------------------------------------------------------------- /Body/appb/structsSize__t.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structsSize__t}{ 2 | \section{s\-Size\_\-t Struct Reference} 3 | \label{structsSize__t}\index{sSize_t@{sSize\_\-t}} 4 | } 5 | \subsection*{Data Fields} 6 | \begin{CompactItemize} 7 | \item 8 | int \hyperlink{structsSize__t_o0}{start} 9 | \item 10 | int \hyperlink{structsSize__t_o1}{stop} 11 | \item 12 | int \hyperlink{structsSize__t_o2}{size} 13 | \end{CompactItemize} 14 | 15 | 16 | \subsection*{Detailed Description} 17 | 18 | 19 | 20 | 21 | Definition at line 165 of file fasta\-Seq\-IO.c. 
22 | 23 | \subsection*{Field Documentation} 24 | \hypertarget{structsSize__t_o2}{ 25 | \index{sSize_t@{s\-Size\_\-t}!size@{size}} 26 | \index{size@{size}!sSize_t@{s\-Size\_\-t}} 27 | \subsubsection[size]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsSize__t_o2}{s\-Size\_\-t::size}}} 28 | \label{structsSize__t_o2} 29 | 30 | 31 | 32 | 33 | Definition at line 168 of file fasta\-Seq\-IO.c. 34 | 35 | Referenced by Read\-FSeqs().\hypertarget{structsSize__t_o0}{ 36 | \index{sSize_t@{s\-Size\_\-t}!start@{start}} 37 | \index{start@{start}!sSize_t@{s\-Size\_\-t}} 38 | \subsubsection[start]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsSize__t_o0}{s\-Size\_\-t::start}}} 39 | \label{structsSize__t_o0} 40 | 41 | 42 | 43 | 44 | Definition at line 166 of file fasta\-Seq\-IO.c. 45 | 46 | Referenced by Read\-FSeqs().\hypertarget{structsSize__t_o1}{ 47 | \index{sSize_t@{s\-Size\_\-t}!stop@{stop}} 48 | \index{stop@{stop}!sSize_t@{s\-Size\_\-t}} 49 | \subsubsection[stop]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsSize__t_o1}{s\-Size\_\-t::stop}}} 50 | \label{structsSize__t_o1} 51 | 52 | 53 | 54 | 55 | Definition at line 167 of file fasta\-Seq\-IO.c. 56 | 57 | Referenced by Read\-FSeqs(). 
58 | 59 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 60 | \item 61 | Fasta\-Seq\-IO/\hyperlink{fastaSeqIO_8c}{fasta\-Seq\-IO.c}\end{CompactItemize} 62 | -------------------------------------------------------------------------------- /Body/Images-chap2/peptides.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{sidewaystable}[ptbh] 3 | \caption[The preliminary design synthetic antimicrobial peptides used in this study]{The preliminary design synthetic antimicrobial peptides used in this study. 4 | For each synthetic AmP we also designed two sequences 5 | (``negative'' a and b), which have the same amino acid 6 | composition as the synthetic peptide but have an $S$--score 7 | of zero. The table also shows statistics relevant to 8 | AmPs, which were calculated using the EMBOSS software 9 | package\cite{rice2000emboss}. Note that synth--3 has only 10 | one negative version. Also, the peptides synth--1, 2, and 3 11 | were the \emph{only} peptides designed using our grammatical 12 | approach that were synthesized and tested experimentally. 
13 | }\label{table:peptides} \begin{center} 14 | \begin{tabular}{lccccl} \hline \hline 15 | Peptide & S & Size & Charge & pI & Sequence\\ \hline 16 | \textbf{synth--1:} & & 20 & 4.5 & 11.92\\ 17 | ~~~synth--1 & 1 & & & & {\footnotesize \ttfamily NKVKKPLTGAHRLLFTFLFV} \\ 18 | ~~~negative--1a & 0 & & & & {\footnotesize \ttfamily VVLKLLFFKFNLPHKTRTAG} \\ 19 | ~~~negative--1b & 0 & & & & {\footnotesize \ttfamily LVLTFLFATPKLNGRVKKFH} \\ 20 | \textbf{synth--2:} & & 31 & 10.0 & 11.28 \\ 21 | ~~~synth--2 & 1 & & & & {\footnotesize \ttfamily MKKIKKEAGKNILKLAPKEVAAKKSKKSPTK} \\ 22 | ~~~negative--2a & 0 & & & & {\footnotesize \ttfamily PAAGESKVKANKKKAKILPTMKLKKEIKKKS} \\ 23 | ~~~negative--2b & 0 & & & & {\footnotesize \ttfamily SEASLKAKIKKIAMKKVTKGKAKNKPKLPEK} \\ 24 | \textbf{synth--3:}& & 63 & 3.0 & 10.41\\ 25 | ~~~synth--3 & 0.92 & & & & {\footnotesize \ttfamily MKDKNSTGPLLSALLLAVTAGGSPVAAAPWNPFAAILKAALQIAGAAEPKEVTAKKGPTKADA}\\ 26 | ~~~negative--3a & 0 & & & & {\footnotesize \ttfamily GWAGLVAETAIADKMSLKAAGEPPNQNDGAVLKTPPKAAASAKPLGAAKTLAFISPVTLALAK}\\ 27 | %~~~negative--3b & 0 & 63 & 3.0 & 10.41 & {\footnotesize \ttfamily AAKGVAAAPEANALSAWTTPMGLGGSIGFDKPPKKALKNKLTPAAVKSVLLPALATIAQEDAA} 28 | \\ \hline\hline 29 | \end{tabular} \end{center} 30 | 31 | \end{sidewaystable} 32 | -------------------------------------------------------------------------------- /Body/appb/structmnode.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structmnode}{ 2 | \section{mnode Struct Reference} 3 | \label{structmnode}\index{mnode@{mnode}} 4 | } 5 | {\tt \#include $<$convll.h$>$} 6 | 7 | Collaboration diagram for mnode:\begin{figure}[H] 8 | \begin{center} 9 | \leavevmode 10 | \includegraphics[width=60pt]{structmnode__coll__graph} 11 | \end{center} 12 | \end{figure} 13 | \subsection*{Data Fields} 14 | \begin{CompactItemize} 15 | \item 16 | int \hyperlink{structmnode_o0}{clique\-Membership} 17 | \item 18 | \hyperlink{structmnode}{mnode} $\ast$ 
\hyperlink{structmnode_o1}{next} 19 | \end{CompactItemize} 20 | 21 | 22 | \subsection*{Detailed Description} 23 | This data structure is just a linked list of integers used for bookkeeping during the convolution stage. 24 | 25 | 26 | 27 | Definition at line 49 of file convll.h. 28 | 29 | \subsection*{Field Documentation} 30 | \hypertarget{structmnode_o0}{ 31 | \index{mnode@{mnode}!cliqueMembership@{cliqueMembership}} 32 | \index{cliqueMembership@{cliqueMembership}!mnode@{mnode}} 33 | \subsubsection[cliqueMembership]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structmnode_o0}{mnode::clique\-Membership}}} 34 | \label{structmnode_o0} 35 | 36 | 37 | Clique to which this belongs. 38 | 39 | Definition at line 52 of file convll.h. 40 | 41 | Referenced by mll\-To\-CSet(), print\-Member\-Stacks(), push\-Mem\-Stack(), and set\-Stack\-True().\hypertarget{structmnode_o1}{ 42 | \index{mnode@{mnode}!next@{next}} 43 | \index{next@{next}!mnode@{mnode}} 44 | \subsubsection[next]{\setlength{\rightskip}{0pt plus 5cm}struct \hyperlink{structmnode}{mnode}$\ast$ \hyperlink{structmnode_o1}{mnode::next}}} 45 | \label{structmnode_o1} 46 | 47 | 48 | A pointer to the next member in the linked list of mll\_\-t space objects. 49 | 50 | Definition at line 55 of file convll.h. 51 | 52 | Referenced by mll\-To\-CSet(), pop\-Mem\-Stack(), print\-Member\-Stacks(), push\-Mem\-Stack(), and set\-Stack\-True(). 53 | 54 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 55 | \item 56 | \hyperlink{convll_8h}{convll.h}\end{CompactItemize} 57 | -------------------------------------------------------------------------------- /main.tex: -------------------------------------------------------------------------------- 1 | % -*- Mode:TeX -*- 2 | 3 | %% The documentclass options along with the pagestyle can be used to generate 4 | %% a technical report, a draft copy, or a regular thesis. 
You may need to 5 | %% re-specify the pagestyle after you \include cover.tex. For more 6 | %% information, see the first few lines of mitthesis.cls. 7 | 8 | %\documentclass[12pt,vi,twoside]{mitthesis} 9 | %% 10 | %% If you want your thesis copyright to you instead of MIT, use the 11 | %% ``vi'' option, as above. 12 | %% 13 | %\documentclass[12pt,twoside,leftblank]{mitthesis} 14 | %% 15 | %% If you want blank pages before new chapters to be labelled ``This 16 | %% Page Intentionally Left Blank'', use the ``leftblank'' option, as 17 | %% above. 18 | 19 | %\documentclass[12pt]{mitthesis} 20 | %\documentclass[12pt,singlespace,twoside]{mitthesis} 21 | \documentclass[12pt,twoside]{mitthesis} 22 | %\documentclass[12pt,oneside]{mitthesis} 23 | %\usepackage{lgrind} 24 | \input{Body/header.tex} 25 | \pagestyle{plain} 26 | 27 | %% This bit allows you to either specify only the files which you wish to 28 | %% process, or `all' to process all files which you \include. 29 | %% Krishna Sethuraman (1990). 
30 | 31 | %\typein [\files]{Enter file names to process, (chap1,chap2 ...), or `all' to process all files:} 32 | %\def\all{all} 33 | %\ifx\files\all \typeout{Including all files.} \else \typeout{Including only \files.} \includeonly{\files} \fi 34 | 35 | \begin{document} 36 | %\fontencoding{LY1}\fontfamily{ACaslonPro}\mdweight 37 | 38 | \input{Body/cover} 39 | \pagestyle{plain} 40 | \input{Body/contents} 41 | 42 | %now start the fancy headings 43 | \pagestyle{fancyplain} 44 | \addtolength{\headheight}{\baselineskip} 45 | %add a nice little line underneath the heading 46 | %\renewcommand{\headrulewidth}{0.6pt} 47 | 48 | 49 | \input{Body/chap1} 50 | \input{Body/chap2} 51 | \input{Body/chap3} 52 | \input{Body/chap4} 53 | 54 | \clearpage 55 | \appendix 56 | %\addcontentsline{toc}{part}{Appendix} 57 | 58 | \input{Body/appa} 59 | \import{./Body/appb/}{appb.tex} 60 | %\input{Body/appc} 61 | \input{Body/biblio.tex} 62 | %\input{Body/index.tex} 63 | \end{document} 64 | 65 | -------------------------------------------------------------------------------- /Body/Images-chap4/fig_voiceAlign.tex: -------------------------------------------------------------------------------- 1 | \psset{xunit=1cm,yunit=1cm} 2 | \readdata{\dataA}{Figures/voiceSeq1-xy.dat} 3 | \readdata{\dataB}{Figures/voiceSeq2-xy.dat} 4 | \begin{pspicture}(0,0)(10,10)%\showgrid 5 | \rput(2,9){ 6 | \rput(-1,0){ 7 | \psaxes[tickstyle=bottom, dy=\psyunit,Dy=1,Oy=0,Ox=0,Dx=100](0,0)(0,-1)(8,1) 8 | } 9 | \dataplot[plotstyle=line,linecolor=black,linewidth=0.1mm]{\dataA} 10 | } 11 | \rput(2,1){ 12 | \rput(-1,0){ 13 | \psaxes[tickstyle=bottom, dy=\psyunit,Dy=1,Oy=0,Ox=0,Dx=100](0,0)(0,-1)(8,1) 14 | } 15 | \dataplot[plotstyle=line,linecolor=black,linewidth=0.1mm]{\dataB} 16 | } 17 | \rput[l](0.4, 5.5){\normalsize \texttt{SSEMSBVFIHIMBXBMFMLFTYVMMSMTBZBTMMGTZXWTBBWICDGGG}} 18 | \rput[l](0.4, 5){\normalsize \texttt{:...:.......:...............:..:..::.:..:..:.....}} 19 | \rput[l](0.4, 4.5){\normalsize 
\texttt{SPQISVBWFFPVBYPPPSYZXVWSSTVBBVWTSPGTBXXYBWFIKGIIM}} 20 | \rput(0, 0){ 21 | \psline[linestyle=dotted](4,2)(0.55,4.3) 22 | \psline[linestyle=dotted](4.4,2)(10.6,4.3) 23 | \psline[linestyle=dotted](4.4,2)(4.4,1) 24 | \psline[linestyle=dotted](4.2,2)(4.2,1) 25 | } 26 | \rput(0, 0){ 27 | \psline[linestyle=dotted](4,8)(0.55,5.7) 28 | \psline[linestyle=dotted](4.4,8)(10.6,5.7) 29 | \psline[linestyle=dotted](4,8)(4,9) 30 | \psline[linestyle=dotted](4.4,8)(4.4,9) 31 | } 32 | \rput[bl](8.5, 8){Speaker 1} 33 | \rput[tl](8.5, 2){Speaker 2} 34 | 35 | 36 | 37 | % \rput(0.8,150){ 38 | % \rotatebox{90}{ \# sequences} 39 | % } 40 | % \rput(8.5,150){ 41 | % \rotatebox{-90}{ \# patterns} 42 | % } 43 | % \rput(5,0){ 44 | % bootstrapping iterations 45 | % } 46 | 47 | 48 | % number LSWBBTTTTYZXBW SSEMSBVFIHIMBXBMFMLFTYVMMSMTBZBTMMGTZXWTBBWICD 49 | % .............. :...:.......:...............:..:..::.:..:..:.. 50 | % number TZXYSMFPYVVVTS SPQISVBWFFPVBYPPPSYZXVWSSTVBBVWTSPGTBXXYBWFIKG 51 | % 250 260 270 280 290 300 52 | 53 | % 310 320 330 340 350 360 54 | % number GGGCIWYFMELKKFTKPLILAAAAAAAAAADWZIHWIDDCCDNRAAAAAAAAAAAAAAAA 55 | % ....................:::::::::...... 
........:::::::::::::::: 56 | % number IIMKPFIILHGDMSYTIEHEAAAAAAAAARITBTDEMGQEHCRAAAAAAAAAAAAAAAAA 57 | 58 | \end{pspicture} 59 | -------------------------------------------------------------------------------- /Body/appb/files.tex: -------------------------------------------------------------------------------- 1 | \section{gemoda File List} 2 | Here is a list of all files with brief descriptions:\begin{CompactList} 3 | \item\contentsline{section}{\hyperlink{align_8c}{align.c} }{\pageref{align_8c}}{} 4 | \item\contentsline{section}{\hyperlink{bitSet_8c}{bit\-Set.c} }{\pageref{bitSet_8c}}{} 5 | \item\contentsline{section}{\hyperlink{bitSet_8h}{bit\-Set.h} }{\pageref{bitSet_8h}}{} 6 | \item\contentsline{section}{\hyperlink{convll_8c}{convll.c} }{\pageref{convll_8c}}{} 7 | \item\contentsline{section}{\hyperlink{convll_8h}{convll.h} }{\pageref{convll_8h}}{} 8 | \item\contentsline{section}{\hyperlink{gemoda-r_8c}{gemoda-r.c} }{\pageref{gemoda-r_8c}}{} 9 | \item\contentsline{section}{\hyperlink{gemoda-s_8c}{gemoda-s.c} }{\pageref{gemoda-s_8c}}{} 10 | \item\contentsline{section}{\hyperlink{matdata_8h}{matdata.h} }{\pageref{matdata_8h}}{} 11 | \item\contentsline{section}{\hyperlink{matrices_8c}{matrices.c} }{\pageref{matrices_8c}}{} 12 | \item\contentsline{section}{\hyperlink{matrices_8h}{matrices.h} }{\pageref{matrices_8h}}{} 13 | \item\contentsline{section}{\hyperlink{matrixmap_8h}{matrixmap.h} }{\pageref{matrixmap_8h}}{} 14 | \item\contentsline{section}{\hyperlink{newConv_8c}{new\-Conv.c} }{\pageref{newConv_8c}}{} 15 | \item\contentsline{section}{\hyperlink{patStats_8c}{pat\-Stats.c} }{\pageref{patStats_8c}}{} 16 | \item\contentsline{section}{\hyperlink{patStats_8h}{pat\-Stats.h} }{\pageref{patStats_8h}}{} 17 | \item\contentsline{section}{\hyperlink{realCompare_8c}{real\-Compare.c} }{\pageref{realCompare_8c}}{} 18 | \item\contentsline{section}{\hyperlink{realCompare_8h}{real\-Compare.h} }{\pageref{realCompare_8h}}{} 19 | 
\item\contentsline{section}{\hyperlink{realIo_8c}{real\-Io.c} }{\pageref{realIo_8c}}{} 20 | \item\contentsline{section}{\hyperlink{realIo_8h}{real\-Io.h} }{\pageref{realIo_8h}}{} 21 | \item\contentsline{section}{\hyperlink{spat_8h}{spat.h} }{\pageref{spat_8h}}{} 22 | \item\contentsline{section}{\hyperlink{words_8c}{words.c} }{\pageref{words_8c}}{} 23 | \item\contentsline{section}{Fasta\-Seq\-IO/\hyperlink{fastaSeqIO_8c}{fasta\-Seq\-IO.c} }{\pageref{fastaSeqIO_8c}}{} 24 | \item\contentsline{section}{Fasta\-Seq\-IO/\hyperlink{fastaSeqIO_8h}{fasta\-Seq\-IO.h} }{\pageref{fastaSeqIO_8h}}{} 25 | \end{CompactList} 26 | -------------------------------------------------------------------------------- /Body/appb/refman.tex: -------------------------------------------------------------------------------- 1 | \documentclass[letterpaper]{book} 2 | \usepackage{makeidx} 3 | \usepackage{fancyhdr} 4 | \usepackage{graphicx} 5 | \usepackage{multicol} 6 | \usepackage{float} 7 | \usepackage{textcomp} 8 | \usepackage{alltt} 9 | \usepackage{times} 10 | \ifx\pdfoutput\undefined 11 | \usepackage[ps2pdf, 12 | pagebackref=true, 13 | colorlinks=true, 14 | linkcolor=blue 15 | ]{hyperref} 16 | \usepackage{pspicture} 17 | \else 18 | \usepackage[pdftex, 19 | pagebackref=true, 20 | colorlinks=true, 21 | linkcolor=blue 22 | ]{hyperref} 23 | \fi 24 | \usepackage{doxygen} 25 | \makeindex 26 | \setcounter{tocdepth}{1} 27 | \renewcommand{\footrulewidth}{0.4pt} 28 | \begin{document} 29 | \begin{titlepage} 30 | \vspace*{7cm} 31 | \begin{center} 32 | {\Large gemoda Reference Manual}\\ 33 | \vspace*{1cm} 34 | {\large Generated by Doxygen 1.4.4}\\ 35 | \vspace*{0.5cm} 36 | {\small Fri Mar 3 09:05:14 2006}\\ 37 | \end{center} 38 | \end{titlepage} 39 | \clearemptydoublepage 40 | \pagenumbering{roman} 41 | \tableofcontents 42 | \clearemptydoublepage 43 | \pagenumbering{arabic} 44 | \chapter{gemoda Directory Documentation} 45 | \input{dir_000000} 46 | \chapter{gemoda Data Structure Documentation} 47 | 
\input{structbitGraph__t} 48 | \include{structbitSet__t} 49 | \include{structcnode} 50 | \include{structcSet__t} 51 | \include{structfSeq__t} 52 | \include{structmnode} 53 | \include{structrdh__t} 54 | \include{structsHash__t} 55 | \include{structsHashEntry__t} 56 | \include{structsOffset__t} 57 | \include{structsPat__t} 58 | \include{structsSize__t} 59 | \chapter{gemoda File Documentation} 60 | \input{align_8c} 61 | \include{bitSet_8c} 62 | \include{bitSet_8h} 63 | \include{convll_8c} 64 | \include{convll_8h} 65 | \include{fastaSeqIO_8c} 66 | \include{fastaSeqIO_8h} 67 | \include{gemoda-r_8c} 68 | \include{gemoda-s_8c} 69 | \include{matdata_8h} 70 | \include{matrices_8c} 71 | \include{matrices_8h} 72 | \include{matrixmap_8h} 73 | \include{newConv_8c} 74 | \include{patStats_8c} 75 | \include{patStats_8h} 76 | \include{realCompare_8c} 77 | \include{realCompare_8h} 78 | \include{realIo_8c} 79 | \include{realIo_8h} 80 | \include{spat_8h} 81 | \include{words_8c} 82 | \printindex 83 | \end{document} 84 | -------------------------------------------------------------------------------- /Body/appb/doxygen.sty: -------------------------------------------------------------------------------- 1 | \NeedsTeXFormat{LaTeX2e} 2 | \ProvidesPackage{doxygen} 3 | \RequirePackage{calc} 4 | \RequirePackage{array} 5 | %%\pagestyle{fancyplain} 6 | %%\newcommand{\clearemptydoublepage}{\newpage{\pagestyle{empty}\cleardoublepage}} 7 | %%\renewcommand{\chaptermark}[1]{\markboth{#1}{}} 8 | %%\renewcommand{\sectionmark}[1]{\markright{\thesection\ #1}} 9 | %%\lhead[\fancyplain{}{\bfseries\thepage}] 10 | %% {\fancyplain{}{\bfseries\rightmark}} 11 | %%\rhead[\fancyplain{}{\bfseries\leftmark}] 12 | %% {\fancyplain{}{\bfseries\thepage}} 13 | %%\rfoot[\fancyplain{}{\bfseries\scriptsize Generated on Fri Mar 3 09:05:14 2006 for gemoda by Doxygen }]{} 14 | %%\lfoot[]{\fancyplain{}{\bfseries\scriptsize Generated on Fri Mar 3 09:05:14 2006 for gemoda by Doxygen }} 15 | %%\cfoot{} 16 | 
\newenvironment{CompactList} 17 | {\begin{list}{}{ 18 | \setlength{\leftmargin}{0.5cm} 19 | \setlength{\itemsep}{0pt} 20 | \setlength{\parsep}{0pt} 21 | \setlength{\topsep}{0pt} 22 | \renewcommand{\makelabel}{}}} 23 | {\end{list}} 24 | \newenvironment{CompactItemize} 25 | { 26 | \begin{itemize} 27 | \setlength{\itemsep}{-3pt} 28 | \setlength{\parsep}{0pt} 29 | \setlength{\topsep}{0pt} 30 | \setlength{\partopsep}{0pt} 31 | } 32 | {\end{itemize}} 33 | \newcommand{\PBS}[1]{\let\temp=\\#1\let\\=\temp} 34 | \newlength{\tmplength} 35 | \newenvironment{TabularC}[1] 36 | { 37 | \setlength{\tmplength} 38 | {\linewidth/(#1)-\tabcolsep*2-\arrayrulewidth*(#1+1)/(#1)} 39 | \par\begin{tabular*}{\linewidth} 40 | {*{#1}{|>{\PBS\raggedright\hspace{0pt}}p{\the\tmplength}}|} 41 | } 42 | {\end{tabular*}\par} 43 | \newcommand{\entrylabel}[1]{ 44 | {\parbox[b]{\labelwidth-4pt}{\makebox[0pt][l]{\textbf{#1}}\\}}} 45 | \newenvironment{Desc} 46 | {\begin{list}{} 47 | { 48 | \settowidth{\labelwidth}{40pt} 49 | \setlength{\leftmargin}{\labelwidth} 50 | \setlength{\parsep}{0pt} 51 | \setlength{\itemsep}{-4pt} 52 | \renewcommand{\makelabel}{\entrylabel} 53 | } 54 | } 55 | {\end{list}} 56 | \newenvironment{Indent} 57 | {\begin{list}{}{\setlength{\leftmargin}{0.5cm}} 58 | \item[]\ignorespaces} 59 | {\unskip\end{list}} 60 | %%\setlength{\parindent}{0cm} 61 | %%\setlength{\parskip}{0.2cm} 62 | %%\addtocounter{secnumdepth}{1} 63 | %%\sloppy 64 | %%\usepackage[T1]{fontenc} 65 | -------------------------------------------------------------------------------- /Body/Images-chap2/antimicrobialnames.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{table}[!hbtp] 3 | \centering 4 | \caption{Common antimicrobial peptide families}\label{table:antimicrobialnames} 5 | \begin{tabular}{cccc} \hline \hline 6 | \small acaloleptin & 7 | \small achacin & 8 | \small adenoregulin & 9 | \small alpha--defensin \\ 10 | \small androctonin & 11 | \small andropin & 12 | 
\small apidaecin & 13 | \small attacin \\ 14 | \small aurein & 15 | \small azurocidin & 16 | \small bactenecin & 17 | \small bactericidin \\ 18 | \small bactinecin & 19 | \small beta--defensin & 20 | \small bombinin & 21 | \small bombolitin \\ 22 | \small buforin & 23 | \small buthinin & 24 | \small caerin & 25 | \small caltrin \\ 26 | \small cathelin & 27 | \small cecropin & 28 | \small ceratotoxin & 29 | \small citropin \\ 30 | \small clavanin & 31 | \small coleoptericin & 32 | \small corticostatin & 33 | \small crabrolin \\ 34 | \small defensin & 35 | \small demidefensin & 36 | \small dermaseptin & 37 | \small dermcidin \\ 38 | \small diptericin & 39 | \small drosocin & 40 | \small drosomycin & 41 | \small enbocin \\ 42 | \small formaecin & 43 | \small gaegurin & 44 | \small gallinacin & 45 | \small gloverin \\ 46 | \small granulysin & 47 | \small hadrurin & 48 | \small heliomicin & 49 | \small hemiptericin \\ 50 | \small hemolin & 51 | \small hepcidin & 52 | \small histatin & 53 | \small holotricin \\ 54 | \small hymenoptaecin & 55 | \small hyphancin & 56 | \small indolicidin & 57 | \small lebocin \\ 58 | \small macin & 59 | \small maculatin & 60 | \small maximin & 61 | \small metalnikowin \\ 62 | \small metchnikowin & 63 | \small misgurin & 64 | \small moricin & 65 | \small myticin \\ 66 | \small mytilin & 67 | \small mytimycin & 68 | \small nk--lysin & 69 | \small penaeidin \\ 70 | \small permatin & 71 | \small phormicin & 72 | \small phylloxin & 73 | \small pleurocidin \\ 74 | \small polyphemusin & 75 | \small ponericin & 76 | \small protegrin & 77 | \small pseudin \\ 78 | \small pyrrhocoricin & 79 | \small ranalexin & 80 | \small ranatuerin & 81 | \small rhinocerosin \\ 82 | \small royalisin & 83 | \small rugosin & 84 | \small salmocidin & 85 | \small sapecin \\ 86 | \small sarcotoxin & 87 | \small sillucin & 88 | \small spingerin & 89 | \small styelin \\ 90 | \small tachycitin & 91 | \small tachyplesin & 92 | \small temporin & 93 | \small tenecin \\ 94 | 
\small termicin & 95 | \small thanatin & 96 | \small tricholongin & 97 | \small zeamatin \\ \hline \hline 98 | \end{tabular} 99 | \end{table} 100 | -------------------------------------------------------------------------------- /Body/appb/matrixmap_8h.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{matrixmap_8h}{ 2 | \section{matrixmap.h File Reference} 3 | \label{matrixmap_8h}\index{matrixmap.h@{matrixmap.h}} 4 | } 5 | {\tt \#include \char`\"{}matdata.h\char`\"{}}\par 6 | {\tt \#include \char`\"{}matrices.h\char`\"{}}\par 7 | 8 | 9 | Include dependency graph for matrixmap.h:\begin{figure}[H] 10 | \begin{center} 11 | \leavevmode 12 | \includegraphics[width=104pt]{matrixmap_8h__incl} 13 | \end{center} 14 | \end{figure} 15 | 16 | 17 | This graph shows which files directly or indirectly include this file:\begin{figure}[H] 18 | \begin{center} 19 | \leavevmode 20 | \includegraphics[width=104pt]{matrixmap_8h__dep__incl} 21 | \end{center} 22 | \end{figure} 23 | \subsection*{Variables} 24 | \begin{CompactItemize} 25 | \item 26 | \begin{tabbing} 27 | xx\=xx\=xx\=xx\=xx\=xx\=xx\=xx\=xx\=\kill 28 | struct \{\\ 29 | \>char $\ast$ \hyperlink{matrixmap_8h_a0}{name}\\ 30 | \>const int($\ast$ \hyperlink{matrixmap_8h_a1}{mat} )\mbox{[}MATRIX\_SIZE\mbox{]}\\ 31 | \} \hyperlink{matrixmap_8h_a2}{matrix\_map} \mbox{[}$\,$\mbox{]}\\ 32 | 33 | \end{tabbing}\end{CompactItemize} 34 | 35 | 36 | \subsection*{Detailed Description} 37 | This file contains structures and functions for handling scoring matrices. 38 | 39 | Definition in file \hyperlink{matrixmap_8h-source}{matrixmap.h}. 
40 | 41 | \subsection*{Variable Documentation} 42 | \hypertarget{matrixmap_8h_a1}{ 43 | \index{matrixmap.h@{matrixmap.h}!mat@{mat}} 44 | \index{mat@{mat}!matrixmap.h@{matrixmap.h}} 45 | \subsubsection[mat]{\setlength{\rightskip}{0pt plus 5cm}const int($\ast$ \hyperlink{matrixmap_8h_a1}{mat})\mbox{[}MATRIX\_\-SIZE\mbox{]}}} 46 | \label{matrixmap_8h_a1} 47 | 48 | 49 | 50 | 51 | Definition at line 15 of file matrixmap.h. 52 | 53 | Referenced by align\-Mat(), align\-Words\-Mat\_\-bit(), and main().\hypertarget{matrixmap_8h_a2}{ 54 | \index{matrixmap.h@{matrixmap.h}!matrix_map@{matrix\_\-map}} 55 | \index{matrix_map@{matrix\_\-map}!matrixmap.h@{matrixmap.h}} 56 | \subsubsection[matrix\_\-map]{\setlength{\rightskip}{0pt plus 5cm}struct \{ ... \} \hyperlink{matrixmap_8h_a2}{matrix\_\-map}\mbox{[}$\,$\mbox{]}}} 57 | \label{matrixmap_8h_a2} 58 | 59 | 60 | This data structure maps the names of common matrices to the names of their variables 61 | 62 | Referenced by get\-Matrix\-By\-Name().\hypertarget{matrixmap_8h_a0}{ 63 | \index{matrixmap.h@{matrixmap.h}!name@{name}} 64 | \index{name@{name}!matrixmap.h@{matrixmap.h}} 65 | \subsubsection[name]{\setlength{\rightskip}{0pt plus 5cm}char$\ast$ \hyperlink{matrixmap_8h_a0}{name}}} 66 | \label{matrixmap_8h_a0} 67 | 68 | 69 | 70 | 71 | Definition at line 14 of file matrixmap.h. 
72 | -------------------------------------------------------------------------------- /Body/appb/structcSet__t.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structcSet__t}{ 2 | \section{c\-Set\_\-t Struct Reference} 3 | \label{structcSet__t}\index{cSet_t@{cSet\_\-t}} 4 | } 5 | {\tt \#include $<$convll.h$>$} 6 | 7 | \subsection*{Data Fields} 8 | \begin{CompactItemize} 9 | \item 10 | int \hyperlink{structcSet__t_o0}{size} 11 | \item 12 | int $\ast$ \hyperlink{structcSet__t_o1}{members} 13 | \end{CompactItemize} 14 | 15 | 16 | \subsection*{Detailed Description} 17 | A c\-Set\_\-t is used to hold a set of integers in cases where the upper limit on the size of the integers is unknown, or in cases where using a bit set would be impractical. This data structure is used throughout the convolution, where we have found heuristically that intersections of this data type are much faster than those for bit\-Set\_\-t's, which would require a bit shift. 18 | 19 | 20 | 21 | Definition at line 21 of file convll.h. 22 | 23 | \subsection*{Field Documentation} 24 | \hypertarget{structcSet__t_o1}{ 25 | \index{cSet_t@{c\-Set\_\-t}!members@{members}} 26 | \index{members@{members}!cSet_t@{c\-Set\_\-t}} 27 | \subsubsection[members]{\setlength{\rightskip}{0pt plus 5cm}int$\ast$ \hyperlink{structcSet__t_o1}{c\-Set\_\-t::members}}} 28 | \label{structcSet__t_o1} 29 | 30 | 31 | Array of pointers to ints that holds the members of this set. 32 | 33 | Definition at line 26 of file convll.h. 
34 | 35 | Referenced by add\-To\-Stacks(), bit\-Set\-To\-CSet(), check\-Cliquec\-Set(), find\-Clique\-Centroid(), main(), make\-Alternate\-Centroid(), merge\-Intersect(), mll\-To\-CSet(), output\-Real\-Pats(), output\-Real\-Pats\-WCentroid(), pop\-Cll(), print\-Cll(), print\-Cll\-Pattern(), print\-CSet(), prune\-Cll(), push\-Conv\-Clique(), remove\-Supers(), swap\-Nodec\-Set(), uniq\-Clique(), and whole\-Clique\-Conv().\hypertarget{structcSet__t_o0}{ 36 | \index{cSet_t@{c\-Set\_\-t}!size@{size}} 37 | \index{size@{size}!cSet_t@{c\-Set\_\-t}} 38 | \subsubsection[size]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structcSet__t_o0}{c\-Set\_\-t::size}}} 39 | \label{structcSet__t_o0} 40 | 41 | 42 | Number of members in this set. 43 | 44 | Definition at line 24 of file convll.h. 45 | 46 | Referenced by bit\-Set\-To\-CSet(), calc\-Stat\-Cliq(), check\-Cliquec\-Set(), find\-Clique\-Centroid(), get\-Largest\-Support(), main(), mll\-To\-CSet(), output\-Real\-Pats(), output\-Real\-Pats\-WCentroid(), print\-Cll(), print\-Cll\-Pattern(), print\-CSet(), prune\-Cll(), remove\-Supers(), single\-Clique\-Conv(), uniq\-Clique(), and whole\-Clique\-Conv(). 
47 | 48 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 49 | \item 50 | \hyperlink{convll_8h}{convll.h}\end{CompactItemize} 51 | -------------------------------------------------------------------------------- /Body/appb/refman.out: -------------------------------------------------------------------------------- 1 | \BOOKMARK [0][-]{chapter.1}{gemoda Directory Documentation}{} 2 | \BOOKMARK [1][-]{section.1.1}{FastaSeqIO/ Directory Reference}{chapter.1} 3 | \BOOKMARK [0][-]{chapter.2}{gemoda Data Structure Documentation}{} 4 | \BOOKMARK [1][-]{section.2.1}{bitGraph\137t Struct Reference}{chapter.2} 5 | \BOOKMARK [1][-]{section.2.2}{bitSet\137t Struct Reference}{chapter.2} 6 | \BOOKMARK [1][-]{section.2.3}{cnode Struct Reference}{chapter.2} 7 | \BOOKMARK [1][-]{section.2.4}{cSet\137t Struct Reference}{chapter.2} 8 | \BOOKMARK [1][-]{section.2.5}{fSeq\137t Struct Reference}{chapter.2} 9 | \BOOKMARK [1][-]{section.2.6}{mnode Struct Reference}{chapter.2} 10 | \BOOKMARK [1][-]{section.2.7}{rdh\137t Struct Reference}{chapter.2} 11 | \BOOKMARK [1][-]{section.2.8}{sHash\137t Struct Reference}{chapter.2} 12 | \BOOKMARK [1][-]{section.2.9}{sHashEntry\137t Struct Reference}{chapter.2} 13 | \BOOKMARK [1][-]{section.2.10}{sOffset\137t Struct Reference}{chapter.2} 14 | \BOOKMARK [1][-]{section.2.11}{sPat\137t Struct Reference}{chapter.2} 15 | \BOOKMARK [1][-]{section.2.12}{sSize\137t Struct Reference}{chapter.2} 16 | \BOOKMARK [0][-]{chapter.3}{gemoda File Documentation}{} 17 | \BOOKMARK [1][-]{section.3.1}{align.c File Reference}{chapter.3} 18 | \BOOKMARK [1][-]{section.3.2}{bitSet.c File Reference}{chapter.3} 19 | \BOOKMARK [1][-]{section.3.3}{bitSet.h File Reference}{chapter.3} 20 | \BOOKMARK [1][-]{section.3.4}{convll.c File Reference}{chapter.3} 21 | \BOOKMARK [1][-]{section.3.5}{convll.h File Reference}{chapter.3} 22 | \BOOKMARK [1][-]{section.3.6}{FastaSeqIO/fastaSeqIO.c File Reference}{chapter.3} 23 | \BOOKMARK 
[1][-]{section.3.7}{FastaSeqIO/fastaSeqIO.h File Reference}{chapter.3} 24 | \BOOKMARK [1][-]{section.3.8}{gemoda-r.c File Reference}{chapter.3} 25 | \BOOKMARK [1][-]{section.3.9}{gemoda-s.c File Reference}{chapter.3} 26 | \BOOKMARK [1][-]{section.3.10}{matdata.h File Reference}{chapter.3} 27 | \BOOKMARK [1][-]{section.3.11}{matrices.c File Reference}{chapter.3} 28 | \BOOKMARK [1][-]{section.3.12}{matrices.h File Reference}{chapter.3} 29 | \BOOKMARK [1][-]{section.3.13}{matrixmap.h File Reference}{chapter.3} 30 | \BOOKMARK [1][-]{section.3.14}{newConv.c File Reference}{chapter.3} 31 | \BOOKMARK [1][-]{section.3.15}{patStats.c File Reference}{chapter.3} 32 | \BOOKMARK [1][-]{section.3.16}{patStats.h File Reference}{chapter.3} 33 | \BOOKMARK [1][-]{section.3.17}{realCompare.c File Reference}{chapter.3} 34 | \BOOKMARK [1][-]{section.3.18}{realCompare.h File Reference}{chapter.3} 35 | \BOOKMARK [1][-]{section.3.19}{realIo.c File Reference}{chapter.3} 36 | \BOOKMARK [1][-]{section.3.20}{realIo.h File Reference}{chapter.3} 37 | \BOOKMARK [1][-]{section.3.21}{spat.h File Reference}{chapter.3} 38 | \BOOKMARK [1][-]{section.3.22}{words.c File Reference}{chapter.3} 39 | -------------------------------------------------------------------------------- /Body/appb/structsOffset__t.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structsOffset__t}{ 2 | \section{s\-Offset\_\-t Struct Reference} 3 | \label{structsOffset__t}\index{sOffset_t@{sOffset\_\-t}} 4 | } 5 | {\tt \#include $<$spat.h$>$} 6 | 7 | \subsection*{Data Fields} 8 | \begin{CompactItemize} 9 | \item 10 | int \hyperlink{structsOffset__t_o0}{seq} 11 | \item 12 | int \hyperlink{structsOffset__t_o1}{pos} 13 | \item 14 | int \hyperlink{structsOffset__t_o2}{next} 15 | \item 16 | int \hyperlink{structsOffset__t_o3}{prev} 17 | \end{CompactItemize} 18 | 19 | 20 | \subsection*{Detailed Description} 21 | This object is used to store the location of a particular word and a set 
of sequences. That is if we hash a word, we would like to know where it came from. This data structure provides that information. 22 | 23 | 24 | 25 | Definition at line 13 of file spat.h. 26 | 27 | \subsection*{Field Documentation} 28 | \hypertarget{structsOffset__t_o2}{ 29 | \index{sOffset_t@{s\-Offset\_\-t}!next@{next}} 30 | \index{next@{next}!sOffset_t@{s\-Offset\_\-t}} 31 | \subsubsection[next]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsOffset__t_o2}{s\-Offset\_\-t::next}}} 32 | \label{structsOffset__t_o2} 33 | 34 | 35 | The index of the word that follows this word at {\em pos\/} plus 1. 36 | 37 | Definition at line 23 of file spat.h. 38 | 39 | Referenced by count\-Words2().\hypertarget{structsOffset__t_o1}{ 40 | \index{sOffset_t@{s\-Offset\_\-t}!pos@{pos}} 41 | \index{pos@{pos}!sOffset_t@{s\-Offset\_\-t}} 42 | \subsubsection[pos]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsOffset__t_o1}{s\-Offset\_\-t::pos}}} 43 | \label{structsOffset__t_o1} 44 | 45 | 46 | The position in the sequence where the word is located. 47 | 48 | Definition at line 20 of file spat.h. 49 | 50 | Referenced by count\-Words2(), and main().\hypertarget{structsOffset__t_o3}{ 51 | \index{sOffset_t@{s\-Offset\_\-t}!prev@{prev}} 52 | \index{prev@{prev}!sOffset_t@{s\-Offset\_\-t}} 53 | \subsubsection[prev]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsOffset__t_o3}{s\-Offset\_\-t::prev}}} 54 | \label{structsOffset__t_o3} 55 | 56 | 57 | The index of the word that precedes this word at {\em pos\/} minus 1. 58 | 59 | Definition at line 26 of file spat.h. 60 | 61 | Referenced by count\-Words2().\hypertarget{structsOffset__t_o0}{ 62 | \index{sOffset_t@{s\-Offset\_\-t}!seq@{seq}} 63 | \index{seq@{seq}!sOffset_t@{s\-Offset\_\-t}} 64 | \subsubsection[seq]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsOffset__t_o0}{s\-Offset\_\-t::seq}}} 65 | \label{structsOffset__t_o0} 66 | 67 | 68 | The sequence from which the word came. 
69 | 70 | Definition at line 17 of file spat.h. 71 | 72 | Referenced by count\-Words2(), and main(). 73 | 74 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 75 | \item 76 | \hyperlink{spat_8h}{spat.h}\end{CompactItemize} 77 | -------------------------------------------------------------------------------- /Body/Images-chap2/motif-conservation.tex: -------------------------------------------------------------------------------- 1 | \begin{sidewaystable}[ptbh] 2 | \caption[Motif conservation]{Motif conservation for the query shown in Figure and the motif \texttt{L[VQH][ALV][KLPQ][AS][EAF][APQS][ALRV]QA}.} 3 | \label{table:cons} 4 | \centering 5 | \begin{tabular}{lllll} \hline\hline 6 | QUERY & \texttt{LQAQAEPLQA}\\ \hline 7 | P17534 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-related protein 4C-1 & Mus musculus (Mouse). \\ 8 | P19660 & \texttt{LVLPSASAQA} & 30.00\% & Bactenecin 5 precursor (BAC5) & Bos taurus (Bovine). \\ 9 | P19661 & \texttt{LVLPSASAQA} & 30.00\% & Bactenecin 7 precursor (BAC7) & Bos taurus (Bovine). \\ 10 | P28309 & \texttt{LVLLSFQVQA} & 30.00\% & Cryptdin-2 precursor & Mus musculus (Mouse). \\ 11 | P28310 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-3 precursor & Mus musculus (Mouse). \\ 12 | P28311 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-4 precursor & Mus musculus (Mouse). \\ 13 | P28312 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-5 precursor & Mus musculus (Mouse). \\ 14 | P32195 & \texttt{LVVPSASAQA} & 30.00\% & Protegrin 2 precursor (PG-2) & Sus scrofa (Pig). \\ 15 | P33046 & \texttt{LVVPSASAQA} & 30.00\% & Indolicidin precursor & Bos taurus (Bovine). \\ 16 | P49930 & \texttt{LVVPSASAQA} & 30.00\% & Antibacterial peptide PMAP-23 & Sus scrofa (Pig). \\ 17 | P49931 & \texttt{LVVPSASAQA} & 30.00\% & Antibacterial peptide PMAP-36 & Sus scrofa (Pig). \\ 18 | P49932 & \texttt{LVVPSASAQA} & 30.00\% & Antibacterial peptide PMAP-37 & Sus scrofa (Pig). 
\\ 19 | P50704 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-6/12 precursor & Mus musculus (Mouse). \\ 20 | P50705 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-7 precursor & Mus musculus (Mouse). \\ 21 | P50707 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-9 precursor & Mus musculus (Mouse). \\ 22 | P50708 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-10 precursor (Fragmen & Mus musculus (Mouse). \\ 23 | P50711 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-13 precursor & Mus musculus (Mouse). \\ 24 | P50712 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-14 precursor (Fragmen & Mus musculus (Mouse). \\ 25 | P50713 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-15 precursor & Mus musculus (Mouse). \\ 26 | P50714 & \texttt{LVLLAFQVQA} & 40.00\% & Cryptdin-16 precursor & Mus musculus (Mouse). \\ 27 | P51525 & \texttt{LVVPSASAQA} & 30.00\% & Prophenin-2 precursor (PF-2) ( & Sus scrofa (Pig). \\ 28 | P82270 & \texttt{LHAQAEARQA} & 70.00\% & Theta defensin-1, subunit A pr & Macaca mulatta (Rhesus macaque). \\ 29 | P82271 & \texttt{LHAQAEARQA} & 70.00\% & Theta defensin-1, subunit B pr & Macaca mulatta (Rhesus macaque). \\ 30 | P82318 & \texttt{LQAQAEPLQA} & 100.00\% & Neutrophil defensins 1, 3 and & Macaca mulatta (Rhesus macaque). \\ 31 | Q01524 & \texttt{LQAKAEPLQA} & 90.00\% & Defensin 6 precursor (Defensin & Homo sapiens (Human). 32 | \\ \hline\hline 33 | \end{tabular} 34 | \end{sidewaystable} 35 | -------------------------------------------------------------------------------- /Body/Images-chap1/pwms.tex: -------------------------------------------------------------------------------- 1 | \begin{table}[ptbh] 2 | \caption[Motif discovery tools using position weight matrices or similar models]{ 3 | Motif discovery tools using position weight matrices or similar models. 4 | As discussed in the text, PWMs are more specific than regular expressions; 5 | however, in general, there are fewer algorithms utilizing this motif model. 
6 | Most of the later tools shown in the table are geared towards 7 | finding binding sites for regulatory proteins upstream 8 | of sets of co--regulated genes. Of these publications, 9 | the seminal manuscript is that by~\citet{lawrence1993detecting}.} 10 | \label{table:pwmMD} 11 | \centering 12 | \begin{tabular}{lcc} \hline\hline 13 | Authors & Year & Citation \\ \hline 14 | \citeauthor{stormo1989identifying} & \citeyear{stormo1989identifying} & \cite{stormo1989identifying} \\ 15 | \citeauthor{lawrence1993detecting} & \citeyear{lawrence1993detecting} & \cite{lawrence1993detecting} \\ 16 | \citeauthor{liu1994collapsed} & \citeyear{liu1994collapsed} & \cite{liu1994collapsed} \\ 17 | \citeauthor{bailey1994fitting} & \citeyear{bailey1994fitting} & \cite{bailey1994fitting} \\ 18 | \citeauthor{leung1996over} & \citeyear{leung1996over} & \cite{leung1996over} \\ 19 | \citeauthor{goffeau1998genomic-scale} & \citeyear{goffeau1998genomic-scale} & \cite{goffeau1998genomic-scale} \\ 20 | \citeauthor{hertz1999identifying} & \citeyear{hertz1999identifying} & \cite{hertz1999identifying} \\ 21 | \citeauthor{workman2000ann-spec} & \citeyear{workman2000ann-spec} & \cite{workman2000ann-spec} \\ 22 | \citeauthor{hughes2000computational} & \citeyear{hughes2000computational} & \cite{hughes2000computational} \\ 23 | \citeauthor{guhathakurta2001identifying} & \citeyear{guhathakurta2001identifying} & \cite{guhathakurta2001identifying} \\ 24 | \citeauthor{bi2004bipartite} & \citeyear{bi2004bipartite} & \cite{bi2004bipartite} \\ 25 | \citeauthor{raphael2004uniform} & \citeyear{raphael2004uniform} & \cite{raphael2004uniform} \\ 26 | \citeauthor{eskin2004from} & \citeyear{eskin2004from} & \cite{eskin2004from} \\ 27 | \citeauthor{siddharthan2005phylogibbs} & \citeyear{siddharthan2005phylogibbs} & \cite{siddharthan2005phylogibbs} \\ 28 | \citeauthor{liu2005principal} & \citeyear{liu2005principal} & \cite{liu2005principal} \\ 29 | \citeauthor{leung2005finding} & \citeyear{leung2005finding} & 
\cite{leung2005finding} \\ 30 | \citeauthor{zhong2005rsir} & \citeyear{zhong2005rsir} & \cite{zhong2005rsir} \\ 31 | \citeauthor{tharakaraman2005alignments} & \citeyear{tharakaraman2005alignments} & \cite{tharakaraman2005alignments} \\ 32 | \citeauthor{down2005nestedmica} & \citeyear{down2005nestedmica} & \cite{down2005nestedmica} \\ 33 | \citeauthor{macisaac2006hypothesis-based} & \citeyear{macisaac2006hypothesis-based} & \cite{macisaac2006hypothesis-based} \\ 34 | \hline 35 | \end{tabular} 36 | \end{table} 37 | -------------------------------------------------------------------------------- /Body/Images-chap1/table_gene_similarity.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{l}\hline\hline 2 | $>$gi$\|$8571926 Arabidopsis thaliana lipid transfer protein \\ 3 | \ttfamily \footnotesize CCACGCGTCCGAAAAAAAAAACAGAAAGTAACATGAGATCTCTCTTATTAGCCGTGTGCCTGGTTCTTGC \\ 4 | \ttfamily \footnotesize TTTACACTGCGGTGAAGCAGCCGTGTCTTGCAACACGGTGATTGCGGATCTTTACCCTTGCTTATCCTAC \\ 5 | \ttfamily \footnotesize GTGACTCAGGGCGGACCGGTCCCAACCCTCTGCTGCAACGGTCTCACAACACTCAAGAGTCAGGCTCAAA \\ 6 | \ttfamily \footnotesize CTTCTGTGGACCGTCAGGGGGTCTGTCGTTGCATCAAATCTGCTATTGGAGGACTCACTCTCTCTCCTAG \\ 7 | \ttfamily \footnotesize AACCATCCAAAATGCTTTGGAATTGCCTTCTAAATGTGGTGTCGATCTCCCTTACAAGTTCAGCCCTTCC \\ 8 | \ttfamily \footnotesize ACTGACTGCGACAGTATCCAGTGAGACAAGCAGAAAATCTTAAAGGAAGCTACTACAAGAACTATAATAA \\ 9 | \ttfamily \footnotesize CCTAATAATTAATAAATGAGGGCATTGGTTTGCTAGTTGCTAATTGATCAGTGATGTATTGTCATTTTGA \\ 10 | \ttfamily \footnotesize ATGTTCTAATATCAGCAGGCACTTATCTCTGAAAAAAAAAAAAAAAA \\ \\ 11 | $>$gi$\|$8571922 Arabidopsis thaliana lipid transfer protein \\ 12 | \ttfamily \footnotesize CCACGCGTCCGAAAACACAAGCGTAGAAAACAAAACTCAACTAATTGTGTTATCACCCAAAAGAGAAGAG \\ 13 | \ttfamily \footnotesize CAAACACAATGGCTTTCGCTTTGAGGTTCTTCACATGCTTTGTTTTGACAGTGTTCATCGTTGCATCAGT \\ 14 | \ttfamily \footnotesize 
GGATGCAGCAATAACATGTGGCACAGTGGCAAGTAGCTTGAGTCCATGTCTAGGCTACCTATCGAAGGGT \\ 15 | \ttfamily \footnotesize GGGGTGGTGCCACCTCCGTGCTGTGCAGGAGTCAAAAAGTTGAACGGTATGGCTCAAACCACACCCGACC \\ 16 | \ttfamily \footnotesize GCCAACAAGCATGCAGATGCTTACAGTCCGCTGCAAAAGGGGTTAATCCAAGTCTAGCCTCTGGCCTTCC \\ 17 | \ttfamily \footnotesize TGGAAAGTGCGGTGTTAGCATCCCCTATCCCATCTCCACGAGCACCAACTGCGCCACCATCAAGTGAAGT \\ 18 | \ttfamily \footnotesize GGGGAATAACGACATCATTTGCCTGAAGAGTATGGTTTCGTATACGTAAAATAAGACGGCTATCTAAGCT \\ 19 | \ttfamily \footnotesize GATATTTACCTTGTCTTTGTTTGTCTTGATGGCTTTGTAATCTTTTGCTTTGTTATGTTGTATACTTGTG \\ 20 | \ttfamily \footnotesize TCTTAACATGTTTAAGATATGATAATATATAGTATCGGTACCTTATTAAAAAAAAAAAAAAA \\ \\ 21 | $>$gi$\|$8571922 Arabidopsis thaliana lipid transfer protein \\ 22 | \ttfamily \footnotesize CCACGCGTCCGAAAACACAAGCGTAGAAAACAAAACTCAACTAATTGTGTTATCACCCAAAAGAGAAGAG \\ 23 | \ttfamily \footnotesize CAAACACAATGGCTTTCGCTTTGAGGTTCTTCACATGCTTTGTTTTGACAGTGTTCATCGTTGCATCAGT \\ 24 | \ttfamily \footnotesize GGATGCAGCAATAACATGTGGCACAGTGGCAAGTAGCTTGAGTCCATGTCTAGGCTACCTATCGAAGGGT \\ 25 | \ttfamily \footnotesize GGGGTGGTGCCACCTCCGTGCTGTGCAGGAGTCAAAAAGTTGAACGGTATGGCTCAAACCACACCCGACC \\ 26 | \ttfamily \footnotesize GCCAACAAGCATGCAGATGCTTACAGTCCGCTGCAAAAGGGGTTAATCCAAGTCTAGCCTCTGGCCTTCC \\ 27 | \ttfamily \footnotesize TGGAAAGTGCGGTGTTAGCATCCCCTATCCCATCTCCACGAGCACCAACTGCGCCACCATCAAGTGAAGT \\ 28 | \ttfamily \footnotesize GGGGAATAACGACATCATTTGCCTGAAGAGTATGGTTTCGTATACGTAAAATAAGACGGCTATCTAAGCT \\ 29 | \ttfamily \footnotesize GATATTTACCTTGTCTTTGTTTGTCTTGATGGCTTTGTAATCTTTTGCTTTGTTATGTTGTATACTTGTG \\ 30 | \ttfamily \footnotesize TCTTAACATGTTTAAGATATGATAATATATAGTATCGGTACCTTATTAAAAAAAAAAAAAAA \\\hline\hline 31 | \end{tabular} 32 | -------------------------------------------------------------------------------- /Body/appb/structbitGraph__t.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structbitGraph__t}{ 2 | \section{bit\-Graph\_\-t Struct Reference} 3 | 
\label{structbitGraph__t}\index{bitGraph_t@{bitGraph\_\-t}} 4 | } 5 | {\tt \#include $<$bit\-Set.h$>$} 6 | 7 | Collaboration diagram for bit\-Graph\_\-t:\begin{figure}[H] 8 | \begin{center} 9 | \leavevmode 10 | \includegraphics[width=49pt]{structbitGraph__t__coll__graph} 11 | \end{center} 12 | \end{figure} 13 | \subsection*{Data Fields} 14 | \begin{CompactItemize} 15 | \item 16 | int \hyperlink{structbitGraph__t_o0}{size} 17 | \item 18 | \hyperlink{structbitSet__t}{bit\-Set\_\-t} $\ast$$\ast$ \hyperlink{structbitGraph__t_o1}{graph} 19 | \end{CompactItemize} 20 | 21 | 22 | \subsection*{Detailed Description} 23 | A bit graph is an array of bit sets. The graph must be of size size x size. This data structure is used to store adjacency matrices. In particular, a bit graph is used in the clustering step. It can easily be considered a set of sets. 24 | 25 | 26 | 27 | Definition at line 48 of file bit\-Set.h. 28 | 29 | \subsection*{Field Documentation} 30 | \hypertarget{structbitGraph__t_o1}{ 31 | \index{bitGraph_t@{bit\-Graph\_\-t}!graph@{graph}} 32 | \index{graph@{graph}!bitGraph_t@{bit\-Graph\_\-t}} 33 | \subsubsection[graph]{\setlength{\rightskip}{0pt plus 5cm}\hyperlink{structbitSet__t}{bit\-Set\_\-t}$\ast$$\ast$ \hyperlink{structbitGraph__t_o1}{bit\-Graph\_\-t::graph}}} 34 | \label{structbitGraph__t_o1} 35 | 36 | 37 | A pointer used to store an array of \hyperlink{structbitSet__t}{bit\-Set\_\-t} objects. 38 | 39 | Definition at line 56 of file bit\-Set.h. 
40 | 41 | Referenced by bit\-Graph\-Check\-Bit(), bit\-Graph\-Row\-Intersection(), bit\-Graph\-Row\-Union(), bit\-Graph\-Set\-False(), bit\-Graph\-Set\-False\-Diagonal(), bit\-Graph\-Set\-False\-Sym(), bit\-Graph\-Set\-True(), bit\-Graph\-Set\-True\-Diagonal(), bit\-Graph\-Set\-True\-Sym(), copy\-Bit\-Graph(), count\-Bit\-Graph\-Non\-Zero(), delete\-Bit\-Graph(), empty\-Bit\-Graph(), empty\-Bit\-Graph\-Row(), fill\-Bit\-Graph(), filter\-Iter(), find\-Cliques(), get\-Stat\-Mat(), mask\-Bit\-Graph(), new\-Bit\-Graph(), print\-Bit\-Graph(), prune\-Bit\-Graph(), and single\-Linkage().\hypertarget{structbitGraph__t_o0}{ 42 | \index{bitGraph_t@{bit\-Graph\_\-t}!size@{size}} 43 | \index{size@{size}!bitGraph_t@{bit\-Graph\_\-t}} 44 | \subsubsection[size]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structbitGraph__t_o0}{bit\-Graph\_\-t::size}}} 45 | \label{structbitGraph__t_o0} 46 | 47 | 48 | The total size of a bit graph, which is assumed to be symmetric. There are {\em size\/} bit sets in a bit graph, each of size {\em size\/}. 49 | 50 | Definition at line 53 of file bit\-Set.h. 51 | 52 | Referenced by convolve(), copy\-Bit\-Graph(), filter\-Graph(), find\-Cliques(), get\-Stat\-Mat(), main(), new\-Bit\-Graph(), and old\-Get\-Stat\-Mat(). 53 | 54 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 55 | \item 56 | \hyperlink{bitSet_8h}{bit\-Set.h}\end{CompactItemize} 57 | -------------------------------------------------------------------------------- /Body/Images-chap1/pwm.tex: -------------------------------------------------------------------------------- 1 | \newcolumntype{H}{>{\columncolor[gray]{0.8}}c} 2 | 3 | \begin{table}[ptbh] 4 | \caption[The construction of a position weight matrix]{ 5 | The construction of a position weight matrix from 6 | the collection of sequences shown in 7 | Figure~\vref{fig:yeast}. 
Part A) shows the number of 8 | nucleotides of each type that occur in each of the seven 9 | positions of the aligned sequences. For example, in the 10 | first position, there are 58 thymines. Part B) shows the 11 | frequency matrix $f$, where each $f_{ij}=(c_{ij}/\sum_j 12 | c_{ij})$. Part C) shows the log--odds matrix $\Theta$, 13 | where each $\Theta_{ij} = \log_2 (f_{ij}/q_j)$ 14 | and $q$ is the vector of background frequencies for the 15 | nucleotides. Part D) shows the scoring of three different 16 | sequences. To compute the score for a sequence, the 17 | corresponding nucleotide at each column is looked up in 18 | $\Theta$ and the columns are summed together. 19 | } 20 | \label{table:pwm} 21 | \centering \scriptsize 22 | \begin{tabular}{r|HcHcHcH} %\hline\hline 23 | \multicolumn{8}{l}{} \\ 24 | \multicolumn{8}{l}{A) Count Matrix ($c_{ij}$):} \\ 25 | \multicolumn{8}{l}{} \\ 26 | A & 5 & 47 & 0 & 0 & 67 & 75 & 0 \\ 27 | T & 58 & 18 & 9 & 75 & 2 & 0 & 10 \\ 28 | G & 7 & 9 & 0 & 0 & 6 & 0 & 0 \\ 29 | C & 5 & 1 & 66 & 0 & 0 & 0 & 65 \\ 30 | \multicolumn{4}{c}{ }& $\Downarrow$\\ 31 | \multicolumn{8}{l}{B) Frequency Matrix ($f_{ij}$):} \\ 32 | \multicolumn{8}{l}{} \\ 33 | A & 0.067 & 0.627 & 0.000 & 0.000 & 0.893 & 1.000 & 0.000 \\ 34 | T & 0.773 & 0.240 & 0.120 & 1.000 & 0.027 & 0.000 & 0.133 \\ 35 | G & 0.093 & 0.120 & 0.000 & 0.000 & 0.080 & 0.000 & 0.000 \\ 36 | C & 0.067 & 0.013 & 0.880 & 0.000 & 0.000 & 0.000 & 0.867 \\ 37 | \multicolumn{4}{c}{ }& $\Downarrow$\\ 38 | \multicolumn{8}{l}{C) Log--odds Matrix ($\Theta_{ij}$):} \\ 39 | \multicolumn{8}{l}{} \\ 40 | A & -1.907 & 1.326 & $\varnothing$ & $\varnothing$ & 1.837 & 2.000 & $\varnothing$ \\ 41 | T & 1.629 & -0.059 & -1.059 & 2.000 & -3.229 & $\varnothing$ & -0.907 \\ 42 | G & -1.421 & -1.059 & $\varnothing$ & $\varnothing$ & -1.644 & $\varnothing$ & $\varnothing$ \\ 43 | C & -1.907 & -4.229 & 1.816 & $\varnothing$ & $\varnothing$ & $\varnothing$ & 1.794 \\ 44 | \multicolumn{4}{c}{ }& $\Downarrow$\\ 45 | 
\multicolumn{8}{l}{D) Example sequence scoring:} \\ 46 | \multicolumn{8}{l}{} \\ 47 | query1 & T & A & C & T & T & A & C\\ 48 | $\Sigma$ & 1.629 & 1.326 & 1.816 & 2.000 & -3.229 & 2.000 & 1.794\\ 49 | \multicolumn{4}{c}{ }& $= 7.335$\\ 50 | query2 & T & T & C & T & A & A & C \\ 51 | $\Sigma$ & 1.629 & -0.059 & 1.816 & 2.000 & 1.837 & 2.000 & 1.794\\ 52 | \multicolumn{4}{c}{ }& $= 11.017$\\ 53 | query3 & G & T & A & T & A & A & T \\ 54 | $\Sigma$ & -1.421 & -0.059 & $\varnothing$ \\ 55 | \multicolumn{4}{c}{ }& $= \varnothing$\\ 56 | %\hline\hline 57 | \end{tabular} 58 | \end{table} 59 | -------------------------------------------------------------------------------- /Body/appb/structsHashEntry__t.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structsHashEntry__t}{ 2 | \section{s\-Hash\-Entry\_\-t Struct Reference} 3 | \label{structsHashEntry__t}\index{sHashEntry_t@{sHashEntry\_\-t}} 4 | } 5 | \subsection*{Data Fields} 6 | \begin{CompactItemize} 7 | \item 8 | char $\ast$ \hyperlink{structsHashEntry__t_o0}{key} 9 | \item 10 | int \hyperlink{structsHashEntry__t_o1}{L} 11 | \item 12 | int \hyperlink{structsHashEntry__t_o2}{data} 13 | \item 14 | int \hyperlink{structsHashEntry__t_o3}{idx} 15 | \end{CompactItemize} 16 | 17 | 18 | \subsection*{Detailed Description} 19 | Type for a hash table entry. This datatype is used to populate a hash table. The most important members of this data structure are the string, or the key, and the index to which that key hashes. 20 | 21 | 22 | 23 | Definition at line 114 of file words.c. 
24 | 25 | \subsection*{Field Documentation} 26 | \hypertarget{structsHashEntry__t_o2}{ 27 | \index{sHashEntry_t@{s\-Hash\-Entry\_\-t}!data@{data}} 28 | \index{data@{data}!sHashEntry_t@{s\-Hash\-Entry\_\-t}} 29 | \subsubsection[data]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsHashEntry__t_o2}{s\-Hash\-Entry\_\-t::data}}} 30 | \label{structsHashEntry__t_o2} 31 | 32 | 33 | A throw away variable, used to store any necessary data 34 | 35 | Definition at line 121 of file words.c. 36 | 37 | Referenced by count\-Words2(), and print\-SHash().\hypertarget{structsHashEntry__t_o3}{ 38 | \index{sHashEntry_t@{s\-Hash\-Entry\_\-t}!idx@{idx}} 39 | \index{idx@{idx}!sHashEntry_t@{s\-Hash\-Entry\_\-t}} 40 | \subsubsection[idx]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsHashEntry__t_o3}{s\-Hash\-Entry\_\-t::idx}}} 41 | \label{structsHashEntry__t_o3} 42 | 43 | 44 | The integer to which the {\em key\/} of length {\em L\/} hashes 45 | 46 | Definition at line 123 of file words.c. 47 | 48 | Referenced by count\-Words2().\hypertarget{structsHashEntry__t_o0}{ 49 | \index{sHashEntry_t@{s\-Hash\-Entry\_\-t}!key@{key}} 50 | \index{key@{key}!sHashEntry_t@{s\-Hash\-Entry\_\-t}} 51 | \subsubsection[key]{\setlength{\rightskip}{0pt plus 5cm}char$\ast$ \hyperlink{structsHashEntry__t_o0}{s\-Hash\-Entry\_\-t::key}}} 52 | \label{structsHashEntry__t_o0} 53 | 54 | 55 | A pointer to a string 56 | 57 | Definition at line 117 of file words.c. 58 | 59 | Referenced by count\-Words2(), print\-SHash(), and search\-SHash().\hypertarget{structsHashEntry__t_o1}{ 60 | \index{sHashEntry_t@{s\-Hash\-Entry\_\-t}!L@{L}} 61 | \index{L@{L}!sHashEntry_t@{s\-Hash\-Entry\_\-t}} 62 | \subsubsection[L]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsHashEntry__t_o1}{s\-Hash\-Entry\_\-t::L}}} 63 | \label{structsHashEntry__t_o1} 64 | 65 | 66 | The length of the string that should be used to compute the hash 67 | 68 | Definition at line 119 of file words.c. 
69 | 70 | Referenced by count\-Words2(), print\-SHash(), and search\-SHash(). 71 | 72 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 73 | \item 74 | \hyperlink{words_8c}{words.c}\end{CompactItemize} 75 | -------------------------------------------------------------------------------- /Body/Images-chap4/table_matrix.tex: -------------------------------------------------------------------------------- 1 | 2 | \tiny 3 | \begin{tabular}{c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c@{\hspace{2mm}}c}\hline\hline 4 | & A & R & N & D & C & Q & E & G & H & I & L & K & M & F & P & S & T & W & Y & V & B & Z & X\\ 5 | A & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1 & -2 & -3 & -4 & -5 & -6 & -7 & -8 & -9 & -10 & -11 & -12\\ 6 | R & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1 & -2 & -3 & -4 & -5 & -6 & -7 & -8 & -9 & -10 & -11\\ 7 | N & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1 & -2 & -3 & -4 & -5 & -6 & -7 & -8 & -9 & -10\\ 8 | D & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1 & -2 & -3 & -4 & -5 & -6 & -7 & -8 & -9\\ 9 | C & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1 & -2 & -3 & -4 & -5 & -6 & -7 & -8\\ 10 | Q & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1 & -2 & -3 & -4 & -5 & -6 & -7\\ 11 | E & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1 & -2 & -3 & -4 & -5 & -6\\ 12 | G & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1 & -2 & -3 & -4 & -5\\ 13 | H & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1 & -2 & -3 & -4\\ 14 | I & 1 & 2 & 3 & 4 & 
5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1 & -2 & -3\\ 15 | L & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1 & -2\\ 16 | K & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0 & -1\\ 17 | M & -2 & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1 & 0\\ 18 | F & -3 & -2 & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2 & 1\\ 19 | P & -4 & -3 & -2 & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3 & 2\\ 20 | S & -5 & -4 & -3 & -2 & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4 & 3\\ 21 | T & -6 & -5 & -4 & -3 & -2 & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5 & 4\\ 22 | W & -7 & -6 & -5 & -4 & -3 & -2 & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6 & 5\\ 23 | Y & -8 & -7 & -6 & -5 & -4 & -3 & -2 & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7 & 6\\ 24 | V & -9 & -8 & -7 & -6 & -5 & -4 & -3 & -2 & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8 & 7\\ 25 | B & -10 & -9 & -8 & -7 & -6 & -5 & -4 & -3 & -2 & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9 & 8\\ 26 | Z & -11 & -10 & -9 & -8 & -7 & -6 & -5 & -4 & -3 & -2 & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 9\\ 27 | X & -12 & -11 & -10 & -9 & -8 & -7 & -6 & -5 & -4 & -3 & -2 & -1 & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10\\ \\ 28 | \hline\hline\end{tabular} 29 | -------------------------------------------------------------------------------- /Body/appb/matrices_8c.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{matrices_8c}{ 2 | \section{matrices.c File Reference} 3 | \label{matrices_8c}\index{matrices.c@{matrices.c}} 4 | } 5 | {\tt \#include $<$stdio.h$>$}\par 6 | {\tt \#include $<$string.h$>$}\par 7 | {\tt \#include \char`\"{}matdata.h\char`\"{}}\par 8 | {\tt 
\#include \char`\"{}matrixmap.h\char`\"{}}\par 9 | 10 | 11 | Include dependency graph for matrices.c:\begin{figure}[H] 12 | \begin{center} 13 | \leavevmode 14 | \includegraphics[width=154pt]{matrices_8c__incl} 15 | \end{center} 16 | \end{figure} 17 | \subsection*{Defines} 18 | \begin{CompactItemize} 19 | \item 20 | \#define \hyperlink{matrices_8c_a0}{DEFAULT\_\-MATRIX}~\hyperlink{matrices_8h_a12}{blosum62} 21 | \end{CompactItemize} 22 | \subsection*{Functions} 23 | \begin{CompactItemize} 24 | \item 25 | void \hyperlink{matrices_8c_a1}{get\-Matrix\-By\-Name} (char \hyperlink{matrixmap_8h_a0}{name}\mbox{[}$\,$\mbox{]}, const int($\ast$$\ast$matp)\mbox{[}MATRIX\_\-SIZE\mbox{]}) 26 | \end{CompactItemize} 27 | 28 | 29 | \subsection*{Detailed Description} 30 | This file contains functions for handling scoring matrices used for the sequence based Gemoda. 31 | 32 | Definition in file \hyperlink{matrices_8c-source}{matrices.c}. 33 | 34 | \subsection*{Define Documentation} 35 | \hypertarget{matrices_8c_a0}{ 36 | \index{matrices.c@{matrices.c}!DEFAULT_MATRIX@{DEFAULT\_\-MATRIX}} 37 | \index{DEFAULT_MATRIX@{DEFAULT\_\-MATRIX}!matrices.c@{matrices.c}} 38 | \subsubsection[DEFAULT\_\-MATRIX]{\setlength{\rightskip}{0pt plus 5cm}\#define DEFAULT\_\-MATRIX~\hyperlink{matrices_8h_a12}{blosum62}}} 39 | \label{matrices_8c_a0} 40 | 41 | 42 | 43 | 44 | Definition at line 7 of file matrices.c. 45 | 46 | Referenced by get\-Matrix\-By\-Name(). 
47 | 48 | \subsection*{Function Documentation} 49 | \hypertarget{matrices_8c_a1}{ 50 | \index{matrices.c@{matrices.c}!getMatrixByName@{getMatrixByName}} 51 | \index{getMatrixByName@{getMatrixByName}!matrices.c@{matrices.c}} 52 | \subsubsection[getMatrixByName]{\setlength{\rightskip}{0pt plus 5cm}void get\-Matrix\-By\-Name (char {\em name}\mbox{[}$\,$\mbox{]}, const int $\ast$$\ast$ {\em matp}\mbox{[}MATRIX\_\-SIZE\mbox{]})}} 53 | \label{matrices_8c_a1} 54 | 55 | 56 | A simple function to take the matrix name argument given as input to gemoda and return the physical memory location of that matrix by using the matrix\_\-map construct. Input: a string containing the matrix name and a pointer to a two-dimensional array. Output: None, though the value of the pointer given as input is changed to reflect the location of the matrix. 57 | 58 | Definition at line 34 of file matrices.c. 59 | 60 | References DEFAULT\_\-MATRIX, and matrix\_\-map. 61 | 62 | \scriptsize\begin{verbatim}35 { 63 | 36 int i; 64 | 37 for (i = 0; matrix_map[i].name != NULL; i++) 65 | 38 { 66 | 39 if (strcmp (name, matrix_map[i].name) == 0) 67 | 40 { 68 | 41 break; 69 | 42 } 70 | 43 } 71 | 44 if (matrix_map[i].name != NULL) 72 | 45 { 73 | 46 *matp = (matrix_map[i].mat); 74 | 47 } 75 | 48 else 76 | 49 { 77 | 50 *matp = (DEFAULT_MATRIX); 78 | 51 } 79 | 52 } 80 | \end{verbatim} 81 | \normalsize 82 | 83 | 84 | -------------------------------------------------------------------------------- /Body/appb/structsPat__t.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structsPat__t}{ 2 | \section{s\-Pat\_\-t Struct Reference} 3 | \label{structsPat__t}\index{sPat_t@{sPat\_\-t}} 4 | } 5 | {\tt \#include $<$spat.h$>$} 6 | 7 | Collaboration diagram for s\-Pat\_\-t:\begin{figure}[H] 8 | \begin{center} 9 | \leavevmode 10 | \includegraphics[width=46pt]{structsPat__t__coll__graph} 11 | \end{center} 12 | \end{figure} 13 | \subsection*{Data Fields} 14 | 
\begin{CompactItemize} 15 | \item 16 | char $\ast$ \hyperlink{structsPat__t_o0}{string} 17 | \item 18 | int \hyperlink{structsPat__t_o1}{length} 19 | \item 20 | int \hyperlink{structsPat__t_o2}{support} 21 | \item 22 | \hyperlink{structsOffset__t}{s\-Offset\_\-t} $\ast$ \hyperlink{structsPat__t_o3}{offset} 23 | \end{CompactItemize} 24 | 25 | 26 | \subsection*{Detailed Description} 27 | This data structure is used to store the locations of all the instances of a particular word of length {\em length\/} in a set of sequences. This data structure is used principally by the string based version of Gemoda and is used to store words that are hashed before the comparison phase. 28 | 29 | 30 | 31 | Definition at line 36 of file spat.h. 32 | 33 | \subsection*{Field Documentation} 34 | \hypertarget{structsPat__t_o1}{ 35 | \index{sPat_t@{s\-Pat\_\-t}!length@{length}} 36 | \index{length@{length}!sPat_t@{s\-Pat\_\-t}} 37 | \subsubsection[length]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsPat__t_o1}{s\-Pat\_\-t::length}}} 38 | \label{structsPat__t_o1} 39 | 40 | 41 | The length of this word. 42 | 43 | Definition at line 43 of file spat.h. 44 | 45 | Referenced by count\-Words2(), and print\-SPats().\hypertarget{structsPat__t_o3}{ 46 | \index{sPat_t@{s\-Pat\_\-t}!offset@{offset}} 47 | \index{offset@{offset}!sPat_t@{s\-Pat\_\-t}} 48 | \subsubsection[offset]{\setlength{\rightskip}{0pt plus 5cm}\hyperlink{structsOffset__t}{s\-Offset\_\-t}$\ast$ \hyperlink{structsPat__t_o3}{s\-Pat\_\-t::offset}}} 49 | \label{structsPat__t_o3} 50 | 51 | 52 | An array of \hyperlink{structsOffset__t}{s\-Offset\_\-t} objects storing the loci, or offsets where this word occurs. 53 | 54 | Definition at line 50 of file spat.h. 
55 | 56 | Referenced by count\-Words2(), destroy\-SPat\-A(), and main().\hypertarget{structsPat__t_o0}{ 57 | \index{sPat_t@{s\-Pat\_\-t}!string@{string}} 58 | \index{string@{string}!sPat_t@{s\-Pat\_\-t}} 59 | \subsubsection[string]{\setlength{\rightskip}{0pt plus 5cm}char$\ast$ \hyperlink{structsPat__t_o0}{s\-Pat\_\-t::string}}} 60 | \label{structsPat__t_o0} 61 | 62 | 63 | The pointer to the string for this word. 64 | 65 | Definition at line 40 of file spat.h. 66 | 67 | Referenced by count\-Words2().\hypertarget{structsPat__t_o2}{ 68 | \index{sPat_t@{s\-Pat\_\-t}!support@{support}} 69 | \index{support@{support}!sPat_t@{s\-Pat\_\-t}} 70 | \subsubsection[support]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsPat__t_o2}{s\-Pat\_\-t::support}}} 71 | \label{structsPat__t_o2} 72 | 73 | 74 | The number of times this word occurs in the sequence set. 75 | 76 | Definition at line 46 of file spat.h. 77 | 78 | Referenced by count\-Words2(). 79 | 80 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 81 | \item 82 | \hyperlink{spat_8h}{spat.h}\end{CompactItemize} 83 | -------------------------------------------------------------------------------- /caslon.sty: -------------------------------------------------------------------------------- 1 | \RequirePackage[LY1]{fontenc} 2 | \RequirePackage{myfss} 3 | \ProvidesPackage{caslon}[2005/07/11 sets up Caslon font] 4 | 5 | %set default 6 | \renewcommand{\rmdefault}{ACaslonPro}%I'm just assuming it will be the default roman 7 | 8 | %map ornaments 9 | \DeclareTextOrnament{1}{49}{ACaslonPro} 10 | \DeclareTextOrnament{2}{50}{ACaslonPro} 11 | \DeclareTextOrnament{3}{51}{ACaslonPro} 12 | \DeclareTextOrnament{4}{111}{ACaslonPro} 13 | \DeclareTextOrnament{5}{121}{ACaslonPro} 14 | \DeclareTextOrnament{6}{114}{ACaslonPro} 15 | \DeclareTextOrnament{7}{116}{ACaslonPro} 16 | \DeclareTextOrnament{8}{66}{ACaslonPro} 17 | \DeclareTextOrnament{9}{98}{ACaslonPro} 18 | 
\DeclareTextOrnament{10}{86}{ACaslonPro} 19 | \DeclareTextOrnament{11}{118}{ACaslonPro} 20 | \DeclareTextOrnament{12}{91}{ACaslonPro} 21 | \DeclareTextOrnament{13}{93}{ACaslonPro} 22 | \DeclareTextOrnament{14}{106}{ACaslonPro} 23 | \DeclareTextOrnament{15}{74}{ACaslonPro} 24 | \DeclareTextOrnament{16}{75}{ACaslonPro} 25 | \DeclareTextOrnament{17}{108}{ACaslonPro} 26 | \DeclareTextOrnament{18}{99}{ACaslonPro} 27 | \DeclareTextOrnament{19}{67}{ACaslonPro} 28 | \DeclareTextOrnament{20}{104}{ACaslonPro} 29 | \DeclareTextOrnament{21}{72}{ACaslonPro} 30 | \DeclareTextOrnament{22}{71}{ACaslonPro} 31 | \DeclareTextOrnament{23}{103}{ACaslonPro} 32 | \DeclareTextOrnament{24}{105}{ACaslonPro} 33 | \DeclareTextOrnament{25}{73}{ACaslonPro} 34 | \DeclareTextOrnament{26}{109}{ACaslonPro} 35 | \DeclareTextOrnament{27}{77}{ACaslonPro} 36 | \DeclareTextOrnament{28}{78}{ACaslonPro} 37 | \DeclareTextOrnament{29}{110}{ACaslonPro} 38 | \DeclareTextOrnament{30}{80}{ACaslonPro} 39 | \DeclareTextOrnament{31}{112}{ACaslonPro} 40 | \DeclareTextOrnament{32}{88}{ACaslonPro} 41 | \DeclareTextOrnament{33}{120}{ACaslonPro} 42 | \DeclareTextOrnament{34}{122}{ACaslonPro} 43 | \DeclareTextOrnament{35}{90}{ACaslonPro} 44 | \DeclareTextOrnament{36}{53}{ACaslonPro} 45 | \DeclareTextOrnament{37}{52}{ACaslonPro} 46 | \DeclareTextOrnament{38}{102}{ACaslonPro} 47 | \DeclareTextOrnament{39}{70}{ACaslonPro} 48 | \DeclareTextOrnament{40}{65}{ACaslonPro} 49 | \DeclareTextOrnament{41}{97}{ACaslonPro} 50 | \DeclareTextOrnament{42}{100}{ACaslonPro} 51 | \DeclareTextOrnament{43}{68}{ACaslonPro} 52 | \DeclareTextOrnament{44}{83}{ACaslonPro} 53 | \DeclareTextOrnament{45}{115}{ACaslonPro} 54 | \DeclareTextOrnament{46}{113}{ACaslonPro} 55 | \DeclareTextOrnament{47}{119}{ACaslonPro} 56 | \DeclareTextOrnament{48}{101}{ACaslonPro} 57 | \DeclareTextOrnament{49}{85}{ACaslonPro} 58 | \DeclareTextOrnament{50}{117}{ACaslonPro} 59 | \DeclareTextOrnament{51}{81}{ACaslonPro} 60 | \DeclareTextOrnament{52}{87}{ACaslonPro} 61 | 
\DeclareTextOrnament{53}{69}{ACaslonPro} 62 | 63 | %are the following always needed, or are they quirks of Caslon? 64 | \newcommand*{\Euro}{ euro}%should be at \char 1 65 | \renewcommand*{\ae}{\char 26} 66 | \renewcommand*{\AE}{\char 29} 67 | \renewcommand*{\o}{\char 28} 68 | \renewcommand*{\O}{\char 31} 69 | \newcommand*{\asterisk}{\char 42} 70 | %\renewcommand*{\"}{\char 127}%don't do this; it breaks all the compounds that you actually use for diereses. 71 | \newcommand*{\porm}{\char 177} 72 | \newcommand*{\squared}{\char 178} 73 | \newcommand*{\cubed}{\char 179} 74 | \newcommand*{\totheone}{\char 185} 75 | \renewcommand*{\texttimes}{\char 215} 76 | \renewcommand*{\textdivide}{\char 247} 77 | -------------------------------------------------------------------------------- /Body/appb/structsHash__t.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structsHash__t}{ 2 | \section{s\-Hash\_\-t Struct Reference} 3 | \label{structsHash__t}\index{sHash_t@{sHash\_\-t}} 4 | } 5 | Collaboration diagram for s\-Hash\_\-t:\begin{figure}[H] 6 | \begin{center} 7 | \leavevmode 8 | \includegraphics[width=56pt]{structsHash__t__coll__graph} 9 | \end{center} 10 | \end{figure} 11 | \subsection*{Data Fields} 12 | \begin{CompactItemize} 13 | \item 14 | int $\ast$ \hyperlink{structsHash__t_o0}{hash\-Size} 15 | \item 16 | int $\ast$ \hyperlink{structsHash__t_o1}{i\-Hash\-Size} 17 | \item 18 | int \hyperlink{structsHash__t_o2}{total\-Size} 19 | \item 20 | \hyperlink{structsHashEntry__t}{s\-Hash\-Entry\_\-t} $\ast$$\ast$ \hyperlink{structsHash__t_o3}{hash} 21 | \end{CompactItemize} 22 | 23 | 24 | \subsection*{Detailed Description} 25 | A data structure for a hash table. At its root, this structure is just an array of hash entry objects. As well, there are members used to track the size of the hash table. 26 | 27 | 28 | 29 | Definition at line 132 of file words.c. 
30 | 31 | \subsection*{Field Documentation} 32 | \hypertarget{structsHash__t_o3}{ 33 | \index{sHash_t@{s\-Hash\_\-t}!hash@{hash}} 34 | \index{hash@{hash}!sHash_t@{s\-Hash\_\-t}} 35 | \subsubsection[hash]{\setlength{\rightskip}{0pt plus 5cm}\hyperlink{structsHashEntry__t}{s\-Hash\-Entry\_\-t}$\ast$$\ast$ \hyperlink{structsHash__t_o3}{s\-Hash\_\-t::hash}}} 36 | \label{structsHash__t_o3} 37 | 38 | 39 | An array \hyperlink{structsHashEntry__t}{s\-Hash\-Entry\_\-t} space objects. 40 | 41 | Definition at line 148 of file words.c. 42 | 43 | Referenced by destroy\-SHash(), print\-SHash(), and search\-SHash().\hypertarget{structsHash__t_o0}{ 44 | \index{sHash_t@{s\-Hash\_\-t}!hashSize@{hashSize}} 45 | \index{hashSize@{hashSize}!sHash_t@{s\-Hash\_\-t}} 46 | \subsubsection[hashSize]{\setlength{\rightskip}{0pt plus 5cm}int$\ast$ \hyperlink{structsHash__t_o0}{s\-Hash\_\-t::hash\-Size}}} 47 | \label{structsHash__t_o0} 48 | 49 | 50 | A pointer to an integer that is used to store an array of integers that keep track of the number of \hyperlink{structsHashEntry__t}{s\-Hash\-Entry\_\-t} objects that are hashed to a particular integer. 51 | 52 | Definition at line 138 of file words.c. 53 | 54 | Referenced by destroy\-SHash(), and search\-SHash().\hypertarget{structsHash__t_o1}{ 55 | \index{sHash_t@{s\-Hash\_\-t}!iHashSize@{iHashSize}} 56 | \index{iHashSize@{iHashSize}!sHash_t@{s\-Hash\_\-t}} 57 | \subsubsection[iHashSize]{\setlength{\rightskip}{0pt plus 5cm}int$\ast$ \hyperlink{structsHash__t_o1}{s\-Hash\_\-t::i\-Hash\-Size}}} 58 | \label{structsHash__t_o1} 59 | 60 | 61 | A pointer to an integer that is used to store an array of integers that keep track of the number of \hyperlink{structsHashEntry__t}{s\-Hash\-Entry\_\-t} objects that are hashed to a particular integer. 62 | 63 | Definition at line 143 of file words.c. 
64 | 65 | Referenced by destroy\-SHash(), and search\-SHash().\hypertarget{structsHash__t_o2}{ 66 | \index{sHash_t@{s\-Hash\_\-t}!totalSize@{totalSize}} 67 | \index{totalSize@{totalSize}!sHash_t@{s\-Hash\_\-t}} 68 | \subsubsection[totalSize]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structsHash__t_o2}{s\-Hash\_\-t::total\-Size}}} 69 | \label{structsHash__t_o2} 70 | 71 | 72 | An integer that stores the total number of slots available in our hash. 73 | 74 | Definition at line 146 of file words.c. 75 | 76 | Referenced by init\-SHash(), and search\-SHash(). 77 | 78 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 79 | \item 80 | \hyperlink{words_8c}{words.c}\end{CompactItemize} 81 | -------------------------------------------------------------------------------- /Body/appb/structbitSet__t.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structbitSet__t}{ 2 | \section{bit\-Set\_\-t Struct Reference} 3 | \label{structbitSet__t}\index{bitSet_t@{bitSet\_\-t}} 4 | } 5 | {\tt \#include $<$bit\-Set.h$>$} 6 | 7 | \subsection*{Data Fields} 8 | \begin{CompactItemize} 9 | \item 10 | int \hyperlink{structbitSet__t_o0}{max} 11 | \item 12 | int \hyperlink{structbitSet__t_o1}{slots} 13 | \item 14 | int \hyperlink{structbitSet__t_o2}{bytes} 15 | \item 16 | \hyperlink{bitSet_8h_a9}{bit\_\-t} $\ast$ \hyperlink{structbitSet__t_o3}{tf} 17 | \end{CompactItemize} 18 | 19 | 20 | \subsection*{Detailed Description} 21 | A bit set is a data structure for storing set objects that allows for quick set operations such as intersections, unions, differences, and so forth. On a standard 32-bit architecture, 32 operations can be performed at the same time, greatly speeding the clique finding stage of the algorithm. 22 | 23 | 24 | 25 | Definition at line 24 of file bit\-Set.h. 
26 | 27 | \subsection*{Field Documentation} 28 | \hypertarget{structbitSet__t_o2}{ 29 | \index{bitSet_t@{bit\-Set\_\-t}!bytes@{bytes}} 30 | \index{bytes@{bytes}!bitSet_t@{bit\-Set\_\-t}} 31 | \subsubsection[bytes]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structbitSet__t_o2}{bit\-Set\_\-t::bytes}}} 32 | \label{structbitSet__t_o2} 33 | 34 | 35 | This variable actually holds the total number of bits, rather than the number of bytes. However, we chose to keep this name rather than make a variety of changes. 36 | 37 | Definition at line 37 of file bit\-Set.h. 38 | 39 | Referenced by empty\-Set(), fill\-Set(), and new\-Bit\-Set().\hypertarget{structbitSet__t_o0}{ 40 | \index{bitSet_t@{bit\-Set\_\-t}!max@{max}} 41 | \index{max@{max}!bitSet_t@{bit\-Set\_\-t}} 42 | \subsubsection[max]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structbitSet__t_o0}{bit\-Set\_\-t::max}}} 43 | \label{structbitSet__t_o0} 44 | 45 | 46 | The maximum integer that can be set to true or false. 47 | 48 | Definition at line 28 of file bit\-Set.h. 49 | 50 | Referenced by new\-Bit\-Set(), next\-Bit\-Bit\-Set(), set\-False(), and set\-True().\hypertarget{structbitSet__t_o1}{ 51 | \index{bitSet_t@{bit\-Set\_\-t}!slots@{slots}} 52 | \index{slots@{slots}!bitSet_t@{bit\-Set\_\-t}} 53 | \subsubsection[slots]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structbitSet__t_o1}{bit\-Set\_\-t::slots}}} 54 | \label{structbitSet__t_o1} 55 | 56 | 57 | The total number of slots, where a slot holds a number of bits equal to the size of a bit\_\-t space object. 58 | 59 | Definition at line 32 of file bit\-Set.h. 
60 | 61 | Referenced by bit\-Set3Way\-Intersection(), bit\-Set\-Difference(), bit\-Set\-Intersection(), bit\-Set\-Sum(), bit\-Set\-Union(), copy\-Set(), and new\-Bit\-Set().\hypertarget{structbitSet__t_o3}{ 62 | \index{bitSet_t@{bit\-Set\_\-t}!tf@{tf}} 63 | \index{tf@{tf}!bitSet_t@{bit\-Set\_\-t}} 64 | \subsubsection[tf]{\setlength{\rightskip}{0pt plus 5cm}\hyperlink{bitSet_8h_a9}{bit\_\-t}$\ast$ \hyperlink{structbitSet__t_o3}{bit\-Set\_\-t::tf}}} 65 | \label{structbitSet__t_o3} 66 | 67 | 68 | A pointer to a bit\_\-t, which is used to store an array of these objects. 69 | 70 | Definition at line 40 of file bit\-Set.h. 71 | 72 | Referenced by bit\-Set3Way\-Intersection(), bit\-Set\-Difference(), bit\-Set\-Intersection(), bit\-Set\-Sum(), bit\-Set\-Union(), check\-Bit(), copy\-Set(), count\-Set(), delete\-Bit\-Set(), empty\-Set(), fill\-Set(), flip\-Bits(), new\-Bit\-Set(), next\-Bit\-Bit\-Set(), print\-Binary\-Bit\-Set(), set\-False(), and set\-True(). 73 | 74 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 75 | \item 76 | \hyperlink{bitSet_8h}{bit\-Set.h}\end{CompactItemize} 77 | -------------------------------------------------------------------------------- /Body/Images-chap4/fig_pda.tex: -------------------------------------------------------------------------------- 1 | \psset{xunit=0.4cm,yunit=0.4cm} 2 | \begin{pspicture}(-1,-1)(23,31)%\showgrid 3 | \rput(8.625,25){ 4 | \scalebox{0.25}{ 5 | %\psframe[linewidth=1mm](0,0)(23,23) 6 | \pscurve[linewidth=2mm,linecolor=gray](5.5,18.3)(11.5,22.3)(19.5,19.8)(13.5,12.5)(17.5,7.5)(15.5,2.3)(8.5,0.5)(2.5,5.5) 7 | \psframe(0,0)(23,23) 8 | \pnode(0,0){bl} 9 | \pnode(23,0){br} 10 | %\psgrid[griddots=5,subgriddiv=0,gridlabels=0pt](0,0)(23,23) 11 | } 12 | } 13 | \rput(0,0){ 14 | \pnode(0,23){tl} 15 | \pnode(23,23){tr} 16 | \ncline[linestyle=dotted]{bl}{tl} 17 | \ncline[linestyle=dotted]{br}{tr} 18 | \psgrid[griddots=5,subgriddiv=0,gridlabels=0pt](0,0)(23,23) 19 | 
%\psframe(0,0)(23,23) 20 | } 21 | \rput[tl](-1,22.5){ 22 | \rput(0,0){\small \texttt X} 23 | \rput(0,-1){\small \texttt Z} 24 | \rput(0,-2){\small \texttt B} 25 | \rput(0,-3){\small \texttt V} 26 | \rput(0,-4){\small \texttt Y} 27 | \rput(0,-5){\small \texttt W} 28 | \rput(0,-6){\small \texttt T} 29 | \rput(0,-7){\small \texttt S} 30 | \rput(0,-8){\small \texttt P} 31 | \rput(0,-9){\small \texttt F} 32 | \rput(0,-10){\small \texttt M} 33 | \rput(0,-11){\small \texttt K} 34 | \rput(0,-12){\small \texttt L} 35 | \rput(0,-13){\small \texttt I} 36 | \rput(0,-14){\small \texttt H} 37 | \rput(0,-15){\small \texttt G} 38 | \rput(0,-16){\small \texttt E} 39 | \rput(0,-17){\small \texttt Q} 40 | \rput(0,-18){\small \texttt C} 41 | \rput(0,-19){\small \texttt D} 42 | \rput(0,-20){\small \texttt N} 43 | \rput(0,-21){\small \texttt R} 44 | \rput(0,-22){\small \texttt A} 45 | } 46 | \rput[tl](0.5,-1){ 47 | \rput[t](0,0){\small \texttt A} 48 | \rput[t](1,0){\small \texttt R} 49 | \rput[t](2,0){\small \texttt N} 50 | \rput[t](3,0){\small \texttt D} 51 | \rput[t](4,0){\small \texttt C} 52 | \rput[t](5,0){\small \texttt Q} 53 | \rput[t](6,0){\small \texttt E} 54 | \rput[t](7,0){\small \texttt G} 55 | \rput[t](8,0){\small \texttt H} 56 | \rput[t](9,0){\small \texttt I} 57 | \rput[t](10,0){\small \texttt L} 58 | \rput[t](11,0){\small \texttt K} 59 | \rput[t](12,0){\small \texttt M} 60 | \rput[t](13,0){\small \texttt F} 61 | \rput[t](14,0){\small \texttt P} 62 | \rput[t](15,0){\small \texttt S} 63 | \rput[t](16,0){\small \texttt T} 64 | \rput[t](17,0){\small \texttt W} 65 | \rput[t](18,0){\small \texttt Y} 66 | \rput[t](19,0){\small \texttt V} 67 | \rput[t](20,0){\small \texttt B} 68 | \rput[t](21,0){\small \texttt Z} 69 | \rput[t](22,0){\small \texttt X} 70 | } 71 | \rput(0,0){ 72 | \newcommand{\mydot}{\psdot(0,0)} 73 | \psset{arrowscale=2} 74 | \psset{linewidth=0.5mm} 75 | 
\pscurve[linewidth=2mm,linecolor=gray](5.5,18.3)(11.5,22.3)(19.5,19.8)(13.5,12.5)(17.5,7.5)(15.5,2.3)(8.5,0.5)(2.5,5.5) 76 | 77 | \rput(5.5,18.3){\rnode{a}{\mydot}} 78 | \rput(5.5,17.0){\texttt{(Q,Y)}} 79 | 80 | \rput(11.5,22.3){\rnode{b}{\mydot}} 81 | \rput[b](12,20.5){\texttt{(K,X)}} 82 | 83 | \rput(19.5,19.8){\rnode{c}{\mydot}} 84 | \rput[l](20.5,19.8){\texttt{(V,V)}} 85 | 86 | \rput(13.5,12.5){\rnode{d}{\mydot}} 87 | \rput[r](12.5,12.5){\texttt{(F,M)}} 88 | 89 | \rput(17.5,7.5){\rnode{e}{\mydot}} 90 | \rput[l](18.5,7.5){\texttt{(W,G)}} 91 | 92 | \rput(15.5,2.3){\rnode{f}{\mydot}} 93 | \rput[t](17.5,2){\texttt{(S,N)}} 94 | 95 | \rput(8.5,0.5){\rnode{g}{\mydot}} 96 | \rput[b](8.8,1.8){\texttt{(H,A)}} 97 | 98 | \rput(2.5,5.5){\rnode{h}{\mydot}} 99 | \rput[b](2.5,6.2){\texttt{(N,Q)}} 100 | 101 | \ncline{->}{a}{b} 102 | \ncline{->}{b}{c} 103 | \ncline{->}{c}{d} 104 | \ncline{->}{d}{e} 105 | \ncline{->}{e}{f} 106 | \ncline{->}{f}{g} 107 | \ncline{->}{g}{h} 108 | } 109 | \end{pspicture} 110 | -------------------------------------------------------------------------------- /Body/Images-chap1/regexs.tex: -------------------------------------------------------------------------------- 1 | \begin{table}[ptbh] 2 | \caption[Motif discovery tools using regular expressions or similar models]{ 3 | Motif discovery tools using regular expressions or similar 4 | models. This list is not intended to be exhaustive; 5 | however, it includes many of the well--known motif discovery 6 | tools used in bioinformatics. Early methods tended to use 7 | consensus strings or simple word counting approaches, i.e.\ 8 | counting the occurrences of ``n--mers'' such as the 4--mer 9 | \texttt{ATGC}. Words that are statistically over--represented 10 | are called motifs. Later approaches used more complex 11 | regular expressions, cf.~\citet{rigoutsos1998combinatorial}. 
12 | } 13 | \label{table:regexMD} 14 | \centering 15 | \begin{tabular}{lcc} \hline\hline 16 | Authors & Year & Citation \\ \hline 17 | \citeauthor{queen1982improvements} & \citeyear{queen1982improvements} & \cite{queen1982improvements} \\ 18 | \citeauthor{galas1985rigorous} & \citeyear{galas1985rigorous} & \cite{galas1985rigorous} \\ 19 | \citeauthor{mengeritsky1987recognition} & \citeyear{mengeritsky1987recognition} & \cite{mengeritsky1987recognition} \\ 20 | \citeauthor{staden1989methods} & \citeyear{staden1989methods} & \cite{staden1989methods} \\ 21 | \citeauthor{neuwald1994detecting} & \citeyear{neuwald1994detecting} & \cite{neuwald1994detecting} \\ 22 | \citeauthor{jonassen1995finding} & \citeyear{jonassen1995finding} & \cite{jonassen1995finding} \\ 23 | \citeauthor{wolferstetter1996identification} & \citeyear{wolferstetter1996identification} & \cite{wolferstetter1996identification} \\ 24 | \citeauthor{sagot1997multiple} & \citeyear{sagot1997multiple} & \cite{sagot1997multiple} \\ 25 | \citeauthor{rigoutsos1998combinatorial} & \citeyear{rigoutsos1998combinatorial} & \cite{rigoutsos1998combinatorial,floratos1999pattern} \\ 26 | \citeauthor{van1998extracting} & \citeyear{van1998extracting} & \cite{van1998extracting,van2000discovering} \\ 27 | \citeauthor{jacobs2000computational} & \citeyear{jacobs2000computational} & \cite{jacobs2000computational} \\ 28 | \citeauthor{marsan2000algorithms} & \citeyear{marsan2000algorithms} & \cite{marsan2000algorithms} \\ 29 | \citeauthor{pevzner2000combinatorial} & \citeyear{pevzner2000combinatorial} & \cite{pevzner2000combinatorial} \\ 30 | \citeauthor{bussemaker2000building} & \citeyear{bussemaker2000building} & \cite{bussemaker2000building} \\ 31 | \citeauthor{kielbasa2001combining} & \citeyear{kielbasa2001combining} & \cite{kielbasa2001combining} \\ 32 | \citeauthor{horton2001tsukuba} & \citeyear{horton2001tsukuba} & \cite{horton2001tsukuba} \\ 33 | \citeauthor{keich2002subtle} & \citeyear{keich2002subtle} & 
\cite{keich2002subtle} \\ 34 | \citeauthor{eskin2002finding} & \citeyear{eskin2002finding} & \cite{eskin2002finding} \\ 35 | \citeauthor{buhler2002finding} & \citeyear{buhler2002finding} & \cite{buhler2002finding} \\ 36 | \citeauthor{sinha2002discovery} & \citeyear{sinha2002discovery} & \cite{sinha2002discovery} \\ 37 | \citeauthor{price2003finding} & \citeyear{price2003finding} & \cite{price2003finding} \\ 38 | \citeauthor{sinha2003discriminative} & \citeyear{sinha2003discriminative} & \cite{sinha2003discriminative} \\ 39 | \citeauthor{danilova2003an} & \citeyear{danilova2003an} & \cite{danilova2003an} \\ 40 | \citeauthor{ganesh2003mopac} & \citeyear{ganesh2003mopac} & \cite{ganesh2003mopac} \\ 41 | \citeauthor{liang2004cwinnower} & \citeyear{liang2004cwinnower} & \cite{liang2004cwinnower} \\ 42 | \citeauthor{fogel2004discovery} & \citeyear{fogel2004discovery} & \cite{fogel2004discovery} \\ 43 | \citeauthor{pavesi2004weeder} & \citeyear{pavesi2004weeder} & \cite{pavesi2004weeder} \\ 44 | \citeauthor{hernandez2004model} & \citeyear{hernandez2004model} & \cite{hernandez2004model} \\ 45 | \citeauthor{markstein2004regulatory} & \citeyear{markstein2004regulatory} & \cite{markstein2004regulatory} \\ 46 | \citeauthor{frith2004finding} & \citeyear{frith2004finding} & \cite{frith2004finding} \\ 47 | \citeauthor{sumazin2005dwe} & \citeyear{sumazin2005dwe} & \cite{sumazin2005dwe} \\ 48 | \hline 49 | \end{tabular} 50 | \end{table} 51 | -------------------------------------------------------------------------------- /patchcmd.sty: -------------------------------------------------------------------------------- 1 | %% 2 | %% This is file `patchcmd.sty', 3 | %% generated with the docstrip utility. 
4 | %% 5 | %% The original source files were: 6 | %% 7 | %% patchcmd.dtx 8 | %% 9 | %%% ==================================================================== 10 | %%% @LaTeX-doc-source-file{ 11 | %%% filename = "patchcmd.dtx", 12 | %%% version = "1.03", 13 | %%% date = "2000/07/31", 14 | %%% time = "08:40:39 EDT", 15 | %%% author = "Michael J Downes", 16 | %%% email = "mjd@ams.org", 17 | %%% abstract = "Provides a way to add material at beginning or end of 18 | %%% a macro's existing definition.", 19 | %%% checksum = "23183 222 932 7411", 20 | %%% docstring = "The checksum field, produced by Robert Solovay's 21 | %%% checksum utility, gives CRC-16 checksum, lines, 22 | %%% words, and characters.", 23 | %%% } 24 | %%% ==================================================================== 25 | \NeedsTeXFormat{LaTeX2e} 26 | \ProvidesPackage{patchcmd}[2000/07/31 v1.03] 27 | %% Copyright 2000 Michael John Downes 28 | %% This file has no restrictions on its use, distribution, or sale. 29 | 30 | %%\def\wrs{\immediate\write\sixt@@n} 31 | \newcommand{\patchcommand}[1]{% 32 | \expandafter\patchcmd@a\meaning#1??->@\@nil#1% 33 | } 34 | \long\def\patchcmd@a#1#2#3->#4#5\@nil#6{% 35 | %% \wrs{\string#6: [#1] [#2] [#3]->[#4]}% 36 | \ifx @#4\relax \patchcmdError#6#1% 37 | \expandafter\@gobbletwo % discard the other two arguments 38 | \else 39 | \if l#2\toks@{\patchcmd@e{}#6}% l in this position means \long 40 | \else \toks@{\patchcmd@e*#6}% not \long 41 | \fi 42 | \patchcmd@b #3@#4#5 ? ? ? 
\@nil#6% 43 | \expandafter\the\expandafter\toks@ 44 | \fi 45 | } 46 | \def\patchcmd@b#1:#2@#3#4 #5#6 #7 #8\@nil#9{% 47 | %% \wrs{[#1] [#2] [#3] [#4] [#5] [#6] ARG7=[#7] [#8]}% 48 | \if \ifx @#7@\expandafter 49 | \ifx\csname #6\endcsname#9T\else F\fi\else F\fi T% 50 | \toks@\expandafter{\expandafter\patchcommand\csname #6 \endcsname}% 51 | \else 52 | \ifx @#2@% No arguments 53 | \toks@\expandafter{\the\toks@ 0}% 54 | \else 55 | \patchcmd@c 0#2{\string##}0% 56 | \fi 57 | \fi 58 | } 59 | \def\patchcmd@c#1#2#3{% 60 | \if\string###2% % yes it's a # token 61 | \ifodd 0#31 % and it's followed by a number 62 | \if 0#3\patchcmd@d#1\fi % number=0? then we're done 63 | \else \patchcmd@d D% # not a number: must be a delimited arg 64 | \fi 65 | \else \patchcmd@d D% not a # token: must be a delmited arg 66 | \fi 67 | \patchcmd@c#3% 68 | } 69 | \def\patchcmd@d#1{% 70 | \if D#1% 71 | \PackageError{patchcmd}{Cannot change a macro that has 72 | delimited arguments}\@ehd 73 | \else 74 | \toks@\expandafter{\the\toks@ #1}% 75 | \fi 76 | \begingroup 77 | \aftergroup\@gobble 78 | \let\patchcmd@c\endgroup 79 | } 80 | \def\patchcmd@e#1#2#3#4#5{% 81 | \begingroup 82 | \edef\@##1{% 83 | \@temptokena\noexpand\expandafter{% 84 | \noexpand#2% 85 | \ifnum#3>0 {####1}\ifnum#3>1 {####2}\ifnum#3>2 {####3}% 86 | \ifnum#3>3 {####4}\ifnum#3>4 {####5}\ifnum#3>5 {####6}% 87 | \ifnum#3>6 {####7}\ifnum#3>7 {####8}\ifnum#3>8 {####9}% 88 | \fi\fi\fi\fi\fi\fi\fi\fi\fi 89 | ##1% 90 | }% 91 | } 92 | \@{#5}% 93 | \edef\@##1{\endgroup 94 | \noexpand\renewcommand#1\noexpand#2\ifcase#3 \else [#3]\fi 95 | {##1\the\@temptokena}}% 96 | \@{#4}% 97 | %% \show#2% 98 | } 99 | \long\def\patchcmdError#1#2{% 100 | \begingroup 101 | \toks@{Not redefinable}% 102 | \ifcat\relax\noexpand#1% Is it a control sequence? 103 | \begingroup 104 | \let#1=?\ifx ?\relax % Is it "\relax"? 
105 | \endgroup % accept current value of \toks@ 106 | \else \endgroup 107 | \if\ifx\relax#1u\else #2\fi u% 108 | \toks@{Not defined}% 109 | \fi 110 | \fi 111 | \fi 112 | \edef\@{\endgroup 113 | \noexpand\PackageError{patchcmd}{% 114 | \the\toks@: \string#1}\noexpand\@ehd}% 115 | \@ 116 | } 117 | \endinput 118 | %% 119 | %% End of file `patchcmd.sty'. 120 | -------------------------------------------------------------------------------- /nfssext.sty: -------------------------------------------------------------------------------- 1 | \NeedsTeXFormat{LaTeX2e} 2 | \ProvidesPackage{nfssext}[2003/03/14 v1.2 Experimental NFSS Extensions] 3 | \newcommand*{\exfs@tempa}{} 4 | \newcommand*{\exfs@tempb}{} 5 | \newcommand*{\exfs@try@family}[2][]{% 6 | \let\exfs@tempa\relax 7 | \begingroup 8 | \fontfamily{#2}\try@load@fontshape 9 | \expandafter\ifx\csname\curr@fontshape\endcsname\relax 10 | \edef\exfs@tempa{#1}% 11 | \ifx\exfs@tempa\@empty 12 | \PackageWarning{nfssext}{% 13 | Font family '\f@encoding/#2' not available\MessageBreak 14 | Ignoring font switch}% 15 | \else 16 | \PackageInfo{nfssext}{% 17 | Font family '\f@encoding/#2' not available\MessageBreak 18 | Font family '\f@encoding/#1' tried instead}% 19 | \exfs@try@family{#1}% 20 | \fi 21 | \else 22 | \gdef\exfs@tempa{\fontfamily{#2}\selectfont}% 23 | \fi 24 | \endgroup 25 | \exfs@tempa} 26 | \def\exfs@get@base#1#2#3#4\@nil{#1#2#3} 27 | \DeclareRobustCommand{\lnstyle}{% 28 | \not@math@alphabet\lnstyle\relax 29 | \exfs@try@family[\expandafter\exfs@get@base\f@family\@nil]% 30 | {\expandafter\exfs@get@base\f@family\@nil x}} 31 | \DeclareRobustCommand{\osstyle}{% 32 | \not@math@alphabet\osstyle\relax 33 | \exfs@try@family{\expandafter\exfs@get@base\f@family\@nil j}} 34 | \DeclareRobustCommand{\instyle}{% 35 | \not@math@alphabet\instyle\relax 36 | \exfs@try@family{\expandafter\exfs@get@base\f@family\@nil 0}} 37 | \DeclareRobustCommand{\sustyle}{% 38 | \not@math@alphabet\sustyle\relax 39 | 
\exfs@try@family{\expandafter\exfs@get@base\f@family\@nil 1}} 40 | \DeclareRobustCommand{\swstyle}{% 41 | \not@math@alphabet\swstyle\relax 42 | \exfs@try@family{\expandafter\exfs@get@base\f@family\@nil w}} 43 | \newcommand*{\sidefault}{si} 44 | \DeclareRobustCommand{\sishape}{% 45 | \not@math@alphabet\sishape\relax 46 | \fontshape\sidefault\selectfont} 47 | \newcommand*{\exfs@merge@shape}[3]{% 48 | \edef\exfs@tempa{#1}% 49 | \edef\exfs@tempb{#2}% 50 | \ifx\f@shape\exfs@tempb 51 | \expandafter\ifx\csname\f@encoding/\f@family/\f@series/#3\endcsname\relax 52 | \else 53 | \edef\exfs@tempa{#3}% 54 | \fi 55 | \fi 56 | \fontshape{\exfs@tempa}\selectfont} 57 | \DeclareRobustCommand{\itshape}{% 58 | \not@math@alphabet\itshape\mathit 59 | \exfs@merge@shape{\itdefault}{\scdefault}{\sidefault}} 60 | \DeclareRobustCommand{\scshape}{% 61 | \not@math@alphabet\scshape\relax 62 | \exfs@merge@shape{\scdefault}{\itdefault}{\sidefault}} 63 | \DeclareRobustCommand{\upshape}{% 64 | \not@math@alphabet\upshape\relax 65 | \exfs@merge@shape{\updefault}{\sidefault}{\scdefault}} 66 | \DeclareRobustCommand{\dfshape}{% 67 | \not@math@alphabet\dfshape\relax 68 | \fontshape\shapedefault\selectfont} 69 | \newcommand*{\swshapedefault}{\itdefault} 70 | \DeclareRobustCommand{\swshape}{% 71 | \not@math@alphabet\swshape\relax 72 | \swstyle\fontshape\swshapedefault\selectfont} 73 | \DeclareTextFontCommand{\textln}{\lnstyle} 74 | \DeclareTextFontCommand{\textos}{\osstyle} 75 | \DeclareTextFontCommand{\textin}{\instyle} 76 | \DeclareTextFontCommand{\textsu}{\sustyle} 77 | \DeclareTextFontCommand{\textsi}{\sishape} 78 | \DeclareTextFontCommand{\textdf}{\dfshape} 79 | \DeclareTextFontCommand{\textsw}{\swshape} 80 | \newcommand*{\DeclareTextOrnament}[7]{% 81 | \expandafter\def\csname#1@orn\@roman#2\endcsname{#3/#4/#5/#6/#7}} 82 | \begingroup 83 | \catcode`\/=12 84 | \gdef\exfs@split@orndef#1/#2/#3/#4/#5\@nil{% 85 | \def\f@encoding{#1}% 86 | \def\f@family{#2}% 87 | \def\f@series{#3}% 88 | \def\f@shape{#4}% 89 
| \def\exfs@tempa{#5}} 90 | \endgroup 91 | \def\exfs@base@family{\expandafter\exfs@get@base\f@family\@nil} 92 | \DeclareRobustCommand{\ornament}[1]{% 93 | \expandafter\ifx\csname\exfs@base@family @orn\@roman#1\endcsname\relax 94 | \PackageWarning{nfssext}{% 95 | Ornament #1 undefined for font family '\exfs@base@family'\MessageBreak 96 | Setting debug mark}% 97 | \rule{1ex}{1ex}% 98 | \else 99 | \begingroup 100 | \edef\exfs@tempb{\csname\exfs@base@family @orn\@roman#1\endcsname}% 101 | \expandafter\expandafter\expandafter\exfs@split@orndef 102 | \expandafter\string\exfs@tempb\@nil 103 | \selectfont\char\exfs@tempa 104 | \endgroup 105 | \fi} 106 | \endinput 107 | -------------------------------------------------------------------------------- /Body/appb/structcnode.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structcnode}{ 2 | \section{cnode Struct Reference} 3 | \label{structcnode}\index{cnode@{cnode}} 4 | } 5 | {\tt \#include $<$convll.h$>$} 6 | 7 | Collaboration diagram for cnode:\begin{figure}[H] 8 | \begin{center} 9 | \leavevmode 10 | \includegraphics[width=59pt]{structcnode__coll__graph} 11 | \end{center} 12 | \end{figure} 13 | \subsection*{Data Fields} 14 | \begin{CompactItemize} 15 | \item 16 | \hyperlink{structcSet__t}{c\-Set\_\-t} $\ast$ \hyperlink{structcnode_o0}{set} 17 | \item 18 | int \hyperlink{structcnode_o1}{id} 19 | \item 20 | int \hyperlink{structcnode_o2}{length} 21 | \item 22 | \hyperlink{structcnode}{cnode} $\ast$ \hyperlink{structcnode_o3}{next} 23 | \item 24 | double \hyperlink{structcnode_o4}{stat} 25 | \end{CompactItemize} 26 | 27 | 28 | \subsection*{Detailed Description} 29 | This data structure is a linked list for storing cliques. Each member of the linked list has a set, an ID number, a length (which gives the number of characters in the motif), a pointer to the next member of the linked list, and a floating-point number for storing statistical information. 
30 | 31 | 32 | 33 | Definition at line 35 of file convll.h. 34 | 35 | \subsection*{Field Documentation} 36 | \hypertarget{structcnode_o1}{ 37 | \index{cnode@{cnode}!id@{id}} 38 | \index{id@{id}!cnode@{cnode}} 39 | \subsubsection[id]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structcnode_o1}{cnode::id}}} 40 | \label{structcnode_o1} 41 | 42 | 43 | Identification number for this member. 44 | 45 | Definition at line 38 of file convll.h. 46 | 47 | Referenced by add\-To\-Stacks(), print\-Cll(), print\-Cll\-Pattern(), push\-Cll(), remove\-Supers(), single\-Clique\-Conv(), sort\-By\-Stats(), swap\-Nodec\-Set(), uniq\-Clique(), whole\-Clique\-Conv(), whole\-Round\-Conv(), and yank\-Cll().\hypertarget{structcnode_o2}{ 48 | \index{cnode@{cnode}!length@{length}} 49 | \index{length@{length}!cnode@{cnode}} 50 | \subsubsection[length]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structcnode_o2}{cnode::length}}} 51 | \label{structcnode_o2} 52 | 53 | 54 | Length of this motif. 55 | 56 | Definition at line 41 of file convll.h. 57 | 58 | Referenced by calc\-Stat\-Cliq(), get\-Largest\-Length(), main(), output\-Real\-Pats(), output\-Real\-Pats\-WCentroid(), print\-Cll(), and push\-Cll().\hypertarget{structcnode_o3}{ 59 | \index{cnode@{cnode}!next@{next}} 60 | \index{next@{next}!cnode@{cnode}} 61 | \subsubsection[next]{\setlength{\rightskip}{0pt plus 5cm}struct \hyperlink{structcnode}{cnode}$\ast$ \hyperlink{structcnode_o3}{cnode::next}}} 62 | \label{structcnode_o3} 63 | 64 | 65 | A pointer to the next member, or the next motif. 66 | 67 | Definition at line 42 of file convll.h. 
68 | 69 | Referenced by calc\-Stat\-All\-Cliqs(), fill\-Member\-Stacks(), get\-Largest\-Length(), get\-Largest\-Support(), main(), output\-Real\-Pats(), output\-Real\-Pats\-WCentroid(), pop\-Cll(), print\-Cll(), prune\-Cll(), push\-Cll(), remove\-Supers(), single\-Clique\-Conv(), sort\-By\-Stats(), swap\-Nodec\-Set(), uniq\-Clique(), whole\-Round\-Conv(), and yank\-Cll().\hypertarget{structcnode_o0}{ 70 | \index{cnode@{cnode}!set@{set}} 71 | \index{set@{set}!cnode@{cnode}} 72 | \subsubsection[set]{\setlength{\rightskip}{0pt plus 5cm}\hyperlink{structcSet__t}{c\-Set\_\-t}$\ast$ \hyperlink{structcnode_o0}{cnode::set}}} 73 | \label{structcnode_o0} 74 | 75 | 76 | The set for this member of the linked list. 77 | 78 | Definition at line 37 of file convll.h. 79 | 80 | Referenced by add\-To\-Stacks(), calc\-Stat\-Cliq(), find\-Clique\-Centroid(), get\-Largest\-Support(), inithead\-Cll(), main(), make\-Alternate\-Centroid(), merge\-Intersect(), output\-Real\-Pats(), output\-Real\-Pats\-WCentroid(), pop\-Cll(), print\-Cll(), print\-Cll\-Pattern(), prune\-Cll(), push\-Cll(), remove\-Supers(), single\-Clique\-Conv(), swap\-Nodec\-Set(), uniq\-Clique(), and whole\-Clique\-Conv().\hypertarget{structcnode_o4}{ 81 | \index{cnode@{cnode}!stat@{stat}} 82 | \index{stat@{stat}!cnode@{cnode}} 83 | \subsubsection[stat]{\setlength{\rightskip}{0pt plus 5cm}double \hyperlink{structcnode_o4}{cnode::stat}}} 84 | \label{structcnode_o4} 85 | 86 | 87 | Used to store the statistical score of a motif. 88 | 89 | Definition at line 43 of file convll.h. 90 | 91 | Referenced by calc\-Stat\-All\-Cliqs(), main(), output\-Real\-Pats(), and push\-Cll(). 
92 | 93 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 94 | \item 95 | \hyperlink{convll_8h}{convll.h}\end{CompactItemize} 96 | -------------------------------------------------------------------------------- /Body/appa.tex: -------------------------------------------------------------------------------- 1 | \chapter{Abbreviations and reference data} 2 | 3 | \section{Basic molecular biology data} 4 | 5 | \begin{itemize} 6 | \item Figure~\vref{fig:aas} shows structures and abbreviations for the 7 | 20 naturally occurring amino acids. The abbreviations shown in 8 | the figure are used consistently throughout this thesis. 9 | 10 | \item Figure~\vref{fig:bases} shows structures and abbreviations for the 11 | four nucleotides found in DNA and RNA, and uracil, which is 12 | found only in RNA. 13 | 14 | \item Table~\vref{table:codonTable} shows the standard codon table 15 | that translates from three letter nucleotide sequences to the 16 | corresponding amino acid during the process of mRNA translation. 17 | \end{itemize} 18 | 19 | 20 | \begin{figure}[ptbh] 21 | \centering 22 | \includegraphics[width=\textwidth]{Body/Images-appa/aas.pdf} 23 | \caption[Amino acid structures and abbreviations]{Amino acid structures and abbreviations. The figure shows the chemical 24 | structure of the 20 naturally occurring amino acids and their three letter and 25 | one letter abbreviations.} 26 | \label{fig:aas} \end{figure} 27 | 28 | 29 | . 30 | \begin{figure}[bthp] 31 | \centering 32 | \includegraphics{Body/Images-appa/bases.pdf} 33 | \caption[Nucleotide base structures and abbreviations]{Nucleotide base structures and abbreviations.} 34 | \label{fig:bases} \end{figure} 35 | 36 | 37 | 38 | \begin{table}[tbph] 39 | \caption[Standard codon table]{Standard codon table. 
The table 40 | should be interpreted by reading the first and second 41 | nucleotides off of the vertical axis on the left, and reading 42 | the final nucleotide off of the horizontal axis at the top. 43 | For example, the amino acid corresponding to the three 44 | nucleotide sequence \texttt{AGG} is \texttt{Arg}, or arginine. 45 | }\label{table:codonTable} 46 | \centering 47 | \begin{verbatim} 48 | A C G U 49 | _____________________________ 50 | AA | Lys Asn Lys Asn 51 | F AC | Thr Thr Thr Thr 52 | i AG | Arg Ser Arg Ser 53 | r AU | Ile Ile MET Ile 54 | s P CA | Gln His Gln His 55 | t o CC | Pro Pro Pro Pro 56 | s CG | Arg Arg Arg Arg 57 | & i CU | Leu Leu Leu Leu 58 | t GA | Glu Asp Glu Asp 59 | S i GC | Ala Ala Ala Ala 60 | e o GG | Gly Gly Gly Gly 61 | c n GU | Val Val Val Val 62 | o UA | . Tyr . Tyr 63 | n UC | Ser Ser Ser Ser 64 | d UG | . Cys Trp Cys 65 | UU | Leu Phe Leu Phe 66 | \end{verbatim} 67 | 68 | \end{table} 69 | 70 | 71 | \clearpage 72 | 73 | \section{Supplementary data and analyses} 74 | 75 | \subsection{Position weight matrix computation and matching}\label{section:pwmCode} 76 | 77 | The code shown below is a simple Python script used to compute a 78 | position weight matrix. The script can be copied from this text and 79 | run on most personal computers. After the code, I present a brief 80 | example of how this should be run, using the yeast 3' splice sites 81 | shown in Figure~\vref{fig:yeast}. 82 | 83 | \begin{singlespace} 84 | \small 85 | \verbatiminput{Body/Images-appa/searchPwm.py} 86 | \normalsize 87 | \end{singlespace} 88 | 89 | \begin{singlespace} 90 | \small 91 | \verbatiminput{Body/Images-appa/pwm-run.txt} 92 | \normalsize 93 | \end{singlespace} 94 | 95 | \subsection{Antimicrobial design data} 96 | 97 | Figure~\vref{fig:synth1ms} shows the gas and mass spectra for a 98 | peptide designed in Chapter~\ref{chapter:amps}. See 99 | Section~\vref{section:preliminary}. 
100 | 101 | \begin{figure}[htbp] 102 | \centering 103 | \includegraphics[width=\textwidth]{Body/Images-appa/synth1-spectra.pdf} 104 | \caption[Gas and mass spectra for the synth--1 peptide]{ 105 | Gas and mass spectra for the synth--1 peptide. As the 106 | figure shows, the peptide appears well above the 85\% purity 107 | threshold. This peptide was designed using our preliminary, 108 | sensitive approach for designing antimicrobial peptides. 109 | However, the peptide was shown to have undetectable activity 110 | under good experimental conditions, prompting the more 111 | focused, specific approach for designing AmPs. 112 | } 113 | \label{fig:synth1ms} 114 | \end{figure} 115 | -------------------------------------------------------------------------------- /Body/appb/appb.tex: -------------------------------------------------------------------------------- 1 | 2 | \setlength{\parindent}{0cm} 3 | \setlength{\parskip}{0.2cm} 4 | \addtocounter{secnumdepth}{1} 5 | \sloppy 6 | 7 | 8 | 9 | \chapter{Gemoda file documentation}\label{chapter:gfiles} 10 | \section{Introduction} 11 | This chapter contains detailed documentation of the source code 12 | implementation of the Gemoda algorithm described in 13 | Chapter~\vref{chapter:gemoda}. 14 | The Gemoda software is written in the C programming language 15 | and 16 | segmented in such a way as to allow the extension of the 17 | algorithm to varieties of sequential data that were not 18 | anticipated by the authors. Furthermore, where possible the 19 | code was crafted to be ``object--oriented like'' for maximum 20 | readability. The software makes extensive use of the GNU 21 | Scientific Library~\cite{galassi2003gnu} and the popular Basic Linear 22 | Algebra Subprograms 23 | (BLAS)~\cite{blackford2002updated,dongarra2002basicI,dongarra2002basicII} 24 | to speed--up computationally intensive operations associated 25 | with the discovery of motifs in three--dimensional protein 26 | structures and other real--valued data. 
27 | 28 | The Gemoda source code is available from \url{http://web.mit.edu/bamel/gemoda}. 29 | The software includes a number of ``helper'' applications 30 | for interoperability with common bioinformatics 31 | tools. 32 | 33 | This software is designed for UNIX--like systems and 34 | uses the GNU autotools framework for managing 35 | installation tasks and properly configuring itself for different 36 | computer architectures. Gemoda is distributed with a 37 | \texttt{configure} shell script that tries to guess 38 | system--dependent variables and to create a ``makefile'' that 39 | can be used as an input for GNU make. 40 | 41 | To install Gemoda, use the following recipe: 42 | \begin{enumerate} 43 | \item Change directories to the folder that contains the ``src'' 44 | directory as a subfolder. From this location, run the command 45 | \texttt{./configure}. To install Gemoda to a nonstandard 46 | location, use the optional flag \texttt{--prefix=PATH}, where 47 | \texttt{PATH} is the desired location, such as 48 | ``/usr/local/software''. 49 | \item Type \texttt{make} to compile the software using your 50 | default C compiler, which is specified by the ``CC'' 51 | environment variable. 52 | \item Type \texttt{make install} to install the software. 53 | \end{enumerate} 54 | There are many other options for the \texttt{configure} script. 55 | To see a list of available options, use the optional flag \texttt{--help}. 56 | 57 | In the following sections of this appendix, I describe in detail 58 | the organization and design of the Gemoda software. These 59 | sections are organized by file and are designed to show the 60 | dependencies and interactions of different functions. As 61 | described in Chapter~\vref{chapter:gemoda}, Gemoda operates in 62 | three steps: comparison, clustering, and convolution. The 63 | software keeps the steps clearly segmented. 
64 | 65 | \begin{singlespace} 66 | \input{align_8c} 67 | \include{bitSet_8c} 68 | %\include{bitSet_8h} 69 | \include{convll_8c} 70 | \include{convll_8h} 71 | \include{fastaSeqIO_8c} 72 | \include{fastaSeqIO_8h} 73 | \include{gemoda-r_8c} 74 | \include{gemoda-s_8c} 75 | \include{matdata_8h} 76 | \include{matrices_8c} 77 | \include{matrices_8h} 78 | \include{matrixmap_8h} 79 | \include{newConv_8c} 80 | \include{patStats_8c} 81 | \include{patStats_8h} 82 | \include{realCompare_8c} 83 | \include{realCompare_8h} 84 | \include{realIo_8c} 85 | \include{realIo_8h} 86 | \include{spat_8h} 87 | \include{words_8c} 88 | \end{singlespace} 89 | 90 | \chapter{Gemoda data structure documentation}\label{chapter:gstructs} 91 | 92 | \section{Introduction} 93 | This appendix describes in detail the data structures used in the 94 | Gemoda software, which is described in the appendix on 95 | page~\pageref{chapter:gfiles}. Although C is not an 96 | object--oriented programming language, we have tried where possible 97 | to use a similar philosophy in our programming. 98 | 99 | \begin{singlespace} 100 | \input{structbitGraph__t} 101 | \include{structbitSet__t} 102 | \include{structcnode} 103 | \include{structcSet__t} 104 | \include{structfSeq__t} 105 | \include{structmnode} 106 | \include{structrdh__t} 107 | \include{structsHash__t} 108 | \include{structsHashEntry__t} 109 | \include{structsOffset__t} 110 | \include{structsPat__t} 111 | \include{structsSize__t} 112 | \end{singlespace} 113 | -------------------------------------------------------------------------------- /import.sty: -------------------------------------------------------------------------------- 1 | % import.sty Ver 3.0 17-Dec-1997 Donald Arseneau (asnd@reg.triumf.ca) 2 | % 3 | % Allow input of a file with its own inputs from another directory. 4 | % 5 | % \import {full_path} {file} \subimport {path_extension} {file} 6 | % Also \includefrom, \subincludefrom, and * variants. 
7 | % 8 | % This software is free of any restrictions. 9 | % 10 | % For example, if a remote file "/usr5/friend/work/report.tex" has contents: 11 | % My graph: \includegraphics{picture} 12 | % \input{explanation} 13 | % then you can safely input that file in your own document with the command 14 | % "\import{/usr5/friend/work/}{report}"; the explanation and picture will 15 | % be taken from the remote directory. You can then import documents from 16 | % other friends, even if they use the same file names. 17 | % 18 | % The "\subimport" command allows imported files to import files themselves, 19 | % using their own directory as the root of a "path_extension". Do not use 20 | % both "\import" and "\subimport" in the same file. 21 | % 22 | % For example, if a file is imported (using either command) from directory 23 | % "abc/" and that file contains the command "\subimport{lmn/}{xyz}" then 24 | % file "abc/lmn/xyz.tex" is input, and any "\input" commands in that file 25 | % will read files from directory "abc/lmn/". 26 | % 27 | % Note that the sub-import path is merely appended to the current import 28 | % path. Mistakes from this method must be rectified by "\import@path@fix". 29 | % 30 | % Depending on how your \TeX\ system is configured, if a file with the 31 | % same name as the import file exists in the current directory or in the 32 | % TEXINPUTS path, that other file will be read in preference to one in the 33 | % import directory. So here is the real behavior of the previous example: 34 | % Given the sequence "\import{abc/}{one}", "\subimport{lmn/uvw/}{two}" (in 35 | % file one), "\input{three}" (in file two), LaTeX first looks for three.tex 36 | % in the TEXINPUTS search path; if not found, it tries "abc/lmn/uvw/three"; 37 | % if that doesn't exist, it tries "abc/three"; if still not found, it tries 38 | % the defined "\input@path", if there is one.
39 | % 40 | % To avoid searching the TEXINPUTS path when importing files, use the `star' 41 | % versions of the commands: "\import*" and "\subimport*". 42 | % 43 | % A hook "\import@path@fix" is provided to reformat the import path 44 | % to fit the syntax of a particular operating system. It *could* be 45 | % defined to convert unix-style path names to the local format, but 46 | % all it does now is remove "][" from VMS sub-import directories. 47 | % 48 | % Presently, the paths are defined ``locally'' so input files must have 49 | % balanced grouping. 50 | % User-level commands: \import and \includefrom first reset \import@path to empty (globally), so their path argument stands alone; \subimport and \subincludefrom keep the current \import@path, so their argument is appended to it in \@sub@import. 51 | \newcommand{\import}{\global\let\import@path\@empty \@doimport\input} 52 | \newcommand{\subimport}{\@doimport\input} 53 | \newcommand{\includefrom}{\global\let\import@path\@empty \@doimport\include} 54 | \newcommand{\subincludefrom}{\@doimport\include} 55 | % \@doimport: #1 = \input or \include. A following * passes \@iffileonpath as the file test to \@sub@import; without a star \IfFileExists is passed. 56 | \def\@doimport#1{\@ifstar 57 | {\@sub@import#1\@iffileonpath}{\@sub@import#1\IfFileExists}} 58 | % \@sub@import expands the current path lists inside \protected@edef and hands them to \@import; tokens marked \noexpand are left for when \@tempa runs, and \@tempa begins by closing the group. 59 | % #1 = import command, #2 = switch for *, #3 = import path extension 60 | \def\@sub@import#1#2#3{% 61 | \begingroup 62 | \protected@edef\@tempa{\endgroup 63 | \let\noexpand\IfFileExists\noexpand#2 64 | \noexpand\@import \noexpand#1% param 1 65 | {\@ifundefined{input@path}{}{\input@path}}% 2 66 | {\@ifundefined{Ginput@path}{}{\Ginput@path}}% 3 67 | {\import@path#3}{\import@path}% 4,5 68 | {\ifx\IfFileExists\im@@IfFileExists \noexpand\im@@IfFileExists 69 | \else \noexpand\IfFileExists \fi}}% 6 70 | \@tempa} 71 | % 72 | % #1 = import command (\input or \include) 73 | % #2 = previous input path list. #3 = previous graphics input path list. 74 | % #4 = full path added to each. #5 = previous import path. 75 | % #6 = previous \IfFileExists. #7 = file name.
76 | % \@import: record #4 as the import path, put it (after \import@path@fix) at the front of the input and graphics path lists, run the import command on the file, then restore \IfFileExists, the import path and both lists (a list that was empty is made undefined again). 77 | \def\@import#1#2#3#4#5#6#7{% 78 | \gdef\import@path{#4}% 79 | \protected@edef\input@path{{\import@path@fix{#4}}#2}% 80 | \protected@edef\Ginput@path{{\import@path@fix{#4}}#3}% 81 | #1{#7}% 82 | \let\IfFileExists#6% restore after \import* 83 | \gdef\import@path{#5}% 84 | \def\input@path{#2}\ifx\input@path\@empty \let\input@path\@undefined \fi 85 | \def\Ginput@path{#3}\ifx\Ginput@path\@empty \let\Ginput@path\@undefined \fi 86 | } 87 | % Save the original \IfFileExists so it can be restored later, and start with an empty import path. 88 | \let\im@@IfFileExists\IfFileExists 89 | \gdef\import@path{} 90 | 91 | \let\import@path@fix\@firstofone % default 92 | 93 | % Check for vms file names and set \import@path@fix appropriately 94 | \gdef\@gtempa{[]} 95 | \ifx\@gtempa\@currdir % VMS directory syntax 96 | \gdef\import@path@fix#1{\@gobbleVMSbrack#1][>} 97 | \gdef\@gobbleVMSbrack#1][#2{#1\ifx>#2\@empty 98 | \expandafter \strip@prefix \fi % Gobble up to > 99 | \@gobbleVMSbrack #2} 100 | \fi 101 | -------------------------------------------------------------------------------- /Body/appb/patStats_8h.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{patStats_8h}{ 2 | \section{pat\-Stats.h File Reference} 3 | \label{patStats_8h}\index{patStats.h@{patStats.h}} 4 | } 5 | {\tt \#include $<$stdio.h$>$}\par 6 | {\tt \#include $<$stdlib.h$>$}\par 7 | {\tt \#include $<$string.h$>$}\par 8 | {\tt \#include $<$errno.h$>$}\par 9 | {\tt \#include \char`\"{}bit\-Set.h\char`\"{}}\par 10 | {\tt \#include \char`\"{}convll.h\char`\"{}}\par 11 | {\tt \#include $<$time.h$>$}\par 12 | 13 | 14 | Include dependency graph for pat\-Stats.h:\begin{figure}[H] 15 | \begin{center} 16 | \leavevmode 17 | \includegraphics[width=179pt]{patStats_8h__incl} 18 | \end{center} 19 | \end{figure} 20 | 21 | 22 | This graph shows which files directly or indirectly include this file:\begin{figure}[H] 23 | \begin{center} 24 | \leavevmode 25 | \includegraphics[width=102pt]{patStats_8h__dep__incl} 26 | \end{center} 27 | \end{figure} 28 |
\subsection*{Functions} 29 | \begin{CompactItemize} 30 | \item 31 | unsigned int $\ast$$\ast$ \hyperlink{patStats_8h_a0}{get\-Stat\-Mat} (\hyperlink{structbitGraph__t}{bit\-Graph\_\-t} $\ast$bg, int support, int length, int $\ast$support\-Dim, int $\ast$length\-Dim, int num\-Blanks, int s, FILE $\ast$OUTPUT\_\-FILE) 32 | \item 33 | int \hyperlink{patStats_8h_a1}{cum\-DMatrix} (unsigned int $\ast$$\ast$d, \hyperlink{structcnode}{cll\_\-t} $\ast$cliqs, int curr\-Support, int curr\-Length, int bg\-Size, int num\-Seqs) 34 | \item 35 | int \hyperlink{patStats_8h_a2}{calc\-Stat\-All\-Cliqs} (unsigned int $\ast$$\ast$d, \hyperlink{structcnode}{cll\_\-t} $\ast$all\-Cliqs, int num\-Windows) 36 | \item 37 | \hyperlink{structcnode}{cll\_\-t} $\ast$ \hyperlink{patStats_8h_a3}{sort\-By\-Stats} (\hyperlink{structcnode}{cll\_\-t} $\ast$all\-Cliqs) 38 | \item 39 | int \hyperlink{patStats_8h_a4}{free\-D} (unsigned int $\ast$$\ast$d, int support\-Dim) 40 | \end{CompactItemize} 41 | 42 | 43 | \subsection*{Function Documentation} 44 | \hypertarget{patStats_8h_a2}{ 45 | \index{patStats.h@{pat\-Stats.h}!calcStatAllCliqs@{calcStatAllCliqs}} 46 | \index{calcStatAllCliqs@{calcStatAllCliqs}!patStats.h@{pat\-Stats.h}} 47 | \subsubsection[calcStatAllCliqs]{\setlength{\rightskip}{0pt plus 5cm}int calc\-Stat\-All\-Cliqs (unsigned int $\ast$$\ast$ {\em d}, \hyperlink{structcnode}{cll\_\-t} $\ast$ {\em all\-Cliqs}, int {\em num\-Windows})}} 48 | \label{patStats_8h_a2} 49 | 50 | 51 | 52 | 53 | Definition at line 623 of file pat\-Stats.c. 54 | 55 | References calc\-Stat\-Cliq(), cnode::next, and cnode::stat. 56 | 57 | Referenced by main(). 
58 | 59 | 60 | 61 | \hypertarget{patStats_8h_a1}{ 62 | \index{patStats.h@{pat\-Stats.h}!cumDMatrix@{cumDMatrix}} 63 | \index{cumDMatrix@{cumDMatrix}!patStats.h@{pat\-Stats.h}} 64 | \subsubsection[cumDMatrix]{\setlength{\rightskip}{0pt plus 5cm}int cum\-DMatrix (unsigned int $\ast$$\ast$ {\em d}, \hyperlink{structcnode}{cll\_\-t} $\ast$ {\em cliqs}, int {\em curr\-Support}, int {\em curr\-Length}, int {\em bg\-Size}, int {\em num\-Seqs})}} 65 | \label{patStats_8h_a1} 66 | 67 | 68 | 69 | 70 | Definition at line 460 of file pat\-Stats.c. 71 | 72 | References get\-Largest\-Length(), and get\-Largest\-Support(). 73 | 74 | Referenced by main(). 75 | 76 | 77 | 78 | \hypertarget{patStats_8h_a4}{ 79 | \index{patStats.h@{pat\-Stats.h}!freeD@{freeD}} 80 | \index{freeD@{freeD}!patStats.h@{pat\-Stats.h}} 81 | \subsubsection[freeD]{\setlength{\rightskip}{0pt plus 5cm}int free\-D (unsigned int $\ast$$\ast$ {\em d}, int {\em support\-Dim})}} 82 | \label{patStats_8h_a4} 83 | 84 | 85 | 86 | 87 | Definition at line 637 of file pat\-Stats.c. 88 | 89 | Referenced by main(). 90 | 91 | 92 | 93 | \hypertarget{patStats_8h_a0}{ 94 | \index{patStats.h@{pat\-Stats.h}!getStatMat@{getStatMat}} 95 | \index{getStatMat@{getStatMat}!patStats.h@{pat\-Stats.h}} 96 | \subsubsection[getStatMat]{\setlength{\rightskip}{0pt plus 5cm}unsigned int$\ast$$\ast$ get\-Stat\-Mat (\hyperlink{structbitGraph__t}{bit\-Graph\_\-t} $\ast$ {\em bg}, int {\em support}, int {\em length}, int $\ast$ {\em support\-Dim}, int $\ast$ {\em length\-Dim}, int {\em num\-Blanks}, int {\em s}, FILE $\ast$ {\em OUTPUT\_\-FILE})}} 97 | \label{patStats_8h_a0} 98 | 99 | 100 | 101 | 102 | Definition at line 289 of file pat\-Stats.c. 103 | 104 | References bit\-Graph\-Row\-Intersection(), check\-Bit(), count\-Set(), delete\-Bit\-Set(), bit\-Graph\_\-t::graph, increase\-Mem(), measure\-Diagonal(), new\-Bit\-Set(), next\-Bit\-Bit\-Set(), and bit\-Graph\_\-t::size. 105 | 106 | Referenced by main(). 
107 | 108 | 109 | 110 | \hypertarget{patStats_8h_a3}{ 111 | \index{patStats.h@{pat\-Stats.h}!sortByStats@{sortByStats}} 112 | \index{sortByStats@{sortByStats}!patStats.h@{pat\-Stats.h}} 113 | \subsubsection[sortByStats]{\setlength{\rightskip}{0pt plus 5cm}\hyperlink{structcnode}{cll\_\-t}$\ast$ sort\-By\-Stats (\hyperlink{structcnode}{cll\_\-t} $\ast$ {\em all\-Cliqs})}} 114 | \label{patStats_8h_a3} 115 | 116 | 117 | This function is used to sort a linked list of cliques by the statistical significance of the motifs found in that linked list. 118 | 119 | Definition at line 674 of file pat\-Stats.c. 120 | 121 | References cnode::id. 122 | 123 | Referenced by main(). 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /Body/appb/structrdh__t.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{structrdh__t}{ 2 | \section{rdh\_\-t Struct Reference} 3 | \label{structrdh__t}\index{rdh_t@{rdh\_\-t}} 4 | } 5 | {\tt \#include $<$real\-Io.h$>$} 6 | 7 | \subsection*{Data Fields} 8 | \begin{CompactItemize} 9 | \item 10 | int \hyperlink{structrdh__t_o0}{size} 11 | \item 12 | int \hyperlink{structrdh__t_o1}{index\-Size} 13 | \item 14 | char $\ast$$\ast$ \hyperlink{structrdh__t_o2}{label} 15 | \item 16 | gsl\_\-matrix $\ast$$\ast$ \hyperlink{structrdh__t_o3}{seq} 17 | \item 18 | int $\ast$ \hyperlink{structrdh__t_o4}{index\-To\-Seq} 19 | \item 20 | int $\ast$ \hyperlink{structrdh__t_o5}{index\-To\-Pos} 21 | \item 22 | int $\ast$$\ast$ \hyperlink{structrdh__t_o6}{offset\-To\-Index} 23 | \end{CompactItemize} 24 | 25 | 26 | \subsection*{Detailed Description} 27 | This is a data structure, which is used to store real valued data. Basically, this is an array of gsl\_\-matrix objects, where each matrix represents a single, multidimensional array that was read in from a Fast\-A formatted file. 28 | 29 | 30 | 31 | Definition at line 24 of file real\-Io.h.
32 | 33 | \subsection*{Field Documentation} 34 | \hypertarget{structrdh__t_o1}{ 35 | \index{rdh_t@{rdh\_\-t}!indexSize@{indexSize}} 36 | \index{indexSize@{indexSize}!rdh_t@{rdh\_\-t}} 37 | \subsubsection[indexSize]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structrdh__t_o1}{rdh\_\-t::index\-Size}}} 38 | \label{structrdh__t_o1} 39 | 40 | 41 | The size of the index, where the index is used to store pointers to the different sequences in this object. 42 | 43 | Definition at line 30 of file real\-Io.h. 44 | 45 | Referenced by get\-Rdh\-Index\-Seq\-Pos(), init\-Rdh(), init\-Rdh\-Index(), real\-Comparison(), and set\-Rdh\-Index().\hypertarget{structrdh__t_o5}{ 46 | \index{rdh_t@{rdh\_\-t}!indexToPos@{indexToPos}} 47 | \index{indexToPos@{indexToPos}!rdh_t@{rdh\_\-t}} 48 | \subsubsection[indexToPos]{\setlength{\rightskip}{0pt plus 5cm}int$\ast$ \hyperlink{structrdh__t_o5}{rdh\_\-t::index\-To\-Pos}}} 49 | \label{structrdh__t_o5} 50 | 51 | 52 | The array of integers that tell us to which position in a sequence each index in the gsl\_\-matrix array corresponds. 53 | 54 | Definition at line 40 of file real\-Io.h. 55 | 56 | Referenced by free\-Rdh(), get\-Rdh\-Index\-Seq\-Pos(), init\-Rdh(), init\-Rdh\-Index(), and set\-Rdh\-Index().\hypertarget{structrdh__t_o4}{ 57 | \index{rdh_t@{rdh\_\-t}!indexToSeq@{indexToSeq}} 58 | \index{indexToSeq@{indexToSeq}!rdh_t@{rdh\_\-t}} 59 | \subsubsection[indexToSeq]{\setlength{\rightskip}{0pt plus 5cm}int$\ast$ \hyperlink{structrdh__t_o4}{rdh\_\-t::index\-To\-Seq}}} 60 | \label{structrdh__t_o4} 61 | 62 | 63 | The array of integers that will tell us to which sequence each index in the gsl\_\-matrix array corresponds. 64 | 65 | Definition at line 37 of file real\-Io.h.
66 | 67 | Referenced by free\-Rdh(), get\-Rdh\-Index\-Seq\-Pos(), init\-Rdh(), init\-Rdh\-Index(), main(), and set\-Rdh\-Index().\hypertarget{structrdh__t_o2}{ 68 | \index{rdh_t@{rdh\_\-t}!label@{label}} 69 | \index{label@{label}!rdh_t@{rdh\_\-t}} 70 | \subsubsection[label]{\setlength{\rightskip}{0pt plus 5cm}char$\ast$$\ast$ \hyperlink{structrdh__t_o2}{rdh\_\-t::label}}} 71 | \label{structrdh__t_o2} 72 | 73 | 74 | The array of labels that store the names of each sequence. 75 | 76 | Definition at line 32 of file real\-Io.h. 77 | 78 | Referenced by free\-Rdh(), get\-Rdh\-Label(), init\-Rdh(), and set\-Rdh\-Label().\hypertarget{structrdh__t_o6}{ 79 | \index{rdh_t@{rdh\_\-t}!offsetToIndex@{offsetToIndex}} 80 | \index{offsetToIndex@{offsetToIndex}!rdh_t@{rdh\_\-t}} 81 | \subsubsection[offsetToIndex]{\setlength{\rightskip}{0pt plus 5cm}int$\ast$$\ast$ \hyperlink{structrdh__t_o6}{rdh\_\-t::offset\-To\-Index}}} 82 | \label{structrdh__t_o6} 83 | 84 | 85 | The array that points from a particular offset to its index. 86 | 87 | Definition at line 42 of file real\-Io.h. 88 | 89 | Referenced by free\-Rdh(), init\-Rdh\-Index(), and main().\hypertarget{structrdh__t_o3}{ 90 | \index{rdh_t@{rdh\_\-t}!seq@{seq}} 91 | \index{seq@{seq}!rdh_t@{rdh\_\-t}} 92 | \subsubsection[seq]{\setlength{\rightskip}{0pt plus 5cm}gsl\_\-matrix$\ast$$\ast$ \hyperlink{structrdh__t_o3}{rdh\_\-t::seq}}} 93 | \label{structrdh__t_o3} 94 | 95 | 96 | The array of matrices that store the data we read in. 97 | 98 | Definition at line 34 of file real\-Io.h. 
99 | 100 | Referenced by free\-Rdh(), general\-Match\-Factor(), get\-Rdh\-Dim(), get\-Rdh\-Seq\-Length(), get\-Rdh\-Value(), init\-Rdh(), init\-Rdh\-Gsl\-Mat(), mass\-Spec\-Compare\-WElut(), output\-Real\-Pats(), rmsd\-Compare(), set\-Rdh\-Col\-From\-String(), set\-Rdh\-Label(), and set\-Rdh\-Value().\hypertarget{structrdh__t_o0}{ 101 | \index{rdh_t@{rdh\_\-t}!size@{size}} 102 | \index{size@{size}!rdh_t@{rdh\_\-t}} 103 | \subsubsection[size]{\setlength{\rightskip}{0pt plus 5cm}int \hyperlink{structrdh__t_o0}{rdh\_\-t::size}}} 104 | \label{structrdh__t_o0} 105 | 106 | 107 | The number of sequences stored in this data structure. 108 | 109 | Definition at line 27 of file real\-Io.h. 110 | 111 | Referenced by init\-Rdh(), init\-Rdh\-Index(), and main(). 112 | 113 | The documentation for this struct was generated from the following file:\begin{CompactItemize} 114 | \item 115 | \hyperlink{realIo_8h}{real\-Io.h}\end{CompactItemize} 116 | -------------------------------------------------------------------------------- /Body/abstract.tex: -------------------------------------------------------------------------------- 1 | % $Log: abstract.tex,v $ 2 | % Revision 1.1 93/05/14 14:56:25 starflt 3 | % Initial revision 4 | % 5 | % Revision 1.1 90/05/04 10:41:01 lwvanels 6 | % Initial revision 7 | % 8 | % 9 | %% The text of your abstract and nothing else (other than comments) goes here. 10 | %% It will be single-spaced and the rest of the text that is supposed to go on 11 | %% the abstract page will be generated by the abstractpage environment. This 12 | %% file should be \input (not \include 'd) from cover.tex. 13 | In this thesis, I discuss the application and development of methods 14 | for the automated discovery of motifs in sequential data. These data 15 | include DNA sequences, protein sequences, and real--valued 16 | sequential data such as protein structures and timeseries of 17 | arbitrary dimension. 
As more genomes are sequenced and annotated, 18 | the need for automated, computational methods for analyzing 19 | biological data is increasing rapidly. In broad terms, the goal of 20 | this thesis is to treat sequential data sets as unknown languages 21 | and to develop tools for interpreting and understanding these 22 | languages. 23 | 24 | The first chapter of this thesis is an introduction to the 25 | fundamentals of motif discovery, which establishes a common mode of 26 | thought and vocabulary for the subsequent chapters. One of the 27 | central themes of this work is the use of grammatical models, 28 | which are more commonly associated with the field of computational 29 | linguistics. In the second chapter, I use grammatical models to 30 | design novel antimicrobial peptides (AmPs). AmPs are small proteins 31 | used by the innate immune system to combat bacterial infection in 32 | multicellular eukaryotes. There is mounting evidence that these 33 | peptides are less susceptible to bacterial resistance than 34 | traditional antibiotics and may form the basis for a novel class of 35 | therapeutics. In this thesis, I describe the rational design of 36 | novel AmPs that show limited homology to naturally--occurring 37 | proteins but have strong bacteriostatic activity against several 38 | species of bacteria, including \emph{Staphylococcus aureus} and 39 | \emph{Bacillus anthracis}. These peptides were designed using a 40 | linguistic model of natural AmPs by treating the amino acid 41 | sequences of natural AmPs as a formal language and building a set of 42 | regular grammars to describe this language. This set of grammars was 43 | used to create novel, unnatural AmP sequences that conform to the 44 | formal syntax of natural antimicrobial peptides but populate a 45 | previously unexplored region of protein sequence space. 46 | 47 | The third chapter describes a novel, GEneric MOtif DIscovery 48 | Algorithm (Gemoda) for sequential data.
Gemoda can be applied to any 49 | dataset with a sequential character, including both categorical and 50 | real--valued data. As I show, Gemoda deterministically discovers 51 | motifs that are maximal in composition and length. As well, the 52 | algorithm allows any choice of similarity metric for finding motifs. 53 | These motifs are representation--agnostic: they can be represented 54 | using regular expressions, position weight matrices, or 55 | any other model for sequential data. I demonstrate a 56 | number of applications of the algorithm, including the discovery of 57 | motifs in amino acid and DNA sequences, and the discovery of 58 | conserved protein sub--structures. 59 | 60 | The final chapter is devoted to a series of smaller projects, 61 | employing tools and methods indirectly related to motif discovery in 62 | sequential data. I describe the construction of a software tool, 63 | Biogrep, that is designed to match large pattern sets against large 64 | biosequence databases in a \emph{parallel} fashion. This makes 65 | Biogrep well--suited to annotating sets of sequences using 66 | biologically significant patterns. In addition, I show that the 67 | BLOSUM series of amino acid substitution matrices, which are 68 | commonly used in motif discovery and sequence alignment problems, 69 | have changed drastically over time. The fidelity of amino acid 70 | sequence alignment and motif discovery tools depends strongly on the 71 | target frequencies implied by these underlying matrices. Thus, 72 | these results suggest that further optimization of these matrices is 73 | possible. 74 | 75 | The final chapter also contains two projects wherein I apply 76 | statistical motif discovery tools instead of grammatical tools. In 77 | the first of these two, I develop three different physicochemical 78 | representations for a set of roughly 700 HIV--I protease substrates 79 | and use these representations for sequence classification and 80 | annotation.
In the second of these two projects, I develop a simple 81 | statistical method for parsing out the phenotypic contribution of a 82 | single mutation from libraries of functional diversity that contain 83 | a multitude of mutations and varied phenotypes. I show that this 84 | new method successfully elucidates the effects of single nucleotide 85 | polymorphisms on the strength of a promoter placed upstream of a 86 | reporter gene. 87 | 88 | The central theme, present throughout this work, is the development 89 | and application of novel approaches to finding motifs in sequential 90 | data. The work on the design of AmPs is very applied and relies 91 | heavily on existing literature. In contrast, the work on Gemoda is 92 | the greatest contribution of this thesis and contains many new 93 | ideas. 94 | -------------------------------------------------------------------------------- /Body/Images-chap2/chrisResults1.tex: -------------------------------------------------------------------------------- 1 | \begin{table}[ptbh] 2 | \caption[Antimicrobial activity of rationally designed and shuffled peptides]{Antimicrobial activity of rationally designed and shuffled peptides. 3 | Each entry shows the minimum inhibitory concentration in $\mu$g/mL\@. ``+'' = MIC greater than 256 $\mu g/mL$. ++ = MIC greater than 128 $\mu g/mL$, not sufficiently soluble to test at 256 $\mu g/mL$.}\label{table:chrisResults1} 4 | \centering \scriptsize 5 | \begin{tabular}{llcclcc} \hline\hline 6 | Peptide & Sequence & \emph{E. coli} & \emph{B. subtilis} & Shuffled Sequence & \emph{E. coli} & \emph{B. 
subtilis} \\ 7 | \rowcolor[gray]{0.9} 8 | 1 & \texttt{ALFSLASKVVPSVFSMVTKK} & + & + & \texttt{MVVFSVPKFKSTVAKLLSSA} & + & + \\ 9 | 2 & \texttt{VVFRVASKVFPAVYCTVSKK} & 128 & + & \texttt{TAKVVVFVSFSYVVPKKRAC} & + & + \\ 10 | \rowcolor[gray]{0.9} 11 | 5 & \texttt{FLFGLASKVFPAVYCKVTRK} & 64 & 256 & \texttt{FLPVLVKVFRYSKKTAAGCF} & ++ & 64 \\ 12 | 6 & \texttt{LSAVGKIASKVVPSVIGAFK} & + & + & \texttt{GVSSPIVAVKFKGAVASLIK} & + & + \\ 13 | \rowcolor[gray]{0.9} 14 | 7 & \texttt{PVIGKLASKVVPSVFSMIKR} & + & + & \texttt{SRVPLKSPVKIVGSKVMIFA} & + & + \\ 15 | 9 & \texttt{GLMSLVKDIAKLAAKQGAKQ} & 256 & + & \texttt{GLKKDALQSIVKKAQLAAMG} & + & + \\ 16 | \rowcolor[gray]{0.9} 17 | 15 & \texttt{SALGRVASKVFPAVYCSITK} & + & + & \texttt{LYSPTCVKAAVSRFIGKVSA} & + & + \\ 18 | 22 & \texttt{LGALFRVASKVFPAVISMVK} & 256 & 64 & \texttt{SVPSVGAVLFFKRAAVMKLI} & + & + \\ 19 | \rowcolor[gray]{0.9} 20 | 23 & \texttt{ALGKLASKVFPAVYCTISRK} & 128 & + & \texttt{KYGPALVIAVKKSCSLTFRA} & + & + \\ 21 | 24 & \texttt{GFIGKLASKVVPSVYCKVTG} & 128 & + & \texttt{GGSTLGVFVKKSKACVIVPY} & \multicolumn{2}{c}{Not soluble} \\ 22 | \rowcolor[gray]{0.9} 23 | 25 & \texttt{PVVFSVASKVVPSLISALKR} & + & + & \texttt{KSPFVLVVSSRVAAVIKSLP} & + & + \\ 24 | 28 & \texttt{FLGVVFKLASKVFPAVFGKV} & 64 & 16 & \texttt{GVSVAGAKKVKVLFVFPFLF} & + & + \\ 25 | \rowcolor[gray]{0.9} 26 | 29 & \texttt{PAVFKIASKVVPSVYCKVSR} & 128 & + & \texttt{KVYVVKIAVPCFPKSARSVS} & + & + \\ 27 | 30 & \texttt{GALFGLASKVFPAVFGAFKK} & 256 & + & \texttt{KVVLFGAAGAKLFKASFFGP} & \multicolumn{2}{c}{Not enough material} \\ 28 | \rowcolor[gray]{0.9} 29 | 31 & \texttt{SAVGKLASKVFPAVFSMVTK} & + & + & \texttt{FMKVLAVFGSVVTSAPKASK} & + & + \\ 30 | 33 & \texttt{VKDLAKFIAKTVAKQGGCYL} & ++ & ++ & \texttt{ALVYAGIKKTAFLKVQKCDG} & + & + \\ 31 | \rowcolor[gray]{0.9} 32 | 34 & \texttt{GVVGKLASKVVPSVFGSFTK} & + & + & \texttt{SVKPVGSSVVKGTALVKFFG} & + & + \\ 33 | 35 & \texttt{LPVVFRVASKVFPALISKLT} & + & 256 & \texttt{KVFIATLVVSSFLLAKPPRV} & + & + \\ 34 | \rowcolor[gray]{0.9} 35 | 36 & 
\texttt{SAVGSVASKVVPSLISKVTK} & + & + & \texttt{STVKVASKLAVVVSPISKGS} & + & + \\ 36 | 39 & \texttt{MKSIAKFIAKTVAKQGAKQG} & + & + & \texttt{AKKAQKSGAQTIVKIFAKGM} & + & + \\ 37 | \rowcolor[gray]{0.9} 38 | 42 & \texttt{LPAVFKLASKVVPSVFGLVK} & + & + & \texttt{VVAKKFFVLVKGLAPVLSPS} & + & + \\ 39 | 43 & \texttt{SFVFKLASKVVPSVFSALTR} & 256 & 256 & \texttt{ASPTVFRSSVFLSLFVVAKK} & + & + \\ 40 | \rowcolor[gray]{0.9} 41 | 44 & \texttt{SVIGKIASKVVPSVYCAISK} & + & + & \texttt{IASAVPVCVKGKISKSYISV} & + & + \\ 42 | 45 & \texttt{PVVGRVASKVFPAVIGLVKK} & + & + & \texttt{VKRAGKGVAVVPSPLFKIVV} & + & + \\ 43 | \rowcolor[gray]{0.9} 44 | 51 & \texttt{FLFRVASKVFPALIGKFKKK} & 64 & 16 & \texttt{RKVAPALIKSFVFLFKFKKG} & + & + \\ 45 | 55 & \texttt{LSFVGRVASKVVPSLISMIK} & 256 & + & \texttt{SSSIPIKMVLVRALVFVKSG} & + & + \\ 46 | \rowcolor[gray]{0.9} 47 | 56 & \texttt{SALGRLASKVVPAVIGKVTT} & + & + & \texttt{TLVGVVAKLVATKIGSSPRA} & + & + \\ 48 | 57 & \texttt{LGVVGSLASKVVPAVISKVK} & + & + & \texttt{PKVVGLSIVVVKAKVSSALG} & + & + \\ 49 | \rowcolor[gray]{0.9} 50 | 62 & \texttt{LPAVFKLASKVFPAVYCKAS} & 128 & + & \texttt{PSLLYKAKAVFCKPSAVAVF} & ++ & ++ \\ 51 | 63 & \texttt{LPVLFKLASKVFPAVFSSLK} & 256 & 64 & \texttt{VSVKKVLPFAPLKSLLSFAF} & 256 & 256 \\ 52 | \rowcolor[gray]{0.9} 53 | 65 & \texttt{VVGRVASKVVPSLIGLFTTK} & + & + & \texttt{FKVVISKPGLSVRVGTALVT} & ++ & ++ \\ 54 | 69 & \texttt{SVVFGVASKVVPSVIGKVKT} & + & + & \texttt{VFSVKGGKPSVVIKVVVAST} & + & + \\ 55 | \rowcolor[gray]{0.9} 56 | 75 & \texttt{FLPFVGRIASKVVPSVIGKV} & + & + & \texttt{SKFPLAGIFSVPGVKRVVVI} & + & + \\ 57 | 77 & \texttt{GKKLAKTIAKEVAKQGAKFA} & 64 & + & \texttt{VIAFAKTKEAKAKLKGQAKG} & + & + \\ 58 | \rowcolor[gray]{0.9} 59 | 81 & \texttt{PFVGRVASKVVPSVYCAITR} & \multicolumn{2}{c}{Not soluble} & \texttt{PAVYKSIVGFSPVARVTVCR} & \multicolumn{2}{c}{Not soluble} \\ 60 | 82 & \texttt{FVGSLASKVVPSVFGAIKTK} & + & + & \texttt{KTVPVVLKASIKVSSAGFGF} & + & + \\ 61 | \rowcolor[gray]{0.9} 62 | 83 & \texttt{LPVVFKIASKVVPSVISKIT} & + & + & 
\texttt{KIVKVITVKSISPASLVPVF} & ++ & ++ \\ 63 | 84 & \texttt{GAVFGVASKVVPSVFSAIKK} & + & + & \texttt{SVKVAKSVIPSAVFAGGKVF} & + & + \\ 64 | \rowcolor[gray]{0.9} 65 | 85 & \texttt{FVGGVASKVVPSVYCKVSKK} & + & + & \texttt{KVGKGSYPCSFVKVVAKVSV} & + & + \\ 66 | 88 & \texttt{VVFKLASKVVPSVYCTITKK} & 256 & + & \texttt{VKTKCSVPAVVYILVKTFKS} & + & + \\ 67 | \rowcolor[gray]{0.9} 68 | 96 & \texttt{GALFSLASKVVPAVIGLIKK} & 256 & + & \texttt{LPVLFSSAIAKVGIKLGAKV} & + & + \\ 69 | \hline\hline 70 | \end{tabular} 71 | \end{table} 72 | -------------------------------------------------------------------------------- /Body/Images-chap3/hmm-graph.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 20 | 22 | 38 | 40 | 41 | 43 | image/svg+xml 44 | 46 | 47 | 48 | 49 | 53 | 64 | 75 | 82 | 89 | 97 | 105 | 112 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /xkeyval.sty: -------------------------------------------------------------------------------- 1 | %% 2 | %% This is file `xkeyval.sty', 3 | %% generated with the docstrip utility. 4 | %% 5 | %% The original source files were: 6 | %% 7 | %% xkeyval.dtx (with options: `xkvlatex') 8 | %% 9 | %% --------------------------------------- 10 | %% Copyright (C) 2004-2005 Hendri Adriaens 11 | %% --------------------------------------- 12 | %% 13 | %% This work may be distributed and/or modified under the 14 | %% conditions of the LaTeX Project Public License, either version 1.3 15 | %% of this license or (at your option) any later version. 16 | %% The latest version of this license is in 17 | %% http://www.latex-project.org/lppl.txt 18 | %% and version 1.3 or later is part of all distributions of LaTeX 19 | %% version 2003/12/01 or later. 20 | %% 21 | %% This work has the LPPL maintenance status "maintained". 22 | %% 23 | %% This Current Maintainer of this work is Hendri Adriaens. 
24 | %% 25 | %% This work consists of the file xkeyval.dtx and derived files 26 | %% keyval.tex, xkvtxhdr.tex, xkeyval.sty, xkeyval.tex, xkvview.sty, 27 | %% xkvltxp.sty, pst-xkey.tex, pst-xkey.sty, xkveca.cls, xkvecb.cls, 28 | %% xkvesa.sty, xkvesb.sty, xkvesc.sty, xkvex1.tex, xkvex2.tex, 29 | %% xkvex3.tex and xkvex4.tex. 30 | %% 31 | %% The following files constitute the xkeyval bundle and must be 32 | %% distributed as a whole: readme, xkeyval.pdf, keyval.tex, 33 | %% pst-xkey.sty, pst-xkey.tex, xkeyval.sty, xkeyval.tex, xkvview.sty, 34 | %% xkvltxp.sty, xkvtxhdr.tex, pst-xkey.dtx and xkeyval.dtx. 35 | %% 36 | \NeedsTeXFormat{LaTeX2e}[1995/12/01] 37 | \ProvidesPackage{xkeyval} 38 | [2005/11/25 v2.5e package option processing (HA)] 39 | \ifx\XKeyValLoaded\endinput\else\input xkeyval \fi 40 | \edef\XKVcatcodes{% 41 | \catcode`\noexpand\=\the\catcode`\=\relax 42 | \catcode`\noexpand\,\the\catcode`\,\relax 43 | \let\noexpand\XKVcatcodes\relax 44 | } 45 | \catcode`\=12\relax 46 | \catcode`\,12\relax 47 | \let\XKV@doxs\relax 48 | \def\XKV@warn#1{\PackageWarning{xkeyval}{#1}} 49 | \def\XKV@err#1{\PackageError{xkeyval}{#1}\@ehc} 50 | \XKV@whilist\@filelist\XKV@tempa\ifx\XKV@documentclass\@undefined\fi{% 51 | \filename@parse\XKV@tempa 52 | \ifx\filename@ext\@clsextension 53 | \XKV@ifundefined{opt@\filename@area\filename@base.\filename@ext 54 | }{}{% 55 | \edef\XKV@documentclass{% 56 | \filename@area\filename@base.\filename@ext 57 | }% 58 | }% 59 | \fi 60 | } 61 | \ifx\XKV@documentclass\@undefined 62 | \XKV@err{xkeyval loaded before \protect\documentclass}% 63 | \let\XKV@documentclass\@empty 64 | \let\XKV@classoptionslist\@empty 65 | \else 66 | \let\XKV@classoptionslist\@classoptionslist 67 | \def\XKV@tempa#1{% 68 | \let\@classoptionslist\@empty 69 | \XKV@for@n{#1}\XKV@tempa{% 70 | \expandafter\in@\expandafter=\expandafter{\XKV@tempa}% 71 | \ifin@\else\XKV@addtolist@o\@classoptionslist\XKV@tempa\fi 72 | }% 73 | } 74 | 
\expandafter\XKV@tempa\expandafter{\@classoptionslist} 75 | \fi 76 | \def\XKV@testopte#1{% 77 | \XKV@ifstar{\XKV@sttrue\XKV@t@stopte#1}{\XKV@stfalse\XKV@t@stopte#1}% 78 | } 79 | \def\XKV@t@stopte#1{\@testopt{\XKV@t@st@pte#1}{KV}} 80 | \def\XKV@t@st@pte#1[#2]{% 81 | \XKV@makepf{#2}% 82 | \@ifnextchar<{\XKV@@t@st@pte#1}% 83 | {\XKV@@t@st@pte#1<\@currname.\@currext>}% 84 | } 85 | \def\XKV@@t@st@pte#1<#2>{% 86 | \XKV@sp@deflist\XKV@fams{#2}% 87 | \@testopt#1{}% 88 | } 89 | \def\DeclareOptionX{% 90 | \let\@fileswith@pti@ns\@badrequireerror 91 | \XKV@ifstar\XKV@dox\XKV@d@x 92 | } 93 | \long\def\XKV@dox#1{\XKV@toks{#1}\edef\XKV@doxs{\the\XKV@toks}} 94 | \def\XKV@d@x{\@testopt\XKV@@d@x{KV}} 95 | \def\XKV@@d@x[#1]{% 96 | \@ifnextchar<{\XKV@@@d@x[#1]}{\XKV@@@d@x[#1]<\@currname.\@currext>}% 97 | } 98 | \def\XKV@@@d@x[#1]<#2>#3{\@testopt{\define@key[#1]{#2}{#3}}{}} 99 | \def\ExecuteOptionsX{\XKV@stfalse\XKV@plfalse\XKV@t@stopte\XKV@setkeys} 100 | \def\ProcessOptionsX{\XKV@plfalse\XKV@testopte\XKV@pox} 101 | \def\XKV@pox[#1]{% 102 | \let\XKV@tempa\@empty 103 | \XKV@inpoxtrue 104 | \let\@fileswith@pti@ns\@badrequireerror 105 | \edef\XKV@testclass{\@currname.\@currext}% 106 | \ifx\XKV@testclass\XKV@documentclass 107 | \let\@unusedoptionlist\XKV@classoptionslist 108 | \XKV@ifundefined{ver@xkvltxp.sty}{}{% 109 | \@onelevel@sanitize\@unusedoptionlist 110 | }% 111 | \else 112 | \ifXKV@st 113 | \def\XKV@tempb##1,{% 114 | \def\CurrentOption{##1}% 115 | \ifx\CurrentOption\@nnil\else 116 | \XKV@g@tkeyname##1=\@nil\CurrentOption 117 | \XKV@key@if@ndefined{\CurrentOption}{}{% 118 | \XKV@useoption{##1}% 119 | \XKV@addtolist@n\XKV@tempa{##1}% 120 | }% 121 | \expandafter\XKV@tempb 122 | \fi 123 | }% 124 | \expandafter\XKV@tempb\XKV@classoptionslist,\@nil,% 125 | \fi 126 | \fi 127 | \expandafter\XKV@addtolist@o\expandafter 128 | \XKV@tempa\csname opt@\@currname.\@currext\endcsname 129 | \def\XKV@tempb{\XKV@setkeys[#1]}% 130 | \expandafter\XKV@tempb\expandafter{\XKV@tempa}% 131 | 
\let\XKV@doxs\relax 132 | \let\XKV@rm\@empty 133 | \XKV@inpoxfalse 134 | \let\@fileswith@pti@ns\@@fileswith@pti@ns 135 | \AtEndOfPackage{\let\@unprocessedoptions\relax}% 136 | } 137 | \def\XKV@useoption#1{% 138 | \def\XKV@resa{#1}% 139 | \XKV@ifundefined{ver@xkvltxp.sty}{}{% 140 | \@onelevel@sanitize\XKV@resa 141 | }% 142 | \@expandtwoargs\@removeelement{\XKV@resa}% 143 | {\@unusedoptionlist}\@unusedoptionlist 144 | } 145 | \DeclareOptionX*{% 146 | \PackageWarning{xkeyval}{Unknown option `\CurrentOption'}% 147 | } 148 | \ProcessOptionsX 149 | \XKVcatcodes 150 | \endinput 151 | %% 152 | %% End of file `xkeyval.sty'. 153 | -------------------------------------------------------------------------------- /Body/Images-chap4/fig_pdaAlign.tex: -------------------------------------------------------------------------------- 1 | \psset{xunit=0.4cm,yunit=0.4cm} 2 | \begin{pspicture}(-1,-1)(23,31)%\showgrid 3 | \rput(3,25){ 4 | \rput(3.1,6.5){writer 1} 5 | \scalebox{0.25}{ 6 | %\psframe[linewidth=1mm](0,0)(23,23) 7 | \psframe(0,0)(23,23) 8 | % \pscurve[linewidth=2mm,linecolor=gray](4,18)(10,20)(18,17)(13,12)(15,8)(10,4)(5,6) 9 | \pscurve[linewidth=2mm,linecolor=lightgray](19.5,19.5)(11.5,22)(11.5,14.5)(20.5,18.5)(22.5,10.5)(18.5,3.5)(9.5,0.5)(0.5,1.5) 10 | \pnode(0,0){bl} 11 | \pnode(23,0){br} 12 | %\psgrid[griddots=5,subgriddiv=0,gridlabels=0pt](0,0)(23,23) 13 | } 14 | } 15 | \rput(14.5,25){ 16 | \rput(3.1,6.5){writer 2} 17 | \scalebox{0.25}{ 18 | %\psframe[linewidth=1mm](0,0)(23,23) 19 | \psframe(0,0)(23,23) 20 | \pscurve[linewidth=2mm,linecolor=gray](15.5,22.5)(6.5,20.5)(8.5,13.5)(18.5,18.5)(22.5,11.5)(19.5,3.5)(9.5,0.5)(0.5,1.5) 21 | \pnode(0,0){bll} 22 | \pnode(23,0){brr} 23 | %\psgrid[griddots=5,subgriddiv=0,gridlabels=0pt](0,0)(23,23) 24 | } 25 | } 26 | \rput(0,0){ 27 | \pnode(0,23){tl} 28 | \pnode(23,23){tr} 29 | \ncline[linestyle=dotted]{bl}{tl} 30 | \ncline[linestyle=dotted]{bll}{tl} 31 | \ncline[linestyle=dotted]{br}{tr} 32 | \ncline[linestyle=dotted]{brr}{tr} 33 | 
\psgrid[griddots=5,subgriddiv=0,gridlabels=0pt](0,0)(23,23) 34 | %\psframe(0,0)(23,23) 35 | } 36 | \rput[tl](-1,22.5){ 37 | \rput(0,0){\small \texttt X} 38 | \rput(0,-1){\small \texttt Z} 39 | \rput(0,-2){\small \texttt B} 40 | \rput(0,-3){\small \texttt V} 41 | \rput(0,-4){\small \texttt Y} 42 | \rput(0,-5){\small \texttt W} 43 | \rput(0,-6){\small \texttt T} 44 | \rput(0,-7){\small \texttt S} 45 | \rput(0,-8){\small \texttt P} 46 | \rput(0,-9){\small \texttt F} 47 | \rput(0,-10){\small \texttt M} 48 | \rput(0,-11){\small \texttt K} 49 | \rput(0,-12){\small \texttt L} 50 | \rput(0,-13){\small \texttt I} 51 | \rput(0,-14){\small \texttt H} 52 | \rput(0,-15){\small \texttt G} 53 | \rput(0,-16){\small \texttt E} 54 | \rput(0,-17){\small \texttt Q} 55 | \rput(0,-18){\small \texttt C} 56 | \rput(0,-19){\small \texttt D} 57 | \rput(0,-20){\small \texttt N} 58 | \rput(0,-21){\small \texttt R} 59 | \rput(0,-22){\small \texttt A} 60 | } 61 | \rput[tl](0.5,-1){ 62 | \rput[t](0,0){\small \texttt A} 63 | \rput[t](1,0){\small \texttt R} 64 | \rput[t](2,0){\small \texttt N} 65 | \rput[t](3,0){\small \texttt D} 66 | \rput[t](4,0){\small \texttt C} 67 | \rput[t](5,0){\small \texttt Q} 68 | \rput[t](6,0){\small \texttt E} 69 | \rput[t](7,0){\small \texttt G} 70 | \rput[t](8,0){\small \texttt H} 71 | \rput[t](9,0){\small \texttt I} 72 | \rput[t](10,0){\small \texttt L} 73 | \rput[t](11,0){\small \texttt K} 74 | \rput[t](12,0){\small \texttt M} 75 | \rput[t](13,0){\small \texttt F} 76 | \rput[t](14,0){\small \texttt P} 77 | \rput[t](15,0){\small \texttt S} 78 | \rput[t](16,0){\small \texttt T} 79 | \rput[t](17,0){\small \texttt W} 80 | \rput[t](18,0){\small \texttt Y} 81 | \rput[t](19,0){\small \texttt V} 82 | \rput[t](20,0){\small \texttt B} 83 | \rput[t](21,0){\small \texttt Z} 84 | \rput[t](22,0){\small \texttt X} 85 | } 86 | % sequence 1 = VXEBGTVYXSZEFAAR :::: VVKXKPBYXLYDIAAR 87 | % sequence 2 = ZVFXKSXYXPBEFAAR :::: SCEBHFYYXKVDIAAR 88 | 89 | \rput(4,6){ 90 | 
\psframe[fillstyle=solid,fillcolor=white,linewidth=0.5mm,linecolor=gray](0.1,0)(15.2,5) 91 | \rput(5.0,0.25){ 92 | \rput[bl](1,3){\texttt{VVKXKPBYXLYDIAAR}} 93 | \rput[bl](1,2){\texttt{.......::..:::::}} 94 | \rput[bl](1,1){\texttt{SXEBHFYYXKVDIAAR}} 95 | \rput[br](-1,3){writer 1:} 96 | \rput[br](-1,1){writer 2:} 97 | 98 | } 99 | } 100 | \rput(0,0){ 101 | \newcommand{\mydot}{\psdot(0,0)} 102 | \psset{arrowscale=2} 103 | \psset{linewidth=0.5mm} 104 | \pscurve[linewidth=2mm,linecolor=gray](15.5,22.5)(6.5,20.5)(8.5,13.5)(18.5,18.5)(22.5,11.5)(19.5,3.5)(9.5,0.5)(0.5,1.5) 105 | 106 | \rput(15.5,22.5){\rnode{a}{\mydot}} 107 | \rput[r](15.5,22.5){} 108 | 109 | \rput(6.5,20.5){\rnode{b}{\mydot}} 110 | \rput[r](6.5,20.5){} 111 | 112 | \rput(8.5,13.5){\rnode{c}{\mydot}} 113 | \rput[r](8.5,13.5){} 114 | 115 | \rput(18.5,18.5){\rnode{d}{\mydot}} 116 | \rput[r](18.5,18.5){} 117 | 118 | \rput(22.5,11.5){\rnode{e}{\mydot}} 119 | \rput[r](22.5,11.5){} 120 | 121 | \rput(19.5,3.5){\rnode{f}{\mydot}} 122 | \rput[r](19.5,3.5){} 123 | 124 | \rput(9.5,0.5){\rnode{g}{\mydot}} 125 | \rput[r](9.5,0.5){} 126 | 127 | \rput(0.5,1.5){\rnode{h}{\mydot}} 128 | \rput[r](0.5,1.5){} 129 | 130 | } 131 | \rput(0,0){ 132 | \newcommand{\mydot}{\psdot(0,0)} 133 | \psset{arrowscale=2} 134 | \psset{linewidth=0.5mm} 135 | \pscurve[linewidth=2mm,linecolor=lightgray](19.5,19.5)(11.5,22)(11.5,14.5)(20.5,18.5)(22.5,10.5)(18.5,3.5)(9.5,0.5)(0.5,1.5) 136 | 137 | \rput(19.5,19.5){\rnode{aa}{\mydot}} 138 | \rput[r](21.5,19.5){} 139 | 140 | \rput(11.5,22){\rnode{bb}{\mydot}} 141 | \rput[r](11.5,22){} 142 | 143 | \rput(11.5,14.5){\rnode{cc}{\mydot}} 144 | \rput[r](11.5,15.5){} 145 | 146 | \rput(20.5,18.5){\rnode{dd}{\mydot}} 147 | \rput[r](20.5,18.5){} 148 | 149 | \rput(22.5,10.5){\rnode{ee}{\mydot}} 150 | \rput[r](22.5,10.5){} 151 | 152 | \rput(18.5,3.5){\rnode{ff}{\mydot}} 153 | \rput[r](18.5,3.5){} 154 | 155 | \rput(9.5,0.5){\rnode{gg}{\mydot}} 156 | \rput[r](9.5,0.5){} 157 | 158 | 
\rput(0.5,1.5){\rnode{hh}{\mydot}} 159 | \rput[r](0.5,1.5){} 160 | 161 | \ncline{cc-}{a}{aa} 162 | \ncline{cc-}{b}{bb} 163 | \ncline{cc-}{c}{cc} 164 | \ncline{cc-}{d}{dd} 165 | \ncline{cc-}{e}{ee} 166 | \ncline{cc-}{f}{ff} 167 | \ncline{cc-}{g}{gg} 168 | \ncline{cc-}{h}{hh} 169 | 170 | } 171 | \end{pspicture} 172 | -------------------------------------------------------------------------------- /Body/header.tex: -------------------------------------------------------------------------------- 1 | \usepackage{url} 2 | \usepackage{textcomp} 3 | \usepackage{verbatim} 4 | \usepackage{amsmath} 5 | \usepackage{amsfonts} 6 | \usepackage{amssymb} % if you want extra symbols 7 | \usepackage{mathrsfs} 8 | \usepackage{program} 9 | \usepackage{newlfont} 10 | \usepackage{rotating} 11 | \usepackage{varioref} 12 | \usepackage{graphicx} 13 | %\usepackage{txfont} 14 | \usepackage{makeidx} 15 | \usepackage{tocbibind} 16 | \usepackage{program} 17 | \usepackage{import} 18 | \usepackage{subfigure} 19 | \usepackage{verbatim} 20 | \usepackage{colortbl} 21 | 22 | %\usepackage[LY1]{fontenc} 23 | %\usepackage{patchcmd} 24 | %\usepackage{myfss} 25 | %\usepackage{caslon} 26 | %\renewcommand{\encodingdefault}{LY1} 27 | %\rmshape \rgshape 28 | 29 | \usepackage[oldstyle]{agaramond} 30 | %\usepackage[lining]{agaramond} 31 | \usepackage[small]{eulervm} 32 | \usepackage{courier} % for texttt 33 | 34 | 35 | 36 | \usepackage{ulem} %underlines 37 | \normalem % normal emph w/ ulem 38 | 39 | \usepackage[numbers,square,sort&compress]{natbib} 40 | \usepackage[pdftex,plainpages=false,breaklinks=true,colorlinks=true,urlcolor=blue,citecolor=blue, linkcolor=blue,bookmarks=true,bookmarksopen=true,bookmarksopenlevel=0,pdfstartview=Fit,pdfview=Fit,pagebackref,linktocpage=true,bookmarksnumbered=true]{hyperref} 41 | \usepackage{hypernat} 42 | \usepackage{array} 43 | \usepackage{supertabular} 44 | 45 | %%% stuff for doxygen 46 | %\usepackage{times} 47 | \usepackage{multicol} 48 | \usepackage{multirow} 49 | 
\usepackage{float} 50 | \usepackage{alltt} 51 | \usepackage{Body/appb/doxygen} 52 | %%%%%%%%%%% 53 | 54 | 55 | 56 | 57 | % Symbols used by the authors 58 | \DeclareMathOperator{\suffix}{suffix} 59 | \DeclareMathOperator{\prefix}{prefix} 60 | \DeclareMathOperator{\prob}{Pr} 61 | \newcommand{\conv}{\curvearrowright} 62 | \newcommand{\ttt}[1]{\texttt{#1}} 63 | \newcommand{\vect}[1]{\begin{pmatrix}#1\end{pmatrix}} 64 | \newcommand{\paren}[1]{\left(#1\right)} 65 | \newcommand{\brac}[1]{\left[#1\right]} 66 | \newcommand{\braces}[1]{\left\{#1\right\}} 67 | \newcommand{\avector}[2]{(#1_1,#1_2,\ldots,#1_{#2})} 68 | \newcommand{\aset}[2]{{#1_1,#1_2,\ldots,#1_{#2}}} 69 | \newcommand{\ith}[1]{\ensuremath{{#1^{\textrm{th}}}}} 70 | \newcommand{\nd}[1]{\ensuremath{{#1^{\textrm{nd}}}}} 71 | \DeclareSymbolFont{AMSb}{U}{msb}{m}{n} 72 | \DeclareMathSymbol{\N}{\mathbin}{AMSb}{"4E} 73 | \DeclareMathSymbol{\realNums}{\mathbin}{AMSb}{"52} 74 | 75 | 76 | \newcommand{\curls}[1]{\left\{#1\right\}} 77 | %\newcommand{\teirRegEx}{\ensuremath{\paren{\Sigma\cup\curls{\brac{\Sigma\Sigma^{\ast}\Sigma}}}\paren{\Sigma\cup\curls{.}\cup\curls{\brac{\Sigma\Sigma^{\ast}\Sigma}}}^{\ast}\paren{\Sigma\cup\curls{\brac{\Sigma\Sigma^{\ast}\Sigma}}}\cup\Sigma}} 78 | \newcommand{\teirRegEx}{\ensuremath{\Sigma\paren{\Sigma\cup\curls{.}}\Sigma}} 79 | \newcommand{\teiresias}{\texttt{TEIRESIAS}} 80 | \newcommand{\Teiresias}{\texttt{TEIRESIAS}} 81 | \newcommand{\Fasta}{FastA} 82 | \newcommand{\fasta}{FastA} 83 | \newcommand{\psiblast}{psi--Blast} 84 | \newcommand{\prosite}{PROSITE} 85 | \newcommand{\biodictionary}{Bio--Dictionary} 86 | \newcommand{\genbank}{GENBANK} 87 | \newcommand{\embl}{EMBL} 88 | \newcommand{\etal}{\emph{et.\ al.\ }} 89 | \newcommand{\sptr}{SwissProt/TrEMBL} 90 | \newcommand{\swissp}{SWISS--PROT} 91 | \newcommand{\swissprot}{\swissp} 92 | \newcommand{\swissptr}{SWISS--PROT/TrEMBL} 93 | \newcommand{\swissprottrembl}{\swissptr} 94 | \newcommand{\amsdb}{AMSDb} 95 | \newcommand{\ncbi}{NCBI} 96 | 
\newcommand{\blosum}{BLOSUM} 97 | \newcommand{\pam}{PAM} 98 | \newcommand{\oligo}{oligonucleotide} 99 | \newcommand{\Oligo}{Oligonucleotide} 100 | \newcommand{\blast}{BLAST} 101 | \newcommand{\pr}[1]{\prob\left(#1\right)} 102 | \newcommand{\prt}[1]{\prob\left(\textrm{#1}\right)} 103 | \newcommand{\cp}[2]{\prob\left(#1\mid #2\right)} 104 | \newcommand{\cpt}[2]{\prob\left(\textrm{#1}\mid \textrm{#2}\right)} 105 | \newcommand{\ex}[1]{\mathbf{E}\left[#1\right]} 106 | %\newcommand{\var}[1]{\textrm{var}\left(#1\right)} 107 | \newcommand{\phip}[3]{\Phi\paren{\frac{#1-\paren{#2}}{#3}}} 108 | %\newcommand{\vect}[1]{\mathbf{#1}} 109 | \newcommand{\ten}[1]{\mathbf{#1}} 110 | \newcommand{\pdf}[2]{p_{#1}\left(#2\right)} 111 | \newcommand{\pmf}[2]{p_{#1}\left(#2\right)} 112 | \newcommand{\transf}[2]{M_{#1}\left(#2\right)} 113 | \newcommand{\expo}[1]{\exp\left[#1\right]} 114 | \newcommand{\pd}[2]{\frac{\partial}{\partial #2}\brac{#1}} 115 | \setlength{\extrarowheight}{3pt} 116 | \newcommand{\marnote}[1]{\marginpar{\raggedleft\footnotesize\bfseries\hspace{0pt} #1}} 117 | 118 | \usepackage{fancyhdr} 119 | %\renewcommand{\chaptermark}[1]{\markboth{\textit{\chaptername}\ \thechapter.\ #1}{}} 120 | 121 | %this defines the basic headers and footer 122 | % styles when we use the 'fancyhdr' styles 123 | %\lhead[\fancyplain{}{\itshape\footnotesize\thepage}]{\fancyplain{}{\itshape\footnotesize\rightmark}} 124 | %\rhead[\fancyplain{}{\itshape\footnotesize\leftmark}]{\fancyplain{}{\itshape\footnotesize\thepage}} 125 | %\lhead[\fancyplain{}\bfseries\thepage]{\fancyplain{}\bfseries\rightmark} 126 | %\rhead[\fancyplain{}\bfseries\leftmark]{\fancyplain{}\bfseries\thepage} 127 | %\pagestyle{fancyplain} 128 | \addtolength{\headwidth}{0.5\marginparsep} 129 | \addtolength{\headwidth}{0.5\marginparwidth} 130 | %\renewcommand{\chaptermark}[1]{\markboth{#1}{}} 131 | %\renewcommand{\sectionmark}[1]{\markright{\thesection\ #1}} 132 | 
\lhead[\fancyplain{}{\footnotesize\thepage}]{\fancyplain{}{\footnotesize\rightmark}} 133 | \rhead[\fancyplain{}{\footnotesize\leftmark}]{\fancyplain{}{\footnotesize\thepage}} 134 | \cfoot{} 135 | \cfoot{} 136 | 137 | % Special Float captions 138 | % Different font in captions 139 | \newcommand{\captionfonts}{\mdseries} 140 | \newcommand{\floatnamefonts}{\bfseries} 141 | \makeatletter % Allow the use of @ in command names 142 | \long\def\@makecaption#1#2{% 143 | \vskip\abovecaptionskip 144 | \sbox\@tempboxa{{\floatnamefonts #1:~~\captionfonts #2}}% 145 | \ifdim \wd\@tempboxa >\hsize 146 | {\floatnamefonts #1: \captionfonts #2\par} 147 | \else 148 | \hbox to\hsize{\hfil\box\@tempboxa\hfil}% 149 | \fi 150 | \vskip\belowcaptionskip} 151 | \makeatother % Cancel the effect of \makeatletter 152 | 153 | \makeindex 154 | -------------------------------------------------------------------------------- /Body/Images-appa/splicing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import string 3 | import sys 4 | import re 5 | 6 | # Usage 7 | def usage(): 8 | print sys.argv[0], ": searches for IUPAC regular expressions in a fasta file" 9 | print "Usage: ", sys.argv[0], " " 10 | 11 | class IntronSeq: 12 | """A simple intron class""" 13 | def __init__(self, start, stop): 14 | self.start = start 15 | self.stop = stop 16 | self.length = stop-start 17 | def Start(self): 18 | return self.start 19 | def Stop(self): 20 | return self.stop 21 | def Length(self): 22 | return self.length 23 | 24 | # A simple fasta sequence class 25 | class FastaSeq: 26 | """A simple fasta sequence class""" 27 | def __init__(self, label, seq): 28 | self.label = label 29 | self.seq = seq 30 | def getSeq(self): 31 | return self.seq 32 | def getLabel(self): 33 | return self.label 34 | def appendLabel(self, newLabel): 35 | self.label = self.label + newLabel 36 | def appendSeq(self, newSeq): 37 | self.seq = self.seq + newSeq 38 | def 
getLength(self): 39 | return len(self.seq) 40 | def getSubSeq(self, start, length): 41 | return self.seq[start:start+length] 42 | def getSubSeq2(self, start, stop): 43 | return self.seq[start:stop] 44 | 45 | # finds a regular expression in the sequence and returns 46 | # a list of integers specifying where the pattern starts 47 | def findRegEx(self, pattern): 48 | lastOffset = 0 49 | count = 0 50 | myRegEx = re.compile(pattern) 51 | positions = [] 52 | while 1: 53 | match = myRegEx.search(self.seq, lastOffset) 54 | if match: 55 | count = count + 1 56 | lastOffset = match.start() + 1 57 | positions.append( match.start() ) 58 | else: 59 | break 60 | return positions 61 | 62 | # finds a regular expression in the sequence and returns 63 | # a list of integers specifying where the pattern ends 64 | def findRegExEnd(self, pattern): 65 | lastOffset = 0 66 | count = 0 67 | myRegEx = re.compile(pattern) 68 | positions = [] 69 | while 1: 70 | match = myRegEx.search(self.seq, lastOffset) 71 | if match: 72 | count = count + 1 73 | lastOffset = match.end() + 1 74 | positions.append( match.end() ) 75 | else: 76 | break 77 | return positions 78 | 79 | def getIntrons(mySeq, fpRegEx, tpRegEx): 80 | fpSites = mySeq.findRegEx(fpRegEx) 81 | tpSites = mySeq.findRegExEnd(tpRegEx) 82 | i=0 83 | j=0 84 | introns = [] 85 | while i": 108 | # we hit a label line 109 | newSeq = FastaSeq(line[1:], "") 110 | mySeqs.append( newSeq ) 111 | numSeq = numSeq + 1 112 | else: 113 | # not a label line 114 | mySeqs[ numSeq - 1 ].appendSeq(line) 115 | 116 | return mySeqs 117 | 118 | def iupac2regex(motif): 119 | regex = motif 120 | i2rTable = { 'N': '[ATGC]', 121 | 'K': '[GT]', 122 | 'B': '[CGT]', 123 | 'S': '[CG]', 124 | 'V': '[ACG]', 125 | 'Y': '[CT]', 126 | 'W': '[AT]', 127 | 'D': '[AGT]', 128 | 'R': '[GA]', 129 | 'M': '[AC]', 130 | 'H': '[ACT]'} 131 | for old,new in i2rTable.iteritems(): 132 | regex = string.replace(regex, old, new) 133 | return regex 134 | 135 | # Main 136 | def main(): 137 | 138 | 
# see if we got the right number of command line args 139 | if len(sys.argv) != 3: 140 | usage() 141 | sys.exit(2) 142 | 143 | # get command line paramters 144 | fname1 = sys.argv[1] 145 | fname2 = sys.argv[2] 146 | 147 | # open the motif file 148 | try: 149 | motifFH = open(fname1, "r") 150 | except IOError, (errno, strerror): 151 | print "Error %s: %s" % (errno, strerror) 152 | sys.exit() 153 | 154 | # open the fasta file 155 | try: 156 | fastaFH = open(fname2, "r") 157 | except IOError, (errno, strerror): 158 | print "Error %s: %s" % (errno, strerror) 159 | sys.exit() 160 | 161 | # read in the DNA sequences 162 | myDnaSeqs = readFastaFile(fastaFH) 163 | 164 | # read in our regexs 165 | regexs = [] 166 | patterns = [] 167 | for line in motifFH: 168 | regexs.append( iupac2regex( string.strip(line) ) ) 169 | patterns.append(string.strip(line)) 170 | # assume 0 = 5' && 1 = 3' 171 | 172 | # splice each seq 173 | for seq in myDnaSeqs: 174 | introns = getIntrons(seq, regexs[0], regexs[1]) 175 | splicedSeq = "" 176 | last = 0 177 | for intron in introns: 178 | #if intron.Length() >= 300 and intron.Length() <= 600: 179 | print "found 5' at %d" % (intron.Start()) 180 | print "found 3' at %d" % (intron.Stop()-len(patterns[1])) 181 | splicedSeq = splicedSeq + seq.seq[last:intron.Start()] 182 | last = intron.Stop() 183 | splicedSeq = splicedSeq + seq.seq[last:seq.getLength()] 184 | print "Concatination of exons:" 185 | print splicedSeq 186 | 187 | 188 | # execute main 189 | if __name__ == "__main__": 190 | sys.exit(main()) 191 | 192 | 193 | -------------------------------------------------------------------------------- /Body/Images-chap3/spot.fa: -------------------------------------------------------------------------------- 1 | >sp|O67012|SPOT\_AQUAE - Aquifex aeolicus. 
2 | MSKLGEVSLEEDLEKLLSHYPQHAEEIQRAYEFAKEKHGEQKRKTGEPYIIHPLNVALKLAELGMDHETIIAALLHDTLEDTDTTYEEIKERFGERVAKLVEGVTKIGKIKYKSEQAENYRKLILATAE 3 | DPRVILLKLSDRLDNVKTLWVFREEKRKKIAKETMEIYAPLAHRLGVWSIKNELEDWAFKYLYPEEYEKVRNFVKESRKNLEEYLRKYVIPKVRKELEKYGIEAEIKYRSKHYYSIWEKTRRKGIRLED 4 | VHDILGVRIIVNTVPECYTVLGIIHSLFRPVPGKFKDYISLPKPNLYQSLHTTVIADKGKLVEFQIRTWEMHERAEKGIASHWAYKEGKNPSDAGVYSWLRELVESIQGSTNPSEVLENLKSNLFFEEV 5 | FVFTPKGDLVVLPKGSTPVDLAYKIHTEVGNHCAGAKSNGRIVPLNYELKSGDVVEIITNPNKSPSYEWLSFVKTSRARNKIKQFLKKQERERYLSEGKRILERIREKLGLSHEDLINKIRERVRFDTE 6 | EELLLALGKRKISSANLIKLIFPKKKEEKEERRGSSTVFLEDLSNIKHEVAKCCKPIPGDEILGVITRTKGLVLHEKSCSNLKNVLRLNPEKVKEVQLQASGYFQTDIRVVASDRIGLLSDITKVISES 7 | GSNIVSSMTNTREGKAVMDFTVEVKNKEHLEKIMKKIKSVEGVKICKRLYH 8 | 9 | >sp|O51216|SPOT\_BORBU - Borrelia burgdorferi (Lyme disease spirochete). 10 | MIQAYEIAHLIKINDLEKARNIFKKTVENTYKDEFERKSIFKALEIAEQLHYGQYRESGEPYIIHPIMVSLFLAKFQLDFKATIAGLLHDVLEDTNVEKEEIVKEFDEEILSLIDGVTKIHDLHNKTRS 11 | IKEANTISKMFFAMTHDIRIIIIKLADKLHNMTTLSYLPKNRQDRIAKDCLSTYVPIAERLGISSLKTYLEDLSFKHLYPKDYKEIKNFLSETKIEREKKLYKGKLSIEKELQKSGIEAEITVRSKHFY 12 | SIFRKMQTRTNKLTQIFDTLGIRIICKKQKECYEILEIVHRVWKPIPGRLKDYIASPKENKYQSLHTTVRIPEDNQLIEIQIRTEEMDRIANYGVAAHWIYKEQIELKADDLSFINRIKKWQQESANKS 13 | QYSMNDIHKELLNTFIYVYTPEGEVVELPFGSNSIDFAYIIHTDIGDQALYAKINGKISSITKPLKNEQIVEIFTSKDSKPDVIWLNSVRTKKARSKIRSWLNKNDNTIFVDNNIIAYLVGANKEQRKL 14 | FSLFKSYTKTKIKRIAIDPECSPTTGEDIIGIIHKDEIIVHNENCQKLKSYKKPQLIEVEWEATPTRKVHHIILLLKELKGIFSYLENIFTLNDVRLISEKIEDCGNGHGITNIIVSSNAKNITKIISA 15 | LKENPNILQIMQIEEDIKNYDN 16 | 17 | >sp|P17580|SPOT\_ECOLI - Escherichia coli, Escherichia coli O157:H7, and Shigella flexneri. 
18 | MYLFESLNQLIQTYLPEDQIKRLRQAYLVARDAHEGQTRSSGEPYITHPVAVACILAEMKLDYETLMAALLHDVIEDTPATYQDMEQLFGKSVAELVEGVSKLDKLKFRDKKEAQAENFRKMIMAMVQD 19 | IRVILIKLADRTHNMRTLGSLRPDKRRRIARETLEIYSPLAHRLGIHHIKTELEELGFEALYPNRYRVIKEVVKAARGNRKEMIQKILSEIEGRLQEAGIPCRVSGREKHLYSIYCKMVLKEQRFHSIM 20 | DIYAFRVIVNDSDTCYRVLGQMHSLYKPRPGRVKDYIAIPKANGYQSLHTSMIGPHGVPVEVQIRTEDMDQMAEMGVAAHWAYKEHGETSTTAQIRAQRWMQSLLELQQSAGSSFEFIESVKSDLFPDE 21 | IYVFTPEGRIVELPAGATPVDFAYAVHTDIGHACVGARVDRQPYPLSQPLTSGQTVEIITAPGARPNAAWLNFVVSSKARAKIRQLLKNLKRDDSVSLGRRLLNHALGGSRKLNEIPQENIQRELDRMK 22 | LATLDDLLAEIGLGNAMSVVVAKNLQHGDASIPPATQSHGHLPIKGADGVLITFAKCCRPIPGDPIIAHVSPGKGLVIHHESCRNIRGYQKEPEKFMAVEWDKETAQEFITEIKVEMFNHQGALANLTA 23 | AINTTTSNIQSLNTEEKDGRVYSAFIRLTARDRVHLANIMRKIRVMPDVIKVTRNRN 24 | 25 | >sp|P43811|SPOT\_HAEIN - Haemophilus influenzae. 26 | MIARDAHEGQFRSSGEPYITHPVAVASIIAQLHLDHEAVMAALLHDVIEDTPYTEEQLKEEFGASVAEIVDGVSKLDKLKFRTRQEAQVENFRKMILAMTRDIRVVLIKLADRTHNMRTLGSLRPDKRR 27 | RIAKETLEIYCPLAHRLGIEHIKNELEDLSFQAMHPHRYEVLKKLVDVARSNRQDLIERISQEIKVRLENSGIFARVWGREKHLYKIYQKMRIKDQEFHSIMDIYAFRVIVKNVDDCYRVLGQMHNLYK 28 | PRPGRVKDYIAVPKANGYQSLQTSMIGPKGVPVEVHIHTEDMEQVAEMGITAHWVYKENGKNDSTTAQIRVQRWLQSLVEIQQSVGNSFEFIENVKSEFFPKEIYVFTPKGRIVELPMGATAVDFAYAV 29 | HSDVGNTCVGVTVEHKPYPLSKALESGQTVNIITDPNAHPEVAWLNFVVTARAKTRIRHYLKQRCEEDAVKLGEVELNVALQPHNLGDFSIQQIRTVLDALALSSLDELLREIGLGNQSASMIAHQFVG 30 | VPLESANTKNLEFESKILTIAPMQVGKTQFAQCCHPILGDPIVGCCTEKNTVVVHHQHCASLKNACRQSLAKWDNVQSAVNFEAELQIEILNEQNALLSLMTAISASESSLQNIWTEELENNLLLVILQ 31 | VCVKDIKHLANIVHRIKGITGVVNVKRNINEL 32 | 33 | >sp|P47520|SPOT\_MYCGE Probable - Mycoplasma genitalium. 
34 | MATIQEIECDFLAKIAQKFTNAEIELINKAFYHAKTWHENQKRLSGEPFFIHPLRTALSLVEWNMDPITICAGLLHDIIEDTDQTEANIAMIFSKEIAELVTKVTKITNESKKQRHLKNKKENLNLKSF 35 | VNIAINSQQEINVMVLKLADRLDNIASIEFLPIEKQKVIAKETLELYAKIAGRIGMYPVKTKLADLSFKVLDLKNYDNTLSKINKQKVFYDNEWDNFKQQLKKILAQNQIEYQLESRIKGIYSTYKKLT 36 | VHEQNISKIHDLFAIRLITKSELDCYHILGLIHLNFLIDSKYFKDYIASPKQNLYQSIHTTVRLKGLNVEIQIRTQQMDNVSKFGLASHWIYKEQKEGLLAPALQLNYLVTKQKHSHDFLKRIFGTDII 37 | KINVSASHEPNVIKQINVDSNNKLLDIAFENYPKQFAKLTKIEIDGVEINSFDTSVENEMLIEFYFGKNNNLKSKWIRYMNNPIYREKVKKSLAKLAKSGRYSELAFYEKELGEKQLKLASETEIQKRL 38 | NTLRIKKMSDYLALIECTNFTNDEHLLFLAKNNDKWNKLTKPLKFAFSKVVFHNSYFEQIEGIFITKIVIEPCCSKIPDMPEQVTGILTKNILSVHRYGCKNLQNKKQLKIIPLYWNIQQLKLKPRKFR 39 | SYININGVWSEKTINKICQTIINGDGYIEKIIPKINKQKDEFDLNITLFVNNYQQLLTLMDQITTKNISFSWKYL 40 | 41 | >sp|P75386|SPOT\_MYCPN Probable - Mycoplasma pneumoniae. 42 | MFYNWLKLYKFSKMATLVEIERDFLQKTAQKFAPEVVALITKALDYSKKWHGEQKRLSGEPFFIHPLRTALRLVEWNMDSNTVCAGLLHDIIEDTQVTEADLTAIFGKEITDLVVKVTKITSESKKQRQ 43 | LNRKKEDLNLKSLVNIAMSSQQEVNALVLKLADRLDNISSIEFLAVEKQKIIAKETLELYAKIAGRIGMYPVKTQLADLSFKVLDPKNFNNTLSKINQQKVFYDNEWGNFKKQLEEMLEQNQIEYRLES 44 | RIKGIYSTYQKLTFHEQNIAKIHDLFAIRLIVKSELDCYHLLGLIHLNFTVLMKHFKDYIASPKQNFYQSIHTTVRLKGLNVEIQIRTQRMDHVSKYGFASHWIYKEKKEGLLASALQVNYLNSKQMHS 45 | RDFFKRIFGTDIIKVNVSSDNEPNIVKKLNVESNSKLLDIAYELYPKQFNKLEKIKLDGVEVMSFDVTAENEMVIEFCFGKTNNLKRRWLRYMNNHVFRERVKKDLNKLKKAVKYSELPLYEKALEELH 46 | LKLADETQIKQRLNALGIKKLTEFLELIEYPHFPKNEHLYFLASNNQKWRELIKPIKFALSQAVFQNSYFEQIEGIYITKIVIETCCTKIPDMPEQVIGILMKNILRVHLHDCRELANQKQPKIIPLYW 47 | NAHQLKMRPRKFRCQINIRGVWSETTVNKIVQTIIEGDSYLERIIPKIDKQKDEFELNITMFIDNYHQLITIMEQITTKNISYVWKYL 48 | 49 | >sp|O34098|SPOT\_SPICI - Spiroplasma citri. 
50 | MDRDIKYEEVLAQIKLYIKDEATLKEIQKAYEYAEEKHHGQVRNSGARYIIHPLWTTFFLAQWRMGPKTLIAGLLHDVLEDTPATFEELQELFGIEIANLVEGVTKVSYFAKENRTQIKAQYLRKLYLS 51 | MAKDIRVIIVKLADRLHNLKTIGYLKPERQQIIARESLEIYSAIAHRLGMKAVKQEIEDISFKIINPVQYNKIVSLLESSNKERENTINQKIEELKKILITEKKMSVKVYGRSKSIYSIYRKMNQFGKN 52 | FDDIHDILAVRIITNSVDDCYKVLGFVHQHYTPLNNRFKDYIATPKHNLYQSLHTTIVADDGLIFEVQIRTEEMDELAEQGVAAHWRYKEGENYDIAKKQKDIDERLDIFKRILDLENISVQERDEIQQ 53 | EVYKPDHLMEQIIQNDIFSSLIYVLTPNGKVVTLPFGSTVLDFAYKIHSEIGEKTIGAKINGLFSPISTVLKSGDVVDIKTAATQKPNHSWLVVSKTSSALEKIKKYLKKELVEVTSDAKSVNLEKIKQ 54 | TKSQIEEYIAKKDLKWKLVNSETQLERLHAINFNNIEDFLLDVANDEYTLEEAINLVYLDHETSQNEKILKKLQDKQYKKAQLKDDIIVQGISNIKVVISQCCLPIPYEDITGYVSKAEGIKVHLKTCR 55 | NIQSGDKQDRQVEVSWNEAVCKNKQYDCAIRIEAIDRPALLVDVTKVLSHLNASVQMMSANVSGDLMNLTIKTIIKVSNADRLQQIRSSLLTIPDIKVVERVMM 56 | 57 | >sp|P74007|SPOT\_SYNY3 Synechocystis sp. (strain PCC 6803). 58 | MNAVAALPTPTIHTTCAQDIHDIELPQWLEDCLQQWQREIEQGQDETTAPHCLICRAFCFAYDLHAQQRRKSGEPYIAHPVAVAGLLRDLGGDEAMIAAGFLHDVVEDTDISIEQIEALFGEETASLVE 59 | GVTKLSKFNFSSTTEHQAENFRRMFLAMAKDIRVIVVKLADRLHNMRTLDALSPEKQRRIARETKDIFAPLANRLGIWRFKWELEDLSFKYLEPDSYRKIQSLVVEKRGDRESRLETVKDMLRFRLRDE 60 | GIEHFELQGRPKHLYGIYYKMTSQDKAFEEIYDIAALRIIVESKGECYRALSVVHDVFKPIPGRFKDYIGLPKPNRYQSLHTTVLGLTSRPLEIQIRTEEMHHVAEYGIAAHWKYKESGGSENATLTST 61 | DEKFTWLRQLLDWQSDLKDAQEYVENLKQNLFDDDVYVFTPKGEVISLARGATPVDFAYRIHTEVGHHMKGARVNGQWLGVDTRLKNGDIVEIVTQKNSHPSLDWLNFVVTPSARHRIRQWFKRSRRDE 62 | NILRGRELLEKELGKTGLEALLKSEPMQKTAERCNYQNVEDLLAGLGYGEITSNSVVNRLRENNVNNVKNSQSSQEVTLASSPQVHPPTPPATGKDNSPIAGIEGLLYHIAGCCHPLPGEPIMGVVTRG 63 | ARGISIHRQGCHNLEQMDGDRLIPVRWNPNTNNHQTYPVDIVIEAIDRVGVLKDILSRLSDNHINVRNADVKTHLGRPAIISLKIDIHDYQQLLGIMAKIKNMSDVMDLRRVISG 64 | -------------------------------------------------------------------------------- /Body/appb/fastaSeqIO_8h.tex: -------------------------------------------------------------------------------- 1 | \hypertarget{fastaSeqIO_8h}{ 2 | \section{Fasta\-Seq\-IO/fasta\-Seq\-IO.h File Reference} 3 | 
\label{fastaSeqIO_8h}\index{FastaSeqIO/fastaSeqIO.h@{FastaSeqIO/fastaSeqIO.h}} 4 | } 5 | {\tt \#include $<$stdio.h$>$}\par 6 | 7 | 8 | Include dependency graph for fasta\-Seq\-IO.h:\begin{figure}[H] 9 | \begin{center} 10 | \leavevmode 11 | \includegraphics[width=125pt]{fastaSeqIO_8h__incl} 12 | \end{center} 13 | \end{figure} 14 | 15 | 16 | This graph shows which files directly or indirectly include this file:\begin{figure}[H] 17 | \begin{center} 18 | \leavevmode 19 | \includegraphics[width=287pt]{fastaSeqIO_8h__dep__incl} 20 | \end{center} 21 | \end{figure} 22 | \subsection*{Data Structures} 23 | \begin{CompactItemize} 24 | \item 25 | struct \hyperlink{structfSeq__t}{f\-Seq\_\-t} 26 | \end{CompactItemize} 27 | \subsection*{Functions} 28 | \begin{CompactItemize} 29 | \item 30 | int \hyperlink{fastaSeqIO_8h_a0}{print\-FSeq\-Sub\-Seq} (\hyperlink{structfSeq__t}{f\-Seq\_\-t} $\ast$seq, int start, int stop) 31 | \item 32 | long \hyperlink{fastaSeqIO_8h_a1}{measure\-Line} (FILE $\ast$INPUT) 33 | \item 34 | long \hyperlink{fastaSeqIO_8h_a2}{count\-Lines} (FILE $\ast$INPUT) 35 | \item 36 | long \hyperlink{fastaSeqIO_8h_a3}{Count\-FSeqs} (FILE $\ast$INPUT) 37 | \item 38 | int \hyperlink{fastaSeqIO_8h_a4}{init\-Aof\-FSeqs} (\hyperlink{structfSeq__t}{f\-Seq\_\-t} $\ast$aos, int num\-Seq) 39 | \item 40 | \hyperlink{structfSeq__t}{f\-Seq\_\-t} $\ast$ \hyperlink{fastaSeqIO_8h_a5}{Read\-FSeqs} (FILE $\ast$INPUT, int $\ast$number\-Of\-Sequences) 41 | \item 42 | int \hyperlink{fastaSeqIO_8h_a6}{Free\-FSeqs} (\hyperlink{structfSeq__t}{f\-Seq\_\-t} $\ast$array\-Of\-Sequences, int number\-Of\-Sequences) 43 | \item 44 | int \hyperlink{fastaSeqIO_8h_a7}{Write\-FSeq\-A} (FILE $\ast$MY\_\-FILE, \hyperlink{structfSeq__t}{f\-Seq\_\-t} $\ast$array\-Of\-Sequences, int start, int stop) 45 | \item 46 | \hyperlink{structfSeq__t}{f\-Seq\_\-t} $\ast$ \hyperlink{fastaSeqIO_8h_a8}{Read\-Txt\-Seqs} (FILE $\ast$INPUT, int $\ast$number\-Of\-Sequences) 47 | \end{CompactItemize} 48 | 49 | 50 | 
\subsection*{Function Documentation} 51 | \hypertarget{fastaSeqIO_8h_a3}{ 52 | \index{fastaSeqIO.h@{fasta\-Seq\-IO.h}!CountFSeqs@{CountFSeqs}} 53 | \index{CountFSeqs@{CountFSeqs}!fastaSeqIO.h@{fasta\-Seq\-IO.h}} 54 | \subsubsection[CountFSeqs]{\setlength{\rightskip}{0pt plus 5cm}long Count\-FSeqs (FILE $\ast$ {\em INPUT})}} 55 | \label{fastaSeqIO_8h_a3} 56 | 57 | 58 | 59 | 60 | Definition at line 44 of file fasta\-Seq\-IO.c. 61 | 62 | 63 | 64 | \hypertarget{fastaSeqIO_8h_a2}{ 65 | \index{fastaSeqIO.h@{fasta\-Seq\-IO.h}!countLines@{countLines}} 66 | \index{countLines@{countLines}!fastaSeqIO.h@{fasta\-Seq\-IO.h}} 67 | \subsubsection[countLines]{\setlength{\rightskip}{0pt plus 5cm}long count\-Lines (FILE $\ast$ {\em INPUT})}} 68 | \label{fastaSeqIO_8h_a2} 69 | 70 | 71 | 72 | 73 | Definition at line 69 of file fasta\-Seq\-IO.c. 74 | 75 | Referenced by Read\-File(). 76 | 77 | 78 | 79 | \hypertarget{fastaSeqIO_8h_a6}{ 80 | \index{fastaSeqIO.h@{fasta\-Seq\-IO.h}!FreeFSeqs@{FreeFSeqs}} 81 | \index{FreeFSeqs@{FreeFSeqs}!fastaSeqIO.h@{fasta\-Seq\-IO.h}} 82 | \subsubsection[FreeFSeqs]{\setlength{\rightskip}{0pt plus 5cm}int Free\-FSeqs (\hyperlink{structfSeq__t}{f\-Seq\_\-t} $\ast$ {\em array\-Of\-Sequences}, int {\em number\-Of\-Sequences})}} 83 | \label{fastaSeqIO_8h_a6} 84 | 85 | 86 | 87 | 88 | Definition at line 306 of file fasta\-Seq\-IO.c. 89 | 90 | References f\-Seq\_\-t::label, and f\-Seq\_\-t::seq. 91 | 92 | Referenced by main(). 93 | 94 | 95 | 96 | \hypertarget{fastaSeqIO_8h_a4}{ 97 | \index{fastaSeqIO.h@{fasta\-Seq\-IO.h}!initAofFSeqs@{initAofFSeqs}} 98 | \index{initAofFSeqs@{initAofFSeqs}!fastaSeqIO.h@{fasta\-Seq\-IO.h}} 99 | \subsubsection[initAofFSeqs]{\setlength{\rightskip}{0pt plus 5cm}int init\-Aof\-FSeqs (\hyperlink{structfSeq__t}{f\-Seq\_\-t} $\ast$ {\em aos}, int {\em num\-Seq})}} 100 | \label{fastaSeqIO_8h_a4} 101 | 102 | 103 | 104 | 105 | Definition at line 94 of file fasta\-Seq\-IO.c. 106 | 107 | References f\-Seq\_\-t::label, and f\-Seq\_\-t::seq. 
108 | 109 | Referenced by Read\-FSeqs() and Read\-Txt\-Seqs(). 110 | 111 | 112 | 113 | \hypertarget{fastaSeqIO_8h_a1}{ 114 | \index{fastaSeqIO.h@{fasta\-Seq\-IO.h}!measureLine@{measureLine}} 115 | \index{measureLine@{measureLine}!fastaSeqIO.h@{fasta\-Seq\-IO.h}} 116 | \subsubsection[measureLine]{\setlength{\rightskip}{0pt plus 5cm}long measure\-Line (FILE $\ast$ {\em INPUT})}} 117 | \label{fastaSeqIO_8h_a1} 118 | 119 | 120 | 121 | 122 | Definition at line 25 of file fasta\-Seq\-IO.c. 123 | 124 | Referenced by Read\-File(). 125 | 126 | 127 | 128 | \hypertarget{fastaSeqIO_8h_a0}{ 129 | \index{fastaSeqIO.h@{fasta\-Seq\-IO.h}!printFSeqSubSeq@{printFSeqSubSeq}} 130 | \index{printFSeqSubSeq@{printFSeqSubSeq}!fastaSeqIO.h@{fasta\-Seq\-IO.h}} 131 | \subsubsection[printFSeqSubSeq]{\setlength{\rightskip}{0pt plus 5cm}int print\-FSeq\-Sub\-Seq (\hyperlink{structfSeq__t}{f\-Seq\_\-t} $\ast$ {\em seq}, int {\em start}, int {\em stop})}} 132 | \label{fastaSeqIO_8h_a0} 133 | 134 | 135 | 136 | 137 | Definition at line 14 of file fasta\-Seq\-IO.c. 138 | 139 | References f\-Seq\_\-t::seq. 140 | 141 | 142 | 143 | \hypertarget{fastaSeqIO_8h_a5}{ 144 | \index{fastaSeqIO.h@{fasta\-Seq\-IO.h}!ReadFSeqs@{ReadFSeqs}} 145 | \index{ReadFSeqs@{ReadFSeqs}!fastaSeqIO.h@{fasta\-Seq\-IO.h}} 146 | \subsubsection[ReadFSeqs]{\setlength{\rightskip}{0pt plus 5cm}\hyperlink{structfSeq__t}{f\-Seq\_\-t}$\ast$ Read\-FSeqs (FILE $\ast$ {\em INPUT}, int $\ast$ {\em number\-Of\-Sequences})}} 147 | \label{fastaSeqIO_8h_a5} 148 | 149 | 150 | 151 | 152 | Definition at line 199 of file fasta\-Seq\-IO.c. 153 | 154 | References init\-Aof\-FSeqs(), f\-Seq\_\-t::label, Read\-File(), f\-Seq\_\-t::seq, s\-Size\_\-t::size, s\-Size\_\-t::start, and s\-Size\_\-t::stop. 155 | 156 | Referenced by main(). 
157 | 158 | 159 | 160 | \hypertarget{fastaSeqIO_8h_a8}{ 161 | \index{fastaSeqIO.h@{fasta\-Seq\-IO.h}!ReadTxtSeqs@{ReadTxtSeqs}} 162 | \index{ReadTxtSeqs@{ReadTxtSeqs}!fastaSeqIO.h@{fasta\-Seq\-IO.h}} 163 | \subsubsection[ReadTxtSeqs]{\setlength{\rightskip}{0pt plus 5cm}\hyperlink{structfSeq__t}{f\-Seq\_\-t}$\ast$ Read\-Txt\-Seqs (FILE $\ast$ {\em INPUT}, int $\ast$ {\em number\-Of\-Sequences})}} 164 | \label{fastaSeqIO_8h_a8} 165 | 166 | 167 | 168 | 169 | Definition at line 172 of file fasta\-Seq\-IO.c. 170 | 171 | References init\-Aof\-FSeqs(), Read\-File(), and f\-Seq\_\-t::seq. 172 | 173 | 174 | 175 | \hypertarget{fastaSeqIO_8h_a7}{ 176 | \index{fastaSeqIO.h@{fasta\-Seq\-IO.h}!WriteFSeqA@{WriteFSeqA}} 177 | \index{WriteFSeqA@{WriteFSeqA}!fastaSeqIO.h@{fasta\-Seq\-IO.h}} 178 | \subsubsection[WriteFSeqA]{\setlength{\rightskip}{0pt plus 5cm}int Write\-FSeq\-A (FILE $\ast$ {\em MY\_\-FILE}, \hyperlink{structfSeq__t}{f\-Seq\_\-t} $\ast$ {\em array\-Of\-Sequences}, int {\em start}, int {\em stop})}} 179 | \label{fastaSeqIO_8h_a7} 180 | 181 | 182 | 183 | 184 | Definition at line 332 of file fasta\-Seq\-IO.c. 185 | 186 | 187 | 188 | --------------------------------------------------------------------------------