├── README.md ├── Section1 ├── Chap1 │ ├── .DS_Store │ ├── blastSample.fasta │ ├── blast_result.xml │ └── hemoglobin_subunit_beta.fasta ├── Chap10 │ ├── 10.1.2.SwissProt_Record.py │ ├── 10.1.3.SwissProt_Parsing.py │ ├── 10.2.2.Access_ExPASy.py │ └── P02649.txt ├── Chap11 │ ├── .DS_Store │ ├── 11.4.1.read_newick.py │ ├── 11.4.2.draw_ascii.py │ ├── 11.4.3.draw_tree.py │ ├── 11.4.4.draw_color_tree.py │ ├── 11.4.5.draw_length_label_tree.py │ ├── 11.4.5.draw_length_tree.py │ ├── sample_tree1.nwk │ ├── sample_tree2.nwk │ ├── sample_tree3.nwk │ └── sample_tree4.nwk ├── Chap12 │ ├── 12.3.KEGG_Enzyme_example.py │ ├── 12.4.KEGG_REST_example.py │ └── ec_2.7.1.40.txt ├── Chap13 │ ├── 13.10.KEGG.py │ ├── 13.2.fasta │ ├── 13.2.fasta_record_counter.py │ ├── 13.3.rev_comp.py │ ├── 13.4.read_gb.py │ ├── 13.5.WebLogo.py │ ├── 13.5.png │ ├── 13.6.blast.py │ ├── 13.6.fasta │ ├── 13.7.efetch.py │ ├── 13.8.ExPASy.py │ ├── 13.9.nwk │ ├── 13.9.tree.py │ └── NM_000384.2.gb ├── Chap3 │ ├── KT225476.2.fasta │ ├── KT225476.2.gbk │ └── blast_result.xml ├── Chap4 │ ├── 4.3.1.make_sequence.py │ ├── 4.3.2.alphabet.py │ ├── 4.4.1.count.py │ ├── 4.4.2.gc_contents.py │ ├── 4.4.3.case.py │ ├── 4.4.4.translate_transcribe.py │ ├── 4.4.5.translate_stop_1.py │ ├── 4.4.5.translate_stop_2.py │ ├── 4.4.6.split.py │ ├── 4.4.7.complement.bio.py │ ├── 4.4.7.complement.py │ ├── 4.4.8.codonTable.mitochondria.py │ ├── 4.4.8.codonTable.py │ ├── 4.4.9.orf_finder.py │ ├── 4.5.1.gc_contents.py │ ├── 4.5.2.calc_molecular_weight.py │ ├── 4.5.3.make_six_frame_translations.py │ ├── 4.5.4.calc_melting_temperature.py │ ├── 4.5.5.convert_aminoacid_1to3.py │ └── 4.5.5.convert_aminoacid_3to1.py ├── Chap5 │ ├── 5.1.simple_seqrecord_object_example_1.py │ ├── 5.1.simple_seqrecord_object_example_2.py │ ├── 5.1.simple_seqrecord_object_example_3.py │ ├── 5.3.1.seqRecord_example.py │ ├── 5.3.2.seqRecord_FASTA_example.py │ ├── 5.3.3.seqRecord_GenBank_example.py │ ├── 5.4.compare_example_1.py │ ├── 5.4.compare_example_2.py │ ├── 
5.4.compare_example_3.py │ ├── 5.4.compare_example_4.py │ ├── J01636.1.fasta │ └── J01636.1.gbk ├── Chap6 │ ├── 6.1.1.parse_example_1.py │ ├── 6.1.1.parse_example_2.py │ ├── 6.1.2.read_example_1.py │ ├── 6.1.2.read_example_2.py │ ├── 6.2.1.read_example_1.py │ ├── 6.2.1.read_example_2.py │ ├── 6.2.2.read_example_1.py │ ├── 6.2.2.read_example_2.py │ ├── 6.3.read_example_1.py │ ├── 6.3.read_example_2.py │ ├── 6.4.2.entrez_example.py │ ├── 6.4.3.read_example_GenBank_1.py │ ├── 6.4.3.read_example_GenBank_2.py │ ├── HM624086.1.gbk │ ├── KT225476.2.gbk │ ├── sample_1.fasta │ ├── sample_1.fastq │ ├── sample_1.fastq.gz │ └── sample_2.fasta ├── Chap7 │ ├── 7.2.read_MSA_example.py │ ├── 7.3.read_MSA_example_1.py │ ├── 7.3.read_MSA_example_2.py │ ├── 7.4.3.muscle_cmd_example.py │ ├── 7.5.1.example.fasta │ ├── 7.5.2.WebLogo_example_1.py │ ├── 7.5.2.WebLogo_example_2.py │ ├── 7.6.phylo_example.py │ ├── HBA.all.fasta │ ├── HBA.aln │ ├── HBA.newick │ ├── HBA_WebLogo.png │ ├── example.aln │ └── test.png ├── Chap8 │ ├── 8.3.1.blast_example_1.py │ ├── 8.3.2.blast_example.py │ ├── buccal_swab.unmapped1.fasta │ └── buccal_swab.unmapped2.fasta └── Chap9 │ ├── 9.2.1.efetch_example.py │ ├── 9.2.2.entrez_parse_example.py │ ├── 9.2.2.entrez_read_example.py │ ├── 9.3.entrez.einfo_example.py │ ├── 9.3.entrez_example_1.py │ └── 9.4.entrez.esearch_example.py └── Section2 ├── 001.py ├── 002.py ├── 003.py ├── 004.py ├── 005.py ├── 006.py ├── 007.py ├── 008.py ├── 009.py ├── 010.py ├── 011.py ├── 012.py ├── 013.py ├── 014.py ├── 015.py ├── 016-1.py ├── 016-2.py ├── 017-1.py ├── 017-2.py ├── 019_after.py ├── 019_before.py ├── 020_after.py ├── 020_before.py ├── 021.py ├── 022.py ├── 023.py ├── 024.py ├── 025.py ├── 026.py ├── 027-1.py ├── 027-2.py ├── 028.py ├── 029-1.py ├── 029-2.py ├── 029-3.py ├── 030.py ├── 031-1.py ├── 031-2.py ├── 032.py ├── 033.py ├── 034-1.py ├── 034-2.py ├── 035-1.py ├── 035-2.py ├── 036.py ├── 037-1.py ├── 037-2.py ├── 037-3.py ├── 038-1.py ├── 038-2.py ├── 039.py ├── 
040.py ├── 041.py ├── 042.py ├── 043.py ├── 044-1.py ├── 044-2.py ├── 045.py ├── 046-1.py ├── 046-2.py ├── 047.py ├── 048.py ├── 049.py ├── 050.py ├── 051.py ├── 052.py ├── 053.py ├── 054-1.py ├── 054-2.py ├── 055.py ├── 056-1.py ├── 056-2.py ├── 057-1.py ├── 057-2.py ├── 058-1.py ├── 058-2.py ├── 059-1.py ├── 059-2.py ├── 060-1.py ├── 060-2.py ├── 061.py ├── 062.py ├── 063.py ├── 064.py ├── 065.py ├── 066-1.py ├── 066-2.py ├── 067.py ├── 068.py ├── 069.py ├── 070.py ├── 071.py ├── 072.py ├── 073.py ├── 074.py ├── 075.py ├── 076.py ├── 077.py ├── 078.py ├── 079.py ├── 080.py ├── 081.py ├── 082.py ├── 083.py ├── 084.py ├── 085.py ├── 086.py ├── 087.py ├── 088.py ├── 089.py ├── 090-1.py ├── 090-2.py ├── 091.py ├── 092.py ├── 093.py ├── 094.py ├── 095.py ├── 096.py ├── 097.py ├── 098.py ├── 099.py ├── 100.py ├── read_sample.txt ├── sample1.bed ├── sample1.fasta ├── sample1.vcf └── write_sample.txt /README.md: -------------------------------------------------------------------------------- 1 | # 바이오파이썬으로 만나는 생물정보학 2 | > 출처: https://korbillgates.tistory.com/211 [생물정보학자의 블로그] 3 | 4 | 바이오파이썬과 생물정보학 파이썬 프로그래밍을 주제로 작성한 도서입니다. 출간일은 2019년 3월 22일입니다. 생물정보학을 처음 접했을 때, 어디서 부터 시작해야할 지 모르던 때의 고민을 생각하며 작성한 도서입니다. 5 | 6 | - 섹션 1은 바이오파이썬의 내용이 담겨있고, 7 | - 섹션 2는 생물정보학 프로그래밍(기초)이 담겨있습니다. 8 | 9 | ```python 10 | basic_info = { 11 | "제목(subject)" : "바이오파이썬으로 만나는 생물정보학", 12 | "저자(author)" : "한주현", 13 | "페이지(page)" : "312", 14 | "출간일(publish date)" : "2019년 3월 22일" 15 | } 16 | 17 | reader = { 18 | 1 : "바이오파이썬으로 생물정보학을 배우고 싶은 의생명연구자", 19 | 2 : "파이썬으로 생물정보학 데이터를 다루고 싶은 개발자", 20 | 3 : "실제 코딩 문제를 해결하며 생물정보학 파이썬 프로그래밍 실력을 올리고 싶은 학습자" 21 | } 22 | 23 | purchase = { 24 | "교보문고" : "https://bit.ly/2CcMJ16", 25 | "알라딘" : "http://aladin.kr/p/qed0s", 26 | "예스24" : "http://www.yes24.com/Product/Goods/70861061", 27 | "인터파크" : "https://bit.ly/2Ur7ahD" 28 | } 29 | ``` 30 | 31 | ## 목차 32 | 33 | ### 섹션 1. 
바이오파이썬 34 | #### 1장 바이오파이썬 소개 35 | #### 2장 바이오파이썬 설치 36 | #### 3장 생물정보학 파일 포맷 소개 37 | #### 4장 유전자 서열 다루어보기 : Sequence 객체 38 | #### 5장 Sequence record 객체 39 | #### 6장 FASTA, FASTQ, GenBank 파일 : Sequence 읽기 40 | #### 7장 인간과 다른 동물은 얼마나 다를까? 종 간 차이점의 비밀: Multiple Sequence Alignment 41 | #### 8장 원인 불명의 환자에서 나온 미지의 종 찾기 : BLAST 42 | #### 9장 NCBI 데이터베이스 43 | #### 10장 단백질의 세계로 : Swiss-Prot과 ExPASy 44 | #### 11장 계통분류학 분석 : Bio.Phylo 45 | #### 12장 시스템생물학 지도 : KEGG 46 | #### 13장 바이오파이썬 활용 연습문제 47 | ### 섹션 2. 생물정보학 파이썬 프로그래밍 48 | #### 1. 프로그래밍 시작 49 | #### 2. 자료형 다루기 50 | ##### 2.1 문자열 51 | ##### 2.2 숫자 52 | ##### 2.3 리스트 53 | ##### 2.4 사전 54 | ##### 2.5 세트 55 | ##### 2.6 튜플 56 | #### 3. 클래스 57 | #### 4. 알고리즘 소개 58 | #### 5. 생물정보학 파일 다루기 59 | ##### 5.1 FASTA 60 | ##### 5.2 VCF 61 | ##### 5.3 BED 62 | -------------------------------------------------------------------------------- /Section1/Chap1/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjpublic/biopython/1ee610b4a5a0d4bedb8318b9b86b90a7dc3d1e1f/Section1/Chap1/.DS_Store -------------------------------------------------------------------------------- /Section1/Chap1/blastSample.fasta: -------------------------------------------------------------------------------- 1 | >blastSample 2 | CCTACATAGTAGTTTCACCAAGGGCTGAGAATGGAATACACCATCCAGGAGATTTTCATCGACTATGAGGA -------------------------------------------------------------------------------- /Section1/Chap1/hemoglobin_subunit_beta.fasta: -------------------------------------------------------------------------------- 1 | >NC_000011.10:c5227071-5225466 Homo sapiens chromosome 11, GRCh38.p12 Primary Assembly 2 | ACATTTGCTTCTGACACAACTGTGTTCACTAGCAACCTCAAACAGACACCATGGTGCATCTGACTCCTGA 3 | GGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGTTGGTGGTGAGGCCCTGGGC 4 | AGGTTGGTATCAAGGTTACAAGACAGGTTTAAGGAGACCAATAGAAACTGGGCATGTGGAGACAGAGAAG 5 | ACTCTTGGGTTTCTGATAGGCACTGACTCTCTCTGCCTATTGGTCTATTTTCCCACCCTTAGGCTGCTGG 6 
| TGGTCTACCCTTGGACCCAGAGGTTCTTTGAGTCCTTTGGGGATCTGTCCACTCCTGATGCTGTTATGGG 7 | CAACCCTAAGGTGAAGGCTCATGGCAAGAAAGTGCTCGGTGCCTTTAGTGATGGCCTGGCTCACCTGGAC 8 | AACCTCAAGGGCACCTTTGCCACACTGAGTGAGCTGCACTGTGACAAGCTGCACGTGGATCCTGAGAACT 9 | TCAGGGTGAGTCTATGGGACGCTTGATGTTTTCTTTCCCCTTCTTTTCTATGGTTAAGTTCATGTCATAG 10 | GAAGGGGATAAGTAACAGGGTACAGTTTAGAATGGGAAACAGACGAATGATTGCATCAGTGTGGAAGTCT 11 | CAGGATCGTTTTAGTTTCTTTTATTTGCTGTTCATAACAATTGTTTTCTTTTGTTTAATTCTTGCTTTCT 12 | TTTTTTTTCTTCTCCGCAATTTTTACTATTATACTTAATGCCTTAACATTGTGTATAACAAAAGGAAATA 13 | TCTCTGAGATACATTAAGTAACTTAAAAAAAAACTTTACACAGTCTGCCTAGTACATTACTATTTGGAAT 14 | ATATGTGTGCTTATTTGCATATTCATAATCTCCCTACTTTATTTTCTTTTATTTTTAATTGATACATAAT 15 | CATTATACATATTTATGGGTTAAAGTGTAATGTTTTAATATGTGTACACATATTGACCAAATCAGGGTAA 16 | TTTTGCATTTGTAATTTTAAAAAATGCTTTCTTCTTTTAATATACTTTTTTGTTTATCTTATTTCTAATA 17 | CTTTCCCTAATCTCTTTCTTTCAGGGCAATAATGATACAATGTATCATGCCTCTTTGCACCATTCTAAAG 18 | AATAACAGTGATAATTTCTGGGTTAAGGCAATAGCAATATCTCTGCATATAAATATTTCTGCATATAAAT 19 | TGTAACTGATGTAAGAGGTTTCATATTGCTAATAGCAGCTACAATCCAGCTACCATTCTGCTTTTATTTT 20 | ATGGTTGGGATAAGGCTGGATTATTCTGAGTCCAAGCTAGGCCCTTTTGCTAATCATGTTCATACCTCTT 21 | ATCTTCCTCCCACAGCTCCTGGGCAACGTGCTGGTCTGTGTGCTGGCCCATCACTTTGGCAAAGAATTCA 22 | CCCCACCAGTGCAGGCTGCCTATCAGAAAGTGGTGGCTGGTGTGGCTAATGCCCTGGCCCACAAGTATCA 23 | CTAAGCTCGCTTTCTTGCTGTCCAATTTCTATTAAAGGTTCCTTTGTTCCCTAAGTCCAACTACTAAACT 24 | GGGGGATATTATGAAGGGCCTTGAGCATCTGGATTCTGCCTAATAAAAAACATTTATTTTCATTGC 25 | 26 | -------------------------------------------------------------------------------- /Section1/Chap10/10.1.2.SwissProt_Record.py: -------------------------------------------------------------------------------- 1 | #10.1.2.SwissProt_Record.py 2 | from Bio import SwissProt 3 | #https://www.uniprot.org/uniprot/P02649.txt 4 | handle = open("P02649.txt") 5 | record = SwissProt.read(handle) 6 | print(type(record)) # 7 | handle.close() 8 | 9 | -------------------------------------------------------------------------------- 
/Section1/Chap10/10.1.3.SwissProt_Parsing.py: -------------------------------------------------------------------------------- 1 | #10.1.3.SwissProt_Parsing.py 2 | from Bio import SwissProt 3 | 4 | #https://www.uniprot.org/uniprot/P02649.txt 5 | handle = open("P02649.txt") 6 | record = SwissProt.read(handle) 7 | print(type(record)) # 8 | handle.close() 9 | 10 | print(record.description) 11 | print("gene_name:",record.gene_name) 12 | print("organism:",record.organism) 13 | print("sequence_length:",record.sequence_length) 14 | print("sequence:",record.sequence) 15 | 16 | -------------------------------------------------------------------------------- /Section1/Chap10/10.2.2.Access_ExPASy.py: -------------------------------------------------------------------------------- 1 | #10.2.2.Access_ExPASy.py 2 | from Bio import ExPASy 3 | from Bio import SwissProt 4 | 5 | accession = "P02649" 6 | handle = ExPASy.get_sprot_raw(accession) 7 | record = SwissProt.read(handle) 8 | print(record.gene_name) 9 | print(record.organism) 10 | print(record.sequence_length) 11 | print(record.sequence) 12 | 13 | -------------------------------------------------------------------------------- /Section1/Chap11/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjpublic/biopython/1ee610b4a5a0d4bedb8318b9b86b90a7dc3d1e1f/Section1/Chap11/.DS_Store -------------------------------------------------------------------------------- /Section1/Chap11/11.4.1.read_newick.py: -------------------------------------------------------------------------------- 1 | #11.4.1.read_newick.py 2 | from Bio import Phylo 3 | tree = Phylo.read("sample_tree3.nwk","newick") 4 | print(type(tree)) 5 | print(tree) 6 | 7 | -------------------------------------------------------------------------------- /Section1/Chap11/11.4.2.draw_ascii.py: -------------------------------------------------------------------------------- 1 | #11.4.2.draw_ascii.py 2 | ## 
ASCII 형태로 tree 객체 출력하기 3 | from Bio import Phylo 4 | tree = Phylo.read("sample_tree3.nwk","newick") 5 | Phylo.draw_ascii(tree) 6 | 7 | -------------------------------------------------------------------------------- /Section1/Chap11/11.4.3.draw_tree.py: -------------------------------------------------------------------------------- 1 | #11.4.2.draw_ascii.py 2 | ## ASCII 형태로 tree 객체 출력하기 3 | from Bio import Phylo 4 | tree = Phylo.read("sample_tree3.nwk","newick") 5 | Phylo.draw(tree) 6 | 7 | -------------------------------------------------------------------------------- /Section1/Chap11/11.4.4.draw_color_tree.py: -------------------------------------------------------------------------------- 1 | #11.4.4.draw_color_tree.py 2 | from Bio import Phylo 3 | tree = Phylo.read("sample_tree3.nwk","newick") 4 | tree.rooted = True 5 | tree.root.color = (128,128,128) 6 | print(tree) 7 | print("tree.clade[0]:", tree.clade[1]) 8 | print("tree.clade[1]:", tree.clade[1]) 9 | print("tree.clade[2,0]:", tree.clade[2,0]) 10 | print("tree.clade[2,1]:", tree.clade[2,1]) 11 | tree.clade[1].color = "blue" 12 | tree.clade[2,0].color = "red" 13 | Phylo.draw(tree) 14 | 15 | -------------------------------------------------------------------------------- /Section1/Chap11/11.4.5.draw_length_label_tree.py: -------------------------------------------------------------------------------- 1 | #11.4.5.draw_length_label_tree.py 2 | from Bio import Phylo 3 | tree = Phylo.read("sample_tree4.nwk","newick") 4 | Phylo.draw(tree, branch_labels = lambda c: c.branch_length) 5 | 6 | -------------------------------------------------------------------------------- /Section1/Chap11/11.4.5.draw_length_tree.py: -------------------------------------------------------------------------------- 1 | #11.4.5.draw_length_tree.py 2 | from Bio import Phylo 3 | tree = Phylo.read("sample_tree4.nwk","newick") 4 | Phylo.draw(tree) 5 | 6 | -------------------------------------------------------------------------------- 
/Section1/Chap11/sample_tree1.nwk: -------------------------------------------------------------------------------- 1 | (A, B, C); 2 | -------------------------------------------------------------------------------- /Section1/Chap11/sample_tree2.nwk: -------------------------------------------------------------------------------- 1 | (A:0.1, B:0.3, C:0.2); 2 | -------------------------------------------------------------------------------- /Section1/Chap11/sample_tree3.nwk: -------------------------------------------------------------------------------- 1 | (A, B, (C, D)); 2 | -------------------------------------------------------------------------------- /Section1/Chap11/sample_tree4.nwk: -------------------------------------------------------------------------------- 1 | (A:0.1, B:0.2, (C:0.15, D:0.1):0.15); 2 | -------------------------------------------------------------------------------- /Section1/Chap12/12.3.KEGG_Enzyme_example.py: -------------------------------------------------------------------------------- 1 | #12.3.KEGG_Enzyme_example.py 2 | 3 | from Bio.KEGG import Enzyme 4 | 5 | records = Enzyme.parse(open("ec_2.7.1.40.txt")) 6 | record = list(records)[0] 7 | print("classname:", record.classname) 8 | print("sysname:", record.sysname) 9 | print("subtrate:", record.substrate) 10 | print("product:", record.product) 11 | 12 | -------------------------------------------------------------------------------- /Section1/Chap12/12.4.KEGG_REST_example.py: -------------------------------------------------------------------------------- 1 | #12.4.KEGG_REST_example.py 2 | from Bio.KEGG import REST 3 | 4 | human_pathways = REST.kegg_list("pathway", "hsa").read() 5 | 6 | insulin_pathways = [] 7 | for line in human_pathways.rstrip().split("\n"): 8 | entry, description = line.split("\t") 9 | if "insulin" in description.lower(): 10 | insulin_pathways.append(entry) 11 | print(entry, description) 12 | print(insulin_pathways) 13 | 14 | insulin_genes = [] 15 | for pathway 
in insulin_pathways: 16 | pathway_file = REST.kegg_get(pathway).read() 17 | 18 | current_section = None 19 | for line in pathway_file.rstrip().split("\n"): 20 | section = line[:12].strip() 21 | if not section == "": 22 | current_section = section 23 | 24 | if current_section == "GENE": 25 | gene_identifiers, gene_description = line[12:].split("; ") 26 | gene_id, gene_symbol = gene_identifiers.split() 27 | 28 | if not gene_symbol in insulin_genes: 29 | insulin_genes.append(gene_symbol) 30 | 31 | print("There are %d insulin pathways and %d insulin genes. The genes are:" % (len(insulin_pathways), len(insulin_genes))) 32 | print(", ".join(insulin_genes)) 33 | 34 | -------------------------------------------------------------------------------- /Section1/Chap13/13.10.KEGG.py: -------------------------------------------------------------------------------- 1 | #13.10.KEGG.py 2 | from Bio.KEGG import REST 3 | 4 | human_pathways = REST.kegg_list("pathway", "hsa").read() 5 | 6 | hepatitis_pathways = [] 7 | for line in human_pathways.rstrip().split("\n"): 8 | entry, description = line.split("\t") 9 | if "hepatitis" in description.lower(): 10 | hepatitis_pathways.append(entry) 11 | print(entry, description) 12 | print(hepatitis_pathways) 13 | 14 | hepatitis_genes = [] 15 | for pathway in hepatitis_pathways: 16 | pathway_file = REST.kegg_get(pathway).read() 17 | 18 | current_section = None 19 | for line in pathway_file.rstrip().split("\n"): 20 | section = line[:12].strip() 21 | if not section == "": 22 | current_section = section 23 | 24 | if current_section == "GENE": 25 | gene_identifiers, gene_description = line[12:].split("; ") 26 | gene_id, gene_symbol = gene_identifiers.split() 27 | 28 | if not gene_symbol in hepatitis_genes: 29 | hepatitis_genes.append(gene_symbol) 30 | 31 | print("There are %d hepatitis pathways and %d hepatitis genes. 
The genes are:" % (len(hepatitis_pathways), len(hepatitis_genes))) 32 | print(", ".join(hepatitis_genes)) 33 | -------------------------------------------------------------------------------- /Section1/Chap13/13.2.fasta: -------------------------------------------------------------------------------- 1 | >sample1 2 | AACCGGGTTT 3 | CANNTTTAAA 4 | GGGAACTTCA 5 | >sample2 6 | ACANNNTAAA 7 | GGGAATATAG 8 | CCACANTTAC 9 | >sample3 10 | TTATNNCCCG 11 | GGCGCGCTNT 12 | AACNCTTAGG 13 | -------------------------------------------------------------------------------- /Section1/Chap13/13.2.fasta_record_counter.py: -------------------------------------------------------------------------------- 1 | #13.2.fasta_record_counter.py 2 | 3 | from Bio import SeqIO 4 | 5 | f = "13.2.fasta" 6 | 7 | with open(f,"rU") as handle: 8 | for record in SeqIO.parse(handle,"fasta"): 9 | seq = record.seq 10 | print(record.id) 11 | print("A",seq.upper().count("A")) 12 | print("C",seq.upper().count("C")) 13 | print("G",seq.upper().count("G")) 14 | print("T",seq.upper().count("T")) 15 | print("N",seq.upper().count("N")) 16 | -------------------------------------------------------------------------------- /Section1/Chap13/13.3.rev_comp.py: -------------------------------------------------------------------------------- 1 | # 13.3.rev_comp.py 2 | from Bio.Seq import Seq 3 | 4 | seq = Seq("ACATTA") 5 | comp_seq = seq.complement() 6 | rev_comp_seq = seq.reverse_complement() 7 | print("comp_seq:",comp_seq) # TGTAAT 8 | print("rev_comp_seq:",rev_comp_seq) # TAATGT 9 | 10 | -------------------------------------------------------------------------------- /Section1/Chap13/13.4.read_gb.py: -------------------------------------------------------------------------------- 1 | #13.4.read_gb.py 2 | 3 | from Bio import SeqIO 4 | 5 | gbk = SeqIO.read("NM_000384.2.gb","genbank") 6 | print("id:",gbk.id) 7 | print("description:",gbk.description) 8 | print("molecule_type:",gbk.annotations['molecule_type']) 9 | 
print("organism:",gbk.annotations['organism']) 10 | -------------------------------------------------------------------------------- /Section1/Chap13/13.5.WebLogo.py: -------------------------------------------------------------------------------- 1 | #13.5.WebLogo.py 2 | 3 | from Bio.motifs import Motif 4 | from Bio import motifs 5 | from Bio.Seq import Seq 6 | 7 | instances = [Seq("AATTAAA"), 8 | Seq("AAAAAGA"), 9 | Seq("AAATAGC"), 10 | Seq("AATCAAC"), 11 | Seq("AATTTAA"), 12 | Seq("TATCAGA"), 13 | Seq("ATATAGC"), 14 | Seq("ATATTAA"), 15 | ] 16 | 17 | m = motifs.create(instances) 18 | 19 | print(m.counts) 20 | Motif.weblogo(m,'13.5.png') # NOTE(review): presumably equivalent to m.weblogo('13.5.png') if motifs.create returns a Motif - confirm 21 | -------------------------------------------------------------------------------- /Section1/Chap13/13.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjpublic/biopython/1ee610b4a5a0d4bedb8318b9b86b90a7dc3d1e1f/Section1/Chap13/13.5.png -------------------------------------------------------------------------------- /Section1/Chap13/13.6.blast.py: -------------------------------------------------------------------------------- 1 | #13.6.blast.py 2 | 3 | from Bio.Blast import NCBIWWW 4 | from Bio.Blast import NCBIXML 5 | from Bio import SeqIO 6 | 7 | record = SeqIO.read("13.6.fasta", format="fasta") 8 | handle = NCBIWWW.qblast("blastn","nt",record.format("fasta")) # blastn query of the FASTA record against the "nt" database; NOTE(review): this handle is never closed 9 | 10 | blast_records = NCBIXML.parse(handle) 11 | E_VALUE_THRESHOLD = 0.05 # only HSPs with an E-value below this cutoff are printed 12 | for blast_record in blast_records: 13 | for alignment in blast_record.alignments: 14 | for hsp in alignment.hsps: 15 | if hsp.expect < E_VALUE_THRESHOLD: 16 | print(alignment.title) 17 | print(alignment.length) 18 | print(hsp.expect) 19 | print(hsp.query[0:75]) # first 75 characters of the query line 20 | print(hsp.match[0:75]) # first 75 characters of the match line 21 | print(hsp.sbjct[0:75]) # first 75 characters of the subject line 22 | 23 | -------------------------------------------------------------------------------- /Section1/Chap13/13.6.fasta: -------------------------------------------------------------------------------- 1 
| >sample 2 | GTGGGCAAGCACTACTGTTTCGCCGGTGAGGCCGCATAGACTGTTCCCACGGTTGAAAGTGTCCGATCCGTTATCCGACCATGTACTTCGAGAAGCCTAGTACCGCTCTGGAATCTTCGACGCGTTGCGCTCAGCACTCAACCCCGGAGTGTAGCTTGGGCCGATGAGTCTGGACAGTCCCCACTGGCGACAGTGGCTCCAGCTGCGCTGGCGGCCGCGACCT 3 | -------------------------------------------------------------------------------- /Section1/Chap13/13.7.efetch.py: -------------------------------------------------------------------------------- 1 | #13.7.efetch.py 2 | 3 | from Bio import Entrez 4 | 5 | Entrez.email = "your@email.com" 6 | handle = Entrez.efetch(db="nucleotide", id="NC_001498.1", rettype="gb", retmode="xml") 7 | records = Entrez.read(handle) 8 | for record in records: 9 | print("Locus:",record["GBSeq_locus"]) 10 | print("Definition:",record["GBSeq_definition"]) 11 | print("Strand, Molecular type:",record["GBSeq_strandedness"], record["GBSeq_moltype"]) 12 | print("Length:",record["GBSeq_length"], "bp") 13 | print("Journal:",len(record["GBSeq_references"]), "journals") 14 | 15 | -------------------------------------------------------------------------------- /Section1/Chap13/13.8.ExPASy.py: -------------------------------------------------------------------------------- 1 | #13.8.ExPASy.py 2 | from Bio import ExPASy 3 | from Bio import SwissProt 4 | 5 | accession = "P04637" 6 | handle = ExPASy.get_sprot_raw(accession) 7 | record = SwissProt.read(handle) 8 | print("gene_name:",record.gene_name) 9 | print("organism:",record.organism) 10 | print("sequence:",record.sequence) 11 | 12 | -------------------------------------------------------------------------------- /Section1/Chap13/13.9.nwk: -------------------------------------------------------------------------------- 1 | ((A:0.05, B:0.1,(C:0.05,D:0.1,E:0.15):0.1):0.1,(F:0.15,G:0.1):0.15); 2 | -------------------------------------------------------------------------------- /Section1/Chap13/13.9.tree.py: -------------------------------------------------------------------------------- 1 | #13.9.tree.py 2 | from Bio 
import Phylo 3 | tree = Phylo.read("13.9.nwk","newick") 4 | Phylo.draw(tree, branch_labels = lambda c: c.branch_length) 5 | 6 | -------------------------------------------------------------------------------- /Section1/Chap3/KT225476.2.fasta: -------------------------------------------------------------------------------- 1 | >KT225476.2 Middle East respiratory syndrome coronavirus isolate MERS-CoV/THA/CU/17_06_2015, complete genome 2 | AGTGAATAGCTTGGCTATCTCACTTCCCCTCGTTCTCTTGCAGAACTTTGATTTTAACGAACTTAAATAA 3 | AAGCCCTGTTGTTTAGCGTATTGTTGCACTTGTCTGGTGGGATTGTGGCACTAATTTGCCTGCTCATCTA 4 | GGCAGTGGACATATGCTCAACACTGGGTATAATTCTAATTGAATACTATTTTTCAGTTAGAGCGTCGTGT 5 | CTCTTGTACGTCTCGGTCACAATATACGGTTTCGTCCGGTGCGTGGCAATTCGGGGCACATCATGTCTTT 6 | CGTGGCTGGTGTGATCGCGCAAGGTGCGCGCGGTACGTATCGAGCAGCGCTCAACTCTGAAAAACATCAA 7 | GACCATGTGTCTCTAACTGTGCCACTCTGTGGTTCAGGAAACCTGGTTGAAAAACTTTCACCATGGTTCA 8 | TGGATGGCGAAAATGCCTATGAAGTGGTCAAGGCCATGTTACTTAAAAAAGAGCCACTTCTCTATGTGCC 9 | CATCCGGCTGGCTGGACACACTAGACACCTCCCAGGTCCTCGTGTGTACCTGGTTGAGAGGCTCATTGCT 10 | TGTGAAAATCCATTCATGGTTAACCAATTGGCTTATAGCTCTAGTGCAAATGGCAGCCTGGTTGGCACAA 11 | CTTTGCAGGGCAAGCCTATTGGTATGTTCTTCCCTTATGACATCGAACTTGTCACAGGAAAGCAAAATAT 12 | TCTCCTGCGCAAGTATGGCCGTGGTGGTTATCACTACACCCCAGTCCACTATGAGCGAGACAACACCTCT 13 | TGCCCTGAGTGGATGGACGATTTTGAGGCGGATCCTAAAGGCAAATATGCCCAGAATCTGCTTAAGAAGT 14 | TGATTGGCGGTGATGTCACTCCAGTTGACCAATACATGTGTGGCGTTGATGGAAAACCCATTAGTGCCTA 15 | CGCATTTTTAATGGCCAAGGATGGAATAACCAAACTGGCTGATGTTGAAGCGGACGTCGCAGCACGTGCT 16 | GATGACGAAGGCTTCATCACATTAAAGAACAATCTATATAGATTGGTTTGGCATGTTGAGCGTAAAGACG 17 | TTCCATATCCTAAGCAATCTATTTTTACTATTAATAGTGTGGTCCAAAAGGATGGTGTTGAAAACACTCC 18 | TCCTCACTATTTTACTCTTGGATGCAAAATTTTAACGCTCACCCCACGCAACAAGTGGAGTGGCGTTTCT 19 | GACTTGTCCCTCAAACAAAAACTCCTTTACACCTTCTATGGTAAGGAGTCACTTGAGAACCCAACCTACA 20 | TTTACCACTCCGCATTCATTGAGTGTGGAAGTTGTGGTAATGATTCCTGGCTTACAGGGAATGCTATCCA 21 | AGGGTTTGCCTGTGGATGTGGGGCATCATATACAGCTAATGATGTCGAAGTCCAATCATCTGGCATGATT 22 | 
AAGCCAAATGCTCTTCTTTGTGCTACTTGCCCCTTTGCTAAGGGTGACAGCTGTTCTTCTAATTGCAAAC 23 | ATTCAGTTGCTCAGTTGGTTAGTTACCTTTCTGAACGCTGTAATGTTATTGCTGATTCTAAGTCCTTCAC 24 | ACTTATCTTTGGTGGCGTAGCTTACGCCTACTTTGGATGTGAGGAAGGTACTATGTACTTTGTGCCTAGA 25 | GCTAAGTCTGTTGTCTCAAGGATTGGAGACTCCATCTTTACAGGCTGTACTGGCTCTTGGAACAAGGTCA 26 | CTCAAATTGCTAACATGTTCTTGGAACAGACTCAGCATTCCCTTAACTTTGTGGGAGAGTTCGTTGTCAA 27 | CGATGTTGTCCTCGCAATTCTCTCTGGAACCACAACTAATGTTGACAAAATACGCCAGCTTCTCAAAGGT 28 | GTCACCATTGACAAGTTGCGTGATTATTTAGCTGACTATGACGTAGCAGTCACTGCCGGCCCATTCATGG 29 | ATAATGCTATTAATGTTGGTGGTACAGGATTACAGTATGCCGCCATTACTGCACCTTATGTAGTTCTCAC 30 | TGGCTTAGGTGAGTCCTTTAAGAAAGTTGCAACCATACCGTACAAGGTTTGCAACTCTGTTAAGGATACT 31 | CTGACTTATTATGCTCACAGCGTGTTGTACAGAGTTTTTCCTTATGACATGGATTCTGGTGTGTCATCCT 32 | TTAGTGAACTACTTTTTGATTGCGTTGATCTTTCAGTAGCTTCTACCTATTTTTTAGTCCGCCTCTTGCA 33 | AGATAAGACTGGCGACTTTATGTCTACAATTATTACTTCCTGCCAAACTGCTGTTAGTAAGCTTCTAGAT 34 | ACATGTTTTGAAGCTACAGAAGCAACATTTAACTTCTTGTTAGATTTGGCAGGATTGTTCAGAATCTTTC 35 | TTCGCAATGCCTATGTGTACACTTCACAAGGGTTTGTGGTGGTCAATGGCAAAGTTTCTACACTTGTCAA 36 | ACAAGTGTTAGACTTGCTTAATAAGGGTATGCAACTTTTGCATACAAAGGTCTCCTGGGCTGGTTCTAAT 37 | ATCAGTGCTGTTATCTACAGCGGCAGGGAGTCTCTAATATTCCCATCGGGAACCTATTACTGTGTCACCA 38 | CTAAGGCTAAGTCCGTTCAACAAGATCTTGACGTTATTTTGCCTGGTGAGTTTTCCAAGAAGCAGTTAGG 39 | ACTGCTCCAACCTACTGACAATTCTACAACTGTTAGTGTTACTGTATCCAGTAACATGGTTGAAACTGTT 40 | GTGGGTCAACTTGAGCAAACTAATATGCATAGTCCTGATGTTATAGTAGGTGACTATGTCATTATTAGTG 41 | AAAAATTGTTTGTGCGTAGTAAGGAAGAAGACGGATTCGCCTTCTACCCTGCTTGCACTAATGGTCATGC 42 | TGTACCGACTCTCTTTAGACTTAAGGGAGGTGCACCTGTAAAAAAAGTAGCCTTTGGCGGTGATCAAGTA 43 | CATGAGGTTGCTGCTGTAAGAAGTGTTACTGTCGAGTACAACATTCATGCTGTATTAGACACACTACTTG 44 | CTTCTTCTAGTCTTAGAACCTTTGTTGTAGATAAGTCTTTGTCAATTGAGGAGTTTGCTGACGTAGTAAA 45 | GGAACAAGTCTCAGACTTGCTTGTTAAATTACTGCGTGGAATGCCGATTCCAGATTTTGATTTAGACGAT 46 | TTTATTGACGCACCATGCTATTGCTTTAACGCTGAGGGTGATGCATCTTGGTCTTCTACTATGATCTTCT 47 | CTCTTCACCCCGTCGAGTGTGACGAGGAGTGTTCTGAAGTAGAGGCTTCAGATTTAGAAGAAGGTGAATC 48 | 
AGAGTGCATTTCTGAGACTTCAACTGAACAAGTTGACGTTTCTCATGAGATTTCTGACGACGAGTGGGCT 49 | GCTGCAGTTGATGAAGCGTTCCCCCTCGATGAAGCAGAAGATGTTACTGAATCTGTGCAAGAAGAAGCAC 50 | AACCAGTAGAAGTACCTGTTGAAGATATTGCGCAGGTTGTCATAGCTGACACCTTACAGGAAACTCCTGT 51 | TGTGTCTGATACTGTTGAAGTCCCACCGCAAGTGGTGAAACTTCCGTCTGAACCTCAGACTATCCAGCCC 52 | GAGGTAAAAGAAGTTGCACCTGTCTATGAGGCTGATACCGAACAGACACAGAGTGTTACTGTTAAACCTA 53 | AGAGGTTACGCAAAAAGCGTAATGTTGACCCTTTGTCCAATTTTGAACATAAGGTTATTACAGAGTGCGT 54 | TACCATAGTTTTAGGTGACGCAATTCAAGTAGCCAAGTGCTATGGGGAGTCTGTGTTAGTTAATGCTGCT 55 | AACACACATCTTAAGCATGGCGGTGGTATCGCTGGTGCTATTAATGCGGCTTCAAAAGGGGCTGTCCAAA 56 | AAGAGTCAGATGAGTATATTCTGGCTAAAGGGCCGTTACAAGTAGGAGATTCAGTTCTCTTGCAAGGCCA 57 | TTCTCTAGCTAAGAATATCCTGCATGTCGTAGGCCCAGATGCCCGCGCTAAACAGGATGTTTCTCTCCTT 58 | AGTAAGTGCTATAAGGCTATGAATGCATATCCTCTTGTAGTCACTCCTCTTGTTTCAACAGGCATATTTG 59 | GTGTAAAACCAGCTGTGTCTTTTGATTATCTTATTAGAGAGGCTAAGACTAGAGTTTTAGTCGTCGTTAA 60 | TTCCCAAGATGTCTATAAGAGTCTTACCATAGTTGACATTCCACAGAGTTTGACTTTTTCATATGATGGG 61 | TTACGTGGCGCAATACGTAAAGCTAAAGATTATGGTTTTACTGTTTTTGTGTGCACAGACAACTCTGCTA 62 | ACACTAAAGTTCTTAGGAACAAGGGTGTTGATTATACTAAGAAGTTTCTTACAGTTGACGGTGTGCAATA 63 | TTATTGCTACACGTCTAAGGACACTTTAGATGATATCTTACAACAGGCTAATAAGTCTGTTGGTATTATA 64 | TCTATGCCTTTGGGATATGTGTCTCATGGTTTAGACTTAATTCAAGCAGGGAGTGTCGTGCGTAGAGTTA 65 | ACGTGCCCTACGTGTGTCTCCTAGCTAATAAAGAGCAAGAAGCTATTTTGATGTCTGAAGACGTTAAGTT 66 | AAACCCTTCAGAAGATTTTATAAAGCACGTCCGCACTAATGGTGGTTACAATTCTTGGCATTTAGTCGAG 67 | GGTGAACTATTGGTGCAAGACTTACGCTTAAATAAGCTCCTGCATTGGTCTGATCAAACCATATGCTACA 68 | AGGATAGTGTGTTTTATGTTGTAAAGAATAGTACAGCTTTTCCATTTGAAACACTTTCAGCATGTCGTGC 69 | GTATTTGGATTCACGCACGACACAGCAGTTAACAATCGAAGTCTTAGTGACTGTCGATGGTGTAAATTTT 70 | AGAACAGTCGTTCTAAATAATAAGAACACTTATAGATCACAGCTTGGATGCGTTTTCTTTAATGGTGCTG 71 | ATATTTCTGATACCATTCCTGATGAGAAACAGAATGGTCACAGTTTATATCTAGCAGACAATTTGACTGC 72 | TGATGAAACAAAGGCGCTTAAAGAGTTATATGGCCCCGTTGATCCTACTTTCTTACACAGATTCTATTCA 73 | CTTAAGGCTGCAGTCCATAAGTGGAAGATGGTTGTGTGTGATAAGGTACGTTCTCTCAAATTGAGTGATA 74 | 
ATAATTGTTATCTTAATGCAGTTATTATGACACTTGATTTATTGAAGGACATTAAATTTGTTATACCTGC 75 | TCTACAGCATGCATTTATGAAACATAAGGGCGGTGATTCAACTGACTTCATAGCCCTCATTATGGCTTAT 76 | GGCAATTGCACATTTGGTGCTCCAGATGATGCCTCTCGGTTACTTCATACCGTGCTTGCAAAGGCTGAGT 77 | TATGCTGTTCTGCACGCATGGTTTGGAGAGAGTGGTGCAATGTCTGTGGCATAAAAGATGTTGTTCTACA 78 | AGGCTTAAAAGCTTGTTGTTACGTGGGTGTGCAAACTGTTGAAGATCTGCGTGCTCGCATGACATATGTA 79 | TGCCAGTGTGGTGGTGAACGTCATCGGCAAATAGTCGAACACACCACCCCCTGGTTGCTGCTCTCAGGCA 80 | CACCAAATGAAAAATTGGTGACAACCTCCACGGCGCCTGATTTTGTAGCGTTTAATGTCTTTCAGGGCAT 81 | TGAAACGGCTGTTGGCCATTATGTTCATGCTCGCCTGAAGGGTGGTCTTATTTTAAAGTTTGACTCTGGC 82 | ACCGTTAGCAAGACTTCAGACTGGAAGTGCAAGGTGACAGATGTACTTTTCCCCGGCCAAAAATACAGTA 83 | GCGATTGTAATGTCGTACGGTATTCTTTGGACGGTAATTTCAGAACAGAGGTTGATCCCGACCTATCTGC 84 | TTTCTATGTTAAGGATGGTAAATACTTTACAAGTGAACCACCCGTAACATATTCACCAGCTACAATTTTA 85 | GCTGGTAGTGTCTACACTAATAGCTGCCTTGTATCGTCTGATGGACAACCTGGCGGTGATGCTATTAGTT 86 | TGAGTTTTAATAACCTTTTAGGGTTTGATTCTAGTAAACCAGTCACTAAGAAATACACTTACTCCTTCTT 87 | GCCTAAAGAAGACGGCGATGTGTTGTTGGCTGAGTTTGACACTTATGACCCTATTTATAAGAATGGTGCC 88 | ATGTATAAAGGCAAACCAATTCTTTGGGTCAACAAAGCATCTTATGATACTAATCTTAATAAGTTCAATA 89 | GAGCTAGTTTGCGTCAAATTTTTGACGTAGCCCCCATTGAACTCGAAAATAAATTCACACCTTTGAGTGT 90 | GGCGTCCACACCAGTTGAACCTCCAACTGTAGATGTGGTAGCACTTCAACAGGAAATGACAATTGTCAAA 91 | TGTAAGGGTTTAAATAAACCTTTCGTGAAGGACAATGTCAGTTTCGTTGCTGATGACTCAGGTACTCCCG 92 | TTGTTGAGTATCTGTCTAAAGAAGATCTACATACATTGTATGTAGACCCTAAGTATCAAGTCATTGTCTT 93 | AAAAGACAATGTACTTTCTTCTATGCTTAGATTGCACACCGTTGAGTCAGGTGATATTAACGTTGTTGCA 94 | GCTTCCGGATCTTTGACACGTAAAGTGAAGTTACTATTTAGGGCTTCATTTTATTTCAAAGAATTTGCTA 95 | CCCGCACTTTCACTGCTACCACTGCTGTAGGTAGTTGTATAAAGAGTGTAGTGCGGCATCTAGGTGTTAC 96 | TAAAGGCATATTGACAGGCTGTTTTAGTTTTGTCAAGATGTTATTTATTCTTCCACTAGCTTACTTTAGT 97 | GATTCAAAAATCGGCACCACAGAGGTTAAAGTGAGTGCTTTGAAAACAGCTGGCGTTGTGACAGGTAATG 98 | TTGTAAAACAGTGTTGCACTGCTGCTGTTGATTTAAGTATGGATAAGTTGCGCCGTGTGGATTGGAAATC 99 | AACCCTACGGTTGTTACTTATGTTATGCACAACTATGGTATTGTTGTCTTCTGTGTATCACTTGTATGTC 100 | 
TTCAATCAGGTCTTATCAAGTGATGTTATGTTTGAAGATGCCCAAGGTTTGAAAAAGTTCTACAAAGAAG 101 | TTAGAGCTTACCTAGGAATCTCTTCTGCTTGTGACGGTCTTGCTTCAGCTTATAGGGCGAATTCATTTGA 102 | TGTACCTACATTCTGCGCAAACCGTTCTGCAATGTGTAATTGGTGCTTGATTAGCCAAGATTCCATAACT 103 | CACTACCCAGCTCTTAAGATGGTTCAAACACATCTTAGCCACTATGTTCTTAACATAGATTGGTTGTGGT 104 | TTGCATTTGAGACTGGTTTGGCATACATGCTCTATACCTCGGCCTTCAACTGGTTGTTGTTGGCAGGTAC 105 | ATTGCATTATTTCTTTGCACAGACTTCCATATTTGTAGACTGGCGGTCATGCAATTATGCTGTGTCTAGT 106 | GCCTTCTGGTTATTTACCCACATTCCAATGGCGGGTTTGGTACGAATGTATAATTTGTTAGCATGCCTTT 107 | GGCTTTTACGCAAGTTTTATCAGCATGTAATCAATGGTTGCAAAGATACGGCATGCTTGCTCTGCTATAA 108 | GAGGAACCGACTTACTAGAGTTGAAGCTTCTACCGTTGTCTGTGGTGGAAAACGTACGTTTTATATCACA 109 | GCAAATGGCGGTATTTCATTCTGTCGTAGGCATAATTGGAATTGTGTGGATTGTGACATTGCAGGTGTGG 110 | GGAATACCTTCATCTGTGAAGAAGTCGCAAATGACCTCACTACCGCCCTACGCAGGCCTATTAACGCTAC 111 | GGATAGATCACATTATTATGTGGATTCCGTTACAGTTAAAGAGACTGTTGTTCAGTTTAATTATCGTAGA 112 | GACGGTCAACCATTCTACGAGCCGTTTCCCCTCTGCGCTTTTACAAATCTAGATAAGTTGAAGTTCAAAG 113 | AGGTCTGTAAAACTACTACTGGTATACCTGAATACAACTTTATCATCTACGACTCATCAGATCGTGGCCA 114 | GGAAAGTTTAGCTAGGTCTGCATGTGTTTATTATTCTCAAGTCTTGTGTAAATCAATTCTTTTGGTTGAC 115 | TCAAGTTTGGTTACTTCTGTTGGTGATTCTAGTGAAATCGCCACTAAAATGTTTGATTCCTTTGTTAATA 116 | GTTTCGTCTCGCTGTATAATGTCACACGCGAAAAGTTGGAAAAACTTATCTCTACTGCTCGTGATGGCGT 117 | AAGGCGAGGCGATAACTTCCATAGTGTCTTAACAACATTCATTGACGCAGCACGAGGCCCCGCAGGTGTG 118 | GAGTCTGATGTTGAGACCAATGAAATTGTTGACTCTGTGCAGTATGCTCATAAACATGACATACAAATTA 119 | CTAATGAGAGTTACAATAATTATGTACCCTCATATGTTAAACCTGATAGTGTGTCTACCAGTGATTTAGG 120 | TAGTCTCATTGATTGTAATGCGGCTTCAGTTAACCAAATTGTCTTGCGTAATTCTAATGGTGCTTGTATT 121 | TGGAACGCTGCTGCATATATGAAACTCTCGGATGCACTTAAACGACAGATTCGCATTGCATGCCGTAAGT 122 | GTAATTTAGCTTTCCGGTTAACCACCTCAAAGCTACGCGCTAATGATAATATCTTATCAGTTAGATTCAC 123 | TGCTAACAAAATTGTTGGTGGTGCTCCTACATGGTTTAATGTGTTGCGTGACTTTACGTTAAAGGGTTAC 124 | GTTCTTGCTACCATTATTGTGTTTCTGTGTGCTGTACTGATGTATTTGTGTTTACCTACATTTTCTATGG 125 | TACCTGTTGAATTTTATGAAGACCGCATCTTGGACTTTAAAGTTCTTGATAATGGTATCATTAGGGATGT 126 
| AAATCCTGATGATAAGTGCTTTGCTAATAAGCACCGGTCCTTCACACAATGGTATCATGAGCATGTTGGT 127 | GGTGTCTATGACAACTCTATCACATGCCCATTGACAGTTGCAGTAATTGCTGGAGTTGCTGGTGCTCGCA 128 | TTCCAGACGTACCTACTACATTGGCTTGGGTGAACAATCAGATAATTTTCTTTGTTTCTCGAGTCTTTGC 129 | TAATACAGGCAGTGTTTGCTACACTCCTATAGATGAGATACCCTATAAGAGTTTCTCTGATAGTGGTTGC 130 | ATTCTTCCATCTGAGTGCACTATGTTTAGGGATGCAGAGGGCCGTATGACACCATACTGCCATGATCCTA 131 | CTGTTTTGCCTGGGGCTTTTGCGTACAGTCAGATGAGGCCTCATGTTCGTTACGACTTGTATGATGGTAA 132 | CATGTTTATTAAATTTCCTGAAGTAGTATTTGAAAGTACACTTAGGATTACTAGAACTCTGTCAACTCAG 133 | TACTGCCGGTTCGGTAGTTGTGAGTATGCACAAGAGGGTGTTTGTATTACCACAAATGGCTCGTGGGCCA 134 | TTTTTAATGACCACCATCTTAATAGACCTGGTGTCTATTGTGGCTCTGATTTTATTGACATTGTCAGGCG 135 | GTTAGCAGTATCACTGTTCCAGCCTATTACTTATTTCCAATTGACTACCTCATTGGTCTTGGGTATAGGT 136 | TTGTGTGCATTCCTGACTTTGCTCTTCTATTATATTAATAAAGTAAAACGTGCTTTTGCAGATTACACCC 137 | AGTGTGCTGTAATTGCTGTTGTTGCTGCTGTTCTTAATAGCTTGTGCATCTGCTTTGTTGCCTCTATACC 138 | ATTGTGTATATTACCTTACACTGCATTGTACTATTATGCTACATTCTATTTTACTAATGAGCCTGCATTT 139 | ATTATGCATGTTTCTTGGTACATTATGTTCGGGCCTATCGTTCCCATATGGATGACCTGCGTCTATACAG 140 | TTGCAATGTGCTTTAGACACTTCTTCTGGGTTTTAGCTTATTTTAGTAAGAAACATGTAGAAGTTTTTAC 141 | TGATGGTAAGCTTAATTGTAGTTTCCAGGACGCTGCCTCTAATATCTTTGTTATTAACAAGGACACTTAT 142 | GCAGCTCTTAGAAACTCTTTAACTAATGATGCCTATTCACGATTTTTGGGGTTGTTTAACAAGTATAAGT 143 | ACTTCTCTGGTGCTATGGAAACAGCCGCTTATCGTGAAGCTGCAGCATGTCATCTTGCTAAAGCCTTACA 144 | AACATACAGCGAGACTGGTAGTGATCTTCTTTACCAACCACCCAACTGTAGCATAACCTCTGGCGTGTTG 145 | CAAAGCGGTTTGGTGAAAATGTCACATCCCAGTGGAGATGTTGAGGCTTGTATGGTTCAGGTTACCTGCG 146 | GTAGCATGACTCTTAATGGTCTTTGGCTTGACAACACAGTCTGGTGCCCACGACACGTAATGTGCCCGGC 147 | TGACCAGTTGTCTGATCCTAATTATGATGCCTTGTTGATTTCTATGACTAATCATAGTTTCAGTGTGCAA 148 | AAACACATTGGCGCTCCAGCAAACTTGCGTGTTGTTGGTCATGCCATGCAAGGCACTCTTTTGAAGTTGA 149 | CTGTCGATGTTGCTAACCCTAGCACTCCAGCCTACACTTTTACAACAGTGAAACCTGGCGCAGCATTTAG 150 | TGTGTTAGCATGCTATAATGGTCGTCCGACTGGTACATTCACTGTTGTAATGCGCCCTAACTACACAATT 151 | AAGGGTTCCTTTCTGTGTGGTTCTTGTGGTAGTGTTGGTTACACCAAGGAGGGTAGTGTGATCAATTTTT 
152 | GTTACATGCATCAAATGGAACTTGCTAATGGTACACATACCGGTTCAGCATTTGATGGTACTATGTATGG 153 | TGCCTTTATGGATAAACAAGTGCACCAAGTTCAGTTAACAGACAAATACTGCAGTGTTAATGTAGTAGCT 154 | TGGCTTTACGCAGCAATACTTAATGGTTGCGCTTGGTTTGTAAAACCTAATCGCACTAGTGTTGTTTCTT 155 | TTAATGAATGGGCTCTTGCCAACCAATTCACTGAATTTGTTGGCACTCAATCCGTTGACATGTTAGCTGT 156 | CAAAACAGGCGTTGCTATTGAACAGCTGCTTTATGCGATCCAACAACTTTATACTGGGTTCCAGGGAAAG 157 | CAAATCCTTGGCAGTACTATGTTGGAAGATGAATTCACACCTGAGGATGTTAATATGCAGATTATGGGTG 158 | TGGTTATGCAGAGTGGTGTGAGAAAAGTTACATATGGTACTGCGCATTGGTTGTTCGCGACCCTTGTCTC 159 | AACCTATGTGATAATCTTACAAGCCACTAAATTTACTTTGTGGAACTACTTGTTTGAGACTATTCCCACA 160 | CAGTTGTTCCCACTCTTATTTGTGACTATGGCCTTCGTTATGTTGTTGGTTAAACACAAACACACCTTTT 161 | TGACACTTTTCTTGTTGCCTGTGGCTATTTGTTTGACTTATGCAAACATAGTCTACGAGCCCACTACTCC 162 | CATTTCGTCAGCGCTGATTGCAGTTGCAAATTGGCTTGCCCCCACTAATGCTTATATGCGCACTACACAT 163 | ACTGATATTGGTGTCTACATTAGTATGTCACTTGTATTAGTCATTGTAGTGAAGAGATTGTACAACCCAT 164 | CACTTTCTAACTTTGCGTTAGCATTGTGCAGTGGTGTAATGTGGTTGTACACTTATAGCATTGGAGAAGC 165 | CTCAAGTCCCATTGCCTATCTGGTTTTTGTCACTACACTCACTAGTGATTATACGATTACAGTCTTTGTT 166 | ACTGTCAACCTTGCAAAAGTTTGCACTTATGCCATCTTTGCTTACTCACCACAGCTTACACTTGTGTTTC 167 | CGGAAGTGAAGATGATACTTTTATTATACACATGTTTAGGTTTCATGTGTACTTGCTATTTTGGTGTCTT 168 | CTCTCTTTTGAACCTTAAGCTTAGAGCACCTATGGGTGTCTATGACTTTAAGGTCTCAACACAAGAGTTC 169 | AGATTCATGACTGCTAACAATCTAACTGCACCTAGAAATTCTTGGGAGGCTATGGCTCTGAACTTTAAGT 170 | TAATAGGTATTGGCGGTACACCTTGTATAAAGGTTGCTGCTATGCAGTCTAAACTTACAGATCTTAAATG 171 | CACATCTGTGGTTCTCCTCTCTGTGCTCCAACAGTTACACTTAGAGGCTAATAGTAGGGCCTGGGCTTTC 172 | TGTGTTAAATGCCATAATGATATATTGGCAGCAACAGACCCCAGTGAGGCTTTCGAGAAATTCGTAAGTC 173 | TCTTTGCCACTTTAATGACTTTTTCTGGTAATGTAGATCTTGATGCGTTAGCTAGTGATATTTTTGACAC 174 | TCCTAGCGTACTTCAAGCTACTCTTTCTGAGTTTTCACACTTAGCTACCTTTGCTGAGTTGGAAGCTGCG 175 | CAGAAAGCCTATCAGGAAGCTATGGACTCTGGTGACACCTCACCACAAGTTCTTAAGGCTTTGCAGAAGG 176 | CTGTTAATATAGCTAAAAACGCCTATGAGAAGGATAAGGCAGTGGCCCGTAAGTTAGAACGTATGGCTGA 177 | 
TCAGGCTATGACTTCTATGTATAAGCAAGCACGTGCTGAAGACAAGAAAGCAAAAATTGTCAGTGCTATG 178 | CAAACTATGTTGTTTGGTATGATTAAGAAGCTCGACAACGATGTTCTTAATGGTATCATTTCTAACGCTA 179 | GGAATGGTTGTATACCTCTTAGTGTCATTCCACTGTGTGCTTCAAATAAACTTCGCGTTGTAATTCCTGA 180 | CTTCACCGTCTGGAATCAGGTAGTCACATATCCCTCGCTTAACTACGCTGGGGCTTTGTGGGACATTACA 181 | GTTATAAACAATGTGGACAATGAAATTGTTAAGTCTTCAGATGTTGTAGACAGCAATGAAAATTTACCAT 182 | GGCCCCTTGTTTTAGAATTCGCTAGGGCATCCACTTCTGCCGCTAAGTTGCAAAATAATGAGATCAAACC 183 | TTCAGGTTTAAAAACCATGGTTGTGTCTGCAGGTCAAGAGCAAACTAACTGTAATACTAGTTCCTTAGCT 184 | TATTACGAACCTGTGCAGGGTCGTAAAATGCTGATGGCTCTTCTTTCTGATAATGCCTATCTCAAATGGG 185 | CGCGTGTTGAAGGTAAGGACGGATTTGTTAGTGTAGAGCTACAACCTCCTTGCAAATTCTTGATTGCGGG 186 | ACCAAAAGGACCTGAAATCCGATATCTCTATTTTGTTAAAAATCTTAACAACCTTCATCGCGGGCAAGTG 187 | TTAGGGCACATTGCTGCGACTGTTAGATTGCAAGCTGGTTCTAACACCGAGTTTGCCTCTAATTCTTCGG 188 | TGTTGTCACTTGTTAACTTCACCGTTGATCCTCAAAAAGCTTATCTCGATTTCGTCAATGCGGGAGGTGC 189 | CCCATTGACAAATTGTGTTAAGATGCTTACTCCTAAAACTGGTACAGGTATAGCTATATCTGTTAAACCA 190 | GAGAGTACAGCTGATCAAGAGACTTATGGTGGAGCTTCAGTGTGTCTCTATTGCCGTGCGCATATAGAAC 191 | ATCCTGATGTCTCTGGTGTTTGTAAATATAAGGGTAAGTTTGTCCAAATCCCTGCTCAGTGTGTCCGTGA 192 | CCCTGTGGGATTTTGTTTGTCAAATACCCCCTGTAATGTCTGTCAATATTGGATTGGATATGGGTGCAAT 193 | TGTGACTCGCTTAGGCAAGTAGCACTGCCCCAATCTAAAGATTCCAATTTTTTAAACGAGTCCGGGGTTC 194 | TATTGTAAATGCCCGAATAGAACCCTGTTCAAGTGGTTTGTCCACTGATGTCGTCTTTAGGGCATTTGAC 195 | ATCTGCAACTATAAGGCTAAGGTTGCTGGTATTGGAAAATACTACAAGACTAATACTTGTAGGTTTGTAG 196 | AATTAGATGACCAAGGGCATCATTTAGACTCCTATTTTGTCGTTAAGAGGCATACTATGGAGAATTATGA 197 | ACTAGAGAAGCACTGTTACGATTTGTTACGTGACTGTGATGCTGTAGCTCCCCATGATTTCTTCATCTTT 198 | GATGTAGACAAAGTTAAAACACCTCATATTGTACGTCAGCGTTTAACTGAGTACACTATGATGGATCTTG 199 | TATATGCCCTGAGGCACTTTGATCAAAATAGCGAAGTGCTTAAGGCTATCTTAGTGAAGTATGGTTGCTG 200 | TGATGTTACCTACTTTGAAAATAAACTCTGGTTTGATTTTGTTGAAAATCCCAGTGTTATTGGTGTTTAT 201 | CATAAACTTGGAGAACGTGTACGCCAAGCTATCTTAAACACTGTTAAATTTTGTGACCACATGGTCAAGG 202 | CTGGTTTAGTCGGTGTGCTCACACTAGACAACCAGGACCTTAATGGCAAGTGGTATGATTTTGGTGACTT 203 
| CGTAATCACTCAACCTGGTTCAGGAGTAGCTATAGTTGATAGCTACTATTCTTATTTGATGCCTGTGCTC 204 | TCAATGACCGATTGTCTGGCCGCTGAGACACATAGGGATTGTGATTTTAATAAACCACTCATTGAGTGGC 205 | CACTTACTGAGTATGATTTTACTGATTATAAGGTACAACTCTTTGAGAAGTACTTTAAATATTGGGATCA 206 | GACGTATCACGCAAATTGCGTTAATTGTACTGATGACCGTTGTGTGTTACATTGTGCTAATTTCAATGTA 207 | TTGTTTGCTATGACCATGCCTAAGACTTGTTTCGGACCCATAGTCCGAAAGATCTTTGTTGATGGCGTGC 208 | CATTTGTAGTATCTTGTGGTTATCACTACAAAGAATTAGGTTTAGTCATGAATATGGATGTTAGTCTCCA 209 | TAGACATAGGCTCTCTCTTAAGGAGTTGATGATGTATGCCGCTGATCCAGCCATGCACATTGCCTCCTCT 210 | AACGCTTTTCTTGATTTGAGGACATCATGTTTTAGTGTCGCTGCACTTACAACTGGTTTGACTTTTCAAA 211 | CTGTGCGGCCTGGCAATTTTAACCAAGACTTCTATGATTTCGTGGTATCTAAAGGTTTCTTTAAGGAGGG 212 | CTCTTCAGTTACGCTCAAACATTTTTTCTTTGCTCAAGATGGTAATGCTGCTATTACAGATTATAATTAC 213 | TATTCTTATAATCTGCCTACTATGTGTGACATCAAACAAATGTTGTTCTGCATGGAAGTTGTAAACAAGT 214 | ACTTCGAAATCTATGACGGTGGTTGTCTTAATGCTTCTGAAGTGGTTGTTAATAATTTAGACAAGAGTGC 215 | TGGCCATCCTTTTAATAAGTTTGGCAAAGCTCGTGTCTATTATGAGAGCATGTCTTACCAGGAGCAAGAT 216 | GAACTCTTTGCCATGACAAAGCGTAACGTCATTCCTACCATGACTCAAATGAATCTAAAATATGCTATTA 217 | GTGCTAAGAATAGAGCTCGCACTGTTGCAGGCGTGTCCATACTTAGCACAATGACTAATCGCCAGTACCA 218 | TCAGAAAATGCTTAAGTCCATGGCTGCAACTCGTGGAGCGACTTGCGTCATTGGTACTACAAAGTTCTAT 219 | GGTGGCTGGGATTTCATGCTTAAAACATTGTACAAAGATGTCGATAATCCGCATCTTATGGGTTGGGATT 220 | ACCCTAAGTGTGATAGAGCTATGCCTAATATGTGTAGAATCTTCGCTTCACTCATATTAGCTCGTAAACA 221 | TGGCACTTGTTGTACTACAAGGGACAGATTTTATCGCTTGGCAAATGAGTGTGCTCAGGTGCTAAGCGAA 222 | TATGTTCTATGTGGTGGTGGTTACTACGTCAAACCTGGAGGTACCAGTAGCGGAGATGCCACCACTGCAT 223 | ATGCCAATAGTGTCTTTAACATTTTGCAGGCGACAACTGCTAATGTCAGTGCACTTATGGGTGCTAATGG 224 | CAACAAGATTGTTGACAAAGAAGTTAAAGACATGCAGTTTGATTTGTATGTCAATGTTTACAGGAGCACT 225 | AGCCCAGACCCCAAATTTGTTGATAAATACTATGCTTTTCTTAATAAGCACTTTTCTATGATGATACTGT 226 | CTGATGACGGTGTCGTTTGCTATAATAGTGATTATGCAGCTAAGGGTTACATTGCTGGAATACAGAATTT 227 | TAAGGAAACGCTGTATTATCAGAACAATGTCTTTATGTCTGAAGCTAAATGCTGGGTGGAAACCGATCTG 228 | AAGAAAGGGCCACATGAATTCTGTTCACAGCATACGCTTTATATTAAGGATGGCGACGATGGTTACTTCC 
229 | TTCCTTATCCAGACCCTTCAAGAATTTTGTCTGCCGGTTGCTTTGTAGATGATATCGTTAAGACTGACGG 230 | TACACTCATGGTAGAGCGGTTTGTGTCTTTGGCTATAGATGCTTACCCTCTCACAAAGCATGAAGATATA 231 | GAATACCAGAATGTATTCTGGGTCTACTTACAGTATATAGAAAAACTGTATAAAGACCTTACAGGACACA 232 | TGCTTGACAGTTATTCTGTCATGCTATGTGGTGATAATTCTGCTAAGTTTTGGGAAGAGGCATTCTACAG 233 | AGATCTCTATAGTTCGCCTACCACTTTGCAGGCTGTCGGTTCATGCGTTGTATGCCATTCACAGACTTCC 234 | CTACGCTGTGGGACATGCATCCGTAGACCATTTCTCTGCTGTAAATGCTGCTATGATCATGTTATAGCAA 235 | CTCCACATAAGATGGTTTTGTCTGTTTCTCCTTACGTTTGTAATGCCCCTGGTTGTGGCGTTTCAGACGT 236 | TACTAAGCTATATTTAGGTGGTATGAGCTACTTTTGTGTAGATCATAGACCTGTGTGTAGTTTTCCACTT 237 | TGCGCTAATGGTCTTGTATTCGGCTTATACAAGAATATGTGCACAGGTAGTCCTTCTATAGTTGAATTTA 238 | ATAGGTTGGCTACCTGTGACTGGACTGAAAGTGGTGATTACACCCTTGCCAATACTACAACAGAACCACT 239 | CAAACTTTTTGCTGCTGAGACTTTACGTGCCACTGAAGAGGCGTCTAAGCAGTCTTATGCTATTGCCACC 240 | ATCAAAGAAATTGTTGGTGAGCGCCAACTATTACTTGTGTGGGAGGCTGGCAAGTCCAAACCACCACTCA 241 | ATCGTAATTATGTTTTTACTGGTTATCATATAACCAAAAATAGTAAAGTGCAGCTCGGTGAGTACATCTT 242 | CGAGCGCATTGATTATAGTGATGCTGTATCCTACAAGTCTAGTACAACGTATAAACTGACTGTAGGTGAC 243 | ATCTTCGTACTTACCTCTCACTCTGTGGCTACCTTGACGGCGCCCACAATTGTGAATCAAGAGAGGTATG 244 | TTAAAATTACTGGGTTGTACCCAACCATTACGGTACCTGAAGAGTTCGCAAGTCATGTTGCCAACTTCCA 245 | AAAATCAGGTTATAGTAAATATGTCACTGTTCAGGGACCACCTGGCACTGGCAAAAGTCATTTTGCTATA 246 | GGGTTAGCGATTTACTACCCTACAGCACGTGTTGTTTATACAGCATGTTCACACGCAGCTGTTGATGCTT 247 | TGTGTGAAAAAGCTTTTAAATATTTGAACATTGCTAAATGTTCCCGTATCATTCCTGCAAAGGCACGTGT 248 | TGAGTGCTATGACAGGTTTAAAGTTAATGAGACAAATTCTCAATATTTGTTTAGTACTATTAATGCTCTA 249 | CCAGAAACTTCTGCCGATATTCTGGTGGTTGATGAGGTTAGTATGTGCACTAATTATGATCTTTCAATTA 250 | TTAATGCACGTATTAAAGCTAAGCACATTGTCTATGTAGGAGATCCAGCACAGTTGCCAGCTCCTAGGAC 251 | TTTGTTGACTAGAGGCACATTGGAACCAGAAAATTTCAATAGTGTCACTAGATTGATGTGTAACTTAGGT 252 | CCTGACATATTTTTAAGTATGTGCTACAGGTGTCCTAAGGAAATAGTAAGCACTGTGAGCGCTCTTGTCT 253 | ACAATAATAAATTGTTAGCCAAGAAGGAGCTTTCAGGCCAGTGCTTTAAAATACTCTATAAGGGCAATGT 254 | 
GACGCATGATGCTAGCTCTGCCATTAATAGACCACAACTCACATTTGTGAAGAATTTTATTACTGCCAAT 255 | CCGGCATGGAGTAAGGCAGTCTTTATTTCGCCTTATAATTCACAGAATGCTGTGGCTCGTTCAATGCTGG 256 | GTCTTACTACTCAGACTGTTGATTCCTCACAGGGTTCAGAATACCAGTACGTTATCTTCTGTCAAACAGC 257 | AGATACGGCACATGCTAACAACATTAACAGATTTAATGTTGCAATCACTCGTGCCCAAAAAGGTATTCTT 258 | TGTGTTATGACATCTCAGGCACTCTTTGAGTCCTTAGAGTTTACTGAATTGTCTTTTACTAATTACAAGC 259 | TCCAGTCTCAGATTGTAACTGGCCTTTTTAAAGATTGCTCTAGAGAAACTTCTGGCCTCTCACCTGCTTA 260 | TGCACCAACATACGTTAGTGTTGATGACAAGTATAAGACGAGTGATGAGCTTTGCGTGAATCTTAATTTA 261 | CCCGCAAATATCCCATACTCTCGTGTTATTTCCAGGATGGGCTTTAAACTCGATGCAACAGTTCCTGGAT 262 | ATCCTAAGCTTTTCATTACTCGTGAAGAGGCTGTAAGGCAAGTTCGAAGCTGGATAGGCTTCGATGTTGA 263 | GGGTGCTCATGCTTCCCGTAATGCATGTGGCACCAATGTGCCTCTACAATTAGGATTTTCAACTGGTGTG 264 | AACTTTGTTGTTCAGCCAGTTGGTGTTGTAGACACTGAGTGGGGTAACATGTTAACGGGCATTGCTGCCC 265 | GTCCTCCACCAGGTGAACAGTTTAAGCACCTCGTGCCTCTTATGCATAAGGGGGCTGCGTGGCCTATTGT 266 | TAGACGACGTATAGTGCAAATGTTGTCAGACACTTTAGACAAATTGTCTGATTACTGTACGTTTGTTTGT 267 | TGGGCTCATGGCTTTGAATTAACGTCTGCATCATACTTTTGCAAGATAGGTAAGGAACAGAAGTGTTGCA 268 | TGTGCAATAGACGCGCTGTAGCGTACTCTTCGCCTCTGCAATCTTATGCCTGCTGGACTCATTCCTGCGG 269 | TTATGATTATGTCTACAACCCTTTCTTTGTCGATGTTCAACAGTGGGGTTATGTAGGCAATCTTGCTACT 270 | AATCACGATCGTTATTGCTCTGTCCATCAAGGAGCTCATGTGGCTTCTAATGATGCAATAATGACTCGTT 271 | GTTTAGCTATTCATTCTTGTTTTATAGAACGTGTGGATTGGGATATAGAGTATCCTTATATCTCACATGA 272 | AAAGAAATTGAATTCCTGTTGTAGAATCGTTGAGCGCAACGTCGTACGTGCTGCTCTTCTTGCCGGTTCA 273 | TTTGACAAAGTCTATGATATTGGCAATCCTAAAGGAATTCCTATTGTTGATGACCCTGTGGTTGATTGGC 274 | ATTATTTTGATGCACAGCCCTTGACCAGAAAGGTACAACAGCTTTTCTATACAGAGGACATGGCCTCAAG 275 | ATTTGCTGATGGGCTCTGCTTATTTTGGAACTGTAATGTACCAAAATATCCTAATAATGCAATTGTATGC 276 | AGGTTTGACACACGTGTGCATTCTGAGTTCAATTTGCCAGGTTGTGATGGCGGTAGTTTGTATGTTAACA 277 | AGCACGCTTTTCATACACCAGCATATGATGTGAGTGCATTCCGTGATCTGAAACCTTTACCATTCTTTTA 278 | TTATTCTACTACACCATGTGAAGTGCATGGTAATGGTAGTATGATAGAGGATATTGATTATGTACCCCTA 279 | AAATCTGCAGTCTGTATTACAGCTTGTAATTTAGGGGGCGCTGTTTGTAGGAAGCATGCTACAGAGTACA 280 
| GAGAGTATATGGAAGCATATAATCTTGTCTCTGCATCAGGTTTCCGCCTTTGGTGTTATAAGACCTTTGA 281 | TATTTATAATCTCTGGTCTACTTTTACAAAAGTTCAAGGTTTGGAAAACATTGCTTTTAATGTTGTTAAA 282 | CAAGGCCATTTTATTGGTGTTGAGGGTGAACTACCTGTAGCTGTAGTCAATGATAAGATCTTCACCAAGA 283 | GTGGCGTTAATGACATTTGTATGTTTGAGAATAAAACCACTTTGCCTACTAATATAGCTTTTGAACTCTA 284 | TGCTAAGCGTGCTGTACGCTCGCATCCTGATTTCAAATTGCTACACAATTTACAAGCAGACATTTGCTAC 285 | AAGTTCGTCCTTTGGGATTATGAACGTAGCAATATTTATGGTACTGCTACTATTGGTGTATGTAAGTACA 286 | CTGATATTGATGTTAATTCAGCTTTGAATATATGTTTTGACATACGCGATAATGGTTCATTGGAGAAGTT 287 | CATGTCTACTCCCAATGCCATCTTTATTTCTGATAGAAAAATTAAGAAATACCCTTGTATTGTAGGTCCT 288 | GATTATGCTTACTTCAATGGTGCTATCATCCGTGATAGTGATGTTGTTAAACAACCAGTGAAGTTCTACT 289 | TGTATAAGAAAGTCAATAATGAGTTTATTGATCCTACTGAGTGTATTTACACTCAGAGTCGCTCTTGTAG 290 | TGACTTCCTACCCCTGTCTGACATGGAGAAAGACTTTCTATCTTTTGATAGTGATGTTTTCATTAAGAAG 291 | TATGGCTTGGAAAACTATGCTTTTGAGCACGTAGTCTATGGAGACTTCTCTCATACTACGTTAGGCGGTC 292 | TTCACTTGCTTATTGGTTTATACAAGAAGCAACAGGAAGGTCATATTATTATGGAAGAAATGCTAAAAGG 293 | TAGCTCAACTATTCATAACTATTTTATTACTGAGACTAACACAGCGGCTTTTAAGGCGGTGTGTTCTGTT 294 | ATAGATTTAAAGCTTGACGACTTTGTTATGATTTTAAAGAGTCAAGACCTTGGCGTAGTATCCAAGGTTG 295 | TCAAGGTTCCTATTGACTTAACAATGATTGAGTTTATGTTATGGTGTAAGGATGGACAGGTTCAAACCTT 296 | CTACCCTCGACTCCAGGCTTCTGCAGATTGGAAACCTGGTCATGCAATGCCATCCCTCTTTAAAGTTCAA 297 | AATGTAAACCTTGAACGTTGTGAGCTTGCTAATTACAAGCAATCTATTCCTATGCCTCGCGGTGTGCACA 298 | TGAACATCGCTAAATATATGCAATTGTGCCAGTATTTAAATACTTGCACATTAGCCGTGCCTGCCAATAT 299 | GCGTGTTATACATTTTGGCGCTGGTTCTGATAAAGGTATCGCTCCTGGTACATCAGTTTTACGACAGTGG 300 | CTTCCTACAGATGCCATTATTATAGATAATGATTTAAATGAGTTCGTGTCAGATGCTGACATAACTTTAT 301 | TTGGAGATTGTGTAACTGTACGTGTCGGCCAACAAGTGGATCTTGTTATTTCCGACATGTATGATCCTAC 302 | TACTAAGAATGTAACAGGTAGTAATGAGTCAAAGGCTTTATTCTTTACTTACCTGTGTAACCTCATTAAT 303 | AATAATCTTGCTCTTGGTGGGTCTGTTGCTATTAAAATAACAGAACACTCTTGGAGCGTTGAACTTTATG 304 | AACTTATGGGAAAATTTGCTTGGTGGACTGTTTTCTGCACCAATGCAAATGCATCCTCATCTGAAGGATT 305 | CCTCTTAGGTATTAATTACTTGGGTACTATTAAAGAAAATATAGATGGTGGTGCTATGCACGCCAACTAT 
306 | ATATTTTGGAGAAATTCCACTCCTATGAATCTGAGTACTTACTCACTTTTTGATTTATCCAAGTTTCAAT 307 | TAAAATTAAAAGGAACACCAGTTCTTCAATTAAAGGAGAGTCAAATTAACGAACTCGTAATATCTCTCCT 308 | GTCGCAGGGTAAGTTACTTATCCGTGACAATGATACACTCAGTGTTTCTACTGATGTTCTTGTTAACACC 309 | TACAGAAAGTTACGTTGATGTAGGGCCAGATTCTGTTAAGTCTGCTTGTATTGAGGTTGATATACAACAG 310 | ACTTTCTTTGATAAAACTTGGCCTAGGCCAATTGATGTTTCTAAGGCTGACGGTATTATATACCCTCAAG 311 | GCCGTACATATTCTAACATAACTATCACTTATCAAGGTCTTTTTCCCTATCAGGGAGACCATGGTGATAT 312 | GTATGTCTACTCTGCAGGACATGCTACAGGCACAACTCCACAAAAGTTGTTTGTAGCTAACTATTCTCAG 313 | GACGTCAAACAGTTTGCTAATGGGTTTGTCGTCCGTATAGGAGCAGCTGCCAATTCCACTGGCACTGTTA 314 | TTATTAGCCCATCTACCAGCGCTACTATACGAAAAATTTACCCTGCTTTTATGCTGGGTTCTTCAGTTGG 315 | TAATTTCTCAGATGGTAAAATGGGCCGCTTCTTCAATCATACTCTAGTTCTTTTGCCCGATGGATGTGGC 316 | ACTTTACTTAGAGCTTTTTATTGTATTCTAGAGCCTCGCTCTGGAAATCATTGTCCTGCTGGCAATTCCT 317 | ATACTTCTTTTGCCACTTATCACACTCCTGCAACAGATTGTTCTGATGGCAATTACAATCGTAATGCCAG 318 | TCTGAACTCTTTTAAGGAGTATTTTAATTTACGTAACTGCACCTTTATGTACACTTATAACATTACCGAA 319 | GATGAGATTTTAGAGTGGTTTGGCATTACACAAACTGCTCAAGGTGTTCACCTCTTCTCATCTCGGTATG 320 | TTGATTTGTACGGCGGCAATATGTTTCAATTTGCCACCTTGCCTGTTTATGATACTATTAAGTATTATTC 321 | TATCATTCCTCACAGTATTCGTTCTATCCAAAGTGATAGAAAAGCTTGGGCTGCCTTCTACGTATATAAA 322 | CTTCAACCGTTAACTTTCCTGTTGGATTTTTCTGTTGATGGTTATATACGCAGAGCTATAGACTGTGGTT 323 | TTAATGATTTGTCACAACTCCACTGCTCATATGAATCCTTCGATGTTGAATCTGGAGTTTATTCAGTTTC 324 | GTCTTTCGAAGCAAAACCTTCTGGCTCAGTTGTGGAACAGGCTGAAGGTGTTGAATGTGATTTTTCACCT 325 | CTTCTGTCTGGCACACCTCCTCAGGTTTATAATTTCAAGCGTTTGGTTTTTACCAATTGCAATTATAATC 326 | TTACCAAATTGCTTTCACTTTTTTCTGTGAATGATTTTACTTGTAGTCAAATATCTCCAGCAGCAATTGC 327 | TAGCAACTGTTATTCTTCACTGATTTTGGATTATTTTTCATACCCACTTAGTATGAAATCCGATCTCAGT 328 | GTTAGTTCTGCTGGTCCAATATCCCAGTTTAATTATAAACAGTCCTTTTCTAATCCCACATGTTTGATTT 329 | TAGCGACTGTTCCTCATAACCTTACTACTATTACTAAGCCTCTTAAGTACAGCTATATTAACAAGTGCTC 330 | TCGTCTTCTTTCTGATGATCGTACTGAAGTACCTCAGTTAGTGAACGCTAATCAATACTCACCCTGTGTA 331 | 
TCCATTGTCCCATCCACTGTGTGGGAAGACGGTGATTATTATAGGAAACAACTATCTCCACTTGAAGGTG 332 | GTGGCTGGCTTGTTGCTAGTGGCTCAACTGTTGCCATGACTGAGCAATTACAGATGGGCTTTGGTATTAC 333 | AGTTCAATATGGTACAGACACCAATAGTGTTTGCCCCAAGCTTGAATTTGCTAATGACACAAAAATTGCC 334 | TCTCAATTAGGCAATTGCGTGGAATATTCCCTCTATGGTGTTTCGGGCCGTGGTGTTTTTCAGAATTGCA 335 | CAGCTGTAGGTGTTCGACAGCAGCGCTTTGTTTATGATGCGTACCAGAATTTAGTTGGCTATTATTCTGA 336 | TGATGGCAACTACTACTGTTTGCGTGCTTGTGTTAGTGTTCCTGTTTCTGTCATCTATGATAAAGAAACT 337 | AAAACCCACGCTACTCTATTTGGTAGTGTTGCATGTGAACACATTTCCTCTACCATGTCTCAATACTCCC 338 | GTTCTACGCGATCAATGCTTAAACGGCGAGATTCTACATATGGTCCCCTTCAGACACCTGTTGGTTGTGT 339 | CCTAGGACTTGTTAATTCCTCTTTGTTCGTAGAGGACTGCAAGTTGCCTCTTGGTCAATCTCTCTGTGCT 340 | CTTCCTGACACACCTAGTACTCTCACACCTCGCAGTGTGCGCTCTGTTCCAGGTGAAATGCGCTTGGCAT 341 | CCATTGCTTTTAATCATCCTATTCAGGTTGATCAACTTAATAGTAGTTATTTTAAATTAAGTATACCTAC 342 | TAATTTTTCCTTTGGTGTGACTCAGGAGTACATTCAGACAACCATTCAGAAAGTTACTGTTGATTGTAAA 343 | CAGTACGTTTGCAATGGTTTCCAGAAGTGTGAGCAATTACTGCGCGAGTATGGCCAGTTTTGTGCCAAAA 344 | TAAACCAGGCTCTCCATGGTGCCAATTTACGCCAGGATGATTCTGTACGTAATTTGTTTGCGAGCGTGAA 345 | AAGCTCTCAATCATCTCCTATCATACCAGGTTTTGGAGGTGACTTTAATTTGACACTTCTAGAACCTGTT 346 | TCTATATCTACTGGCAGTCGTAGTGCACGTAGTGCTATTGAGGATTTGCTATTTGACAAAGTCACTATAG 347 | CTGATCCTGGTTATATGCAAGGTTACGATGATTGCATGCAGCAAGGTCCAGCATCAGCTCGTGATCTTAT 348 | TTGTGCTCAATATGTGGCTGGTTATAAAGTATTACCTCCTCTTATGGATGTTAATATGGAAGCCGCGTAC 349 | ACTTCATCTTTGCTTGGCAGCATAGCAGGTGTTGGCTGGACTGCTGGCTTATCCTCCTTTGCTGCTATTC 350 | CATTTGCACAGAGTATTTTTTATAGGTTAAACGGTGTTGGCATTACTCAACAGGTTCTTTCAGAGAACCA 351 | AAAGCTTATTGCCAATAAGTTTAATCAGGCTCTGGGAGCTATGCAAACAGGCTTCACTACAACTAATGAA 352 | GCTTTTCGGAAGGTTCAGGATGCTGTGAACAACAATGCACAGGCTCTATCCAAATTAGCTAGCGAGCTAT 353 | CTAATACTTTTGGTGCTATTTCCGCCTCTATTGGAGACATCATACAACGTCTTGATGTTCTCGAACAGGA 354 | CGCCCAAATAGACAGACTTATTAATGGCCGTTTGACAACACTAAATGCTTTTGTTGCACAGCAGCTTGTT 355 | CGTTCCGAATCAGCTGCTCTTTCCGCTCAATTGGCTAAAGATAAAGTCAATGAGTGTGTCAAGGCACAAT 356 | CCAAGCGTTCTGGATTTTGCGGTCAAGGCACACATATAGTGTCCTTTGTTGTAAATGCCCCTAATGGCCT 357 
| TTACTTTATGCATGTTGGTTATTACCCTAGCAACCACATTGAGGTTGTTTCTGCTTATGGTCTTTGCGAT 358 | GCAGCTAACCCTACTAATTGTATAGCCCCTGTTAATGGCTACTTTATTAAAACTAATAACACTAGGATTG 359 | TTGATGAGTGGTCATATACTGGCTCGTCCTTCTATGCACCTGAGCCCATCACCTCTCTTAATACTAAGTA 360 | TGTTGCACCACAGGTGACATACCAAAACATTTCTACTAACCTCCCTCCTCCTCTTCTCGGCAATTCCACC 361 | GGGATTGACTTCCAAGATGAGTTGGATGAGTTTTTCAAAAATGTTAGCACCAGTATACCTAATTTTGGTT 362 | CTCTAACACAGATTAATACTACATTACTCGATCTTACCTACGAGATGTTGTCTCTTCAACAAGTTGTTAA 363 | AGCCCTTAATGAGTCTTACATAGACCTTAAAGAGCTTGGCAATTATACTTATTACAACAAATGGCCGTGG 364 | TACATTTGGCTTGGTTTCATTGCTGGGCTTGTTGCCTTAGCTCTATGCGTCTTCTTCATACTGTGCTGCA 365 | CTGGTTGTGGCACAAACTGTATGGGAAAACTTAAGTGTAATCGTTGTTGTGATAGATACGAGGAATACGA 366 | CCTCGAGCCGCATAAGGTTCATGTTCACTAATTAACGAACTATCAATGAGAGTTCAAAGACCACCCACTC 367 | TCTTGTTAGTGTTCTCACTCTCTTTTTTGGTCACTGCATTTTCAAAACCTCTCTATGTACCTGAGCATTG 368 | TCAGAATTATTCTGGTTGCATGCTTAGGGCTTGTATTAAAACTGCCCAAGCTGATACAGCTGGTCTTTAT 369 | ACAAATTTTCGAATTGACGTCCCATCTGCAGAATCAACTGGTACTCAATCAGTTTCTGTCGATCGTGAGT 370 | CTACTTCAACTCATGATGGTCTTACCGAACATGTTACTAGTGTGAATCTTTTTGACGTTGGTTACTCAGT 371 | TAATTAACGAACTCTATGGATTACGTGTCTCTGCTTAATCAAATTTGGCAGAAGTACCTTAATTCACCGT 372 | ATACTACTTGTTTGTATATCCCTAAACCCACAGCTAAGTATACACCTTTAGTTGGCACTTCATTGCACCC 373 | TGTGCTGTGGAACTGTCAGCTATCCTTTGCTGGTTATACTGAATCTGCTGTTAATTCTACAAAAGCTTTG 374 | GCCAAACAGGACGCAGCTCAGCGAATCGCTTGGTTGCTACATAAGGATGGAGGAATCCCTGACGGATGTT 375 | CCCTCTACCTCCGGCACTCAAGTTTATTCGCGCAAAGCCAGGAAGAGGAGTCATTCTCCAACTAAGAAAC 376 | TGCGCTACGTTAAGCGTAGATTTTCTCTTCTGCGCCCTGAAGACCTTAGTGTTATTGTCCAACCAACACA 377 | CTATGTCAGGGTTACATTTTCAGACCCCAACATGTGGTATCTACGTTCGGGTCATCATTTACACTCAGTT 378 | CACAATTGGCTTAAACCTTATGGCGGCCAACCTGTTTCTGAGTACCATATTACTCTAGCTTTGCTAAATC 379 | TCACTGATGAAGATTTAGCTAGAGATTTTTCACCCATTGCGCTCTTTTTGCGCAATGTCAGATTTGAGCT 380 | ACATGAGTTCGCCTTGCTGCGCAAAACTCTTGTTCTTAATGCATCAGAGATCTACTGTGCTAACATACAT 381 | AGATTTAAGCCTGTGTATAGAGTTAACACGGCAATCCCTACTATTAAGGATTGGCTTCTCGTTCAGGGAT 382 | TTTCCCTTTACCATAGTGGCCTCCCTTTACATATGTCAATCTCTAAATTGCATGCACTGGATGATGTTAC 
383 | TCGCAATTACATCATTACAATGCCATGCTTTAGAACTTATCCTCAACAAATGTTTGTTACTCCTTTGGCC 384 | GTAGATGTTGTCTCCATACGGTCTTCCAATCAGGGTAATAAACAAATTGTTCATTCTTACCCCATTTTAC 385 | ATCATCCAGGATTTTAACGAACTATGGCTTTCTCGGCGTCTTTATTTAAACCCGTCCAGCTAGTCCCAGT 386 | TTCTCCCGCATTTCATCGCATTGAGTCTACTGACTCTATTGTTTTCACATACATTCCTGCTAGCGGCTAT 387 | GTAGCTGCTTTAGCTGTCAATGTGTGTCTCATTCCCCTATTATTACTGCTACGTCAAGATACTTGTCGTC 388 | GCAGCATTATCAGAACTATGGTTCTCTATTTCCTTGTTCTTTATAACTTTTTATTAGCCATTGTACTAGT 389 | CAATGGTTTACATTATCCAACTGGAAGTTGCCTGATAGCCTTCTTAGTTATCCTCATAATACTTTGGTTT 390 | GTAGATAGAATTCGTTTCTGTCTCATGCTGAATTCCTACATTCCACTGTTTGACATGCGTTCTCACTTTA 391 | TTCGTGTTAGTACAGTTTCTTCTCATGGTATGGTCCCTGTCATACACACCAAACCATTATTTATTAGAAA 392 | CTTCGATCAGCGTTGCAGCTGTTCTCGTTGTTTTTATTTGCACTCTTCTACTTATATAGAGTGCACTTAT 393 | ATTAGCCGTTTTAGTAAGATTAGCCTAGTTTCTGTAACTGACTTCTCCTTAAACGGCAATGTTTCCACTG 394 | TTTTCGTGCCTGCAACGCGCGATTCAGTTCCTCTTCACATAATCGCCCCGAGCTCGCTTATCGTTTAAGC 395 | AGCTCTGCGCTACTATGGGTCCCGTGTAGAGGCTAATCCATTAGTCTCTCTTTGGACATATGGAAAACGA 396 | ACTATGTTACCCTTTGTCCAAGAACGAATAGGGTTGTTCATAGTAAACTTTTTCATTTTTACCGTAGTAT 397 | GTGCTATAACACTCTTGGTGTGTATGGCTTTCCTTACGGCTACTAGATTATGTGTGCAATGTATGACAGG 398 | CTTCAATACCCTGTTAGTTCAGCCCGCATTATACTTGTATAATACTGGACGTTCAGTCTATGTAAAATTC 399 | CAGGATAGTAAACCCCCTCTACCACCTGACGAGTGGGTTTAACGAACTCCTTCATAATGTCTAATATGAC 400 | GCAACTCACTGAGGCGCAGATTATTGCCATTATTAAAGACTGGAACTTTGCATGGTCCCTGATCTTTCTC 401 | TTAATTACTATCGTACTACAGTATGGATACCCATCCCGTAGTATGACTGTCTATGTCTTTAAAATGTTTG 402 | TTTTATGGCTCCTATGGCCATCTTCCATGGCGCTATCAATATTTAGCGCCATTTATCCAATTGATCTAGC 403 | TTCCCAGATAATCTCTGGCATTGTAGCAGCTGTTTCAGCTATGATGTGGATTTCCTACTTTGTGCAGAGT 404 | ATCCGGCTGTTTATGAGAACTGGATCATGGTGGTCATTCAATCCTGAGACTAATTGCCTTTTGAACGTTC 405 | CAATTGGTGGTACAACTGTCGTACGTCCACTCGTAGAGGACTCTACCAGTGTAACTGCTGTTGTAACCAA 406 | TGGTCACCTCAAAATGGCTGGCATGCATTTCGGTGCTTGTGACTACGACAGACTTCCTAATGAAGTCACC 407 | GTGGCCAAACCCAATGTGCTGATTGCTTTAAAAATGGTGAAGCGGCAAAGCTACGGAACTAATTCCGGCG 408 | 
TTGCCATTTACCATAGATATAAGGCAGGTAATTACAGGAGTCCGCCTATTACGGCGGATATTGAACTTGC 409 | ATTGCTTCGAGCTTAGGCTCTTTAGTAAGAGTATCTTAATTGATTTTAACGAATCTCAATTTCATTGTTA 410 | TGGCATCCCCTGCTGCACCTCGTGCTGTTTCCTTTGCCGATAACAATGATATAACAAATACAAACCTGTC 411 | TCGAGGTAGAGGACGTAATCCAAAACCACGAGCTGCACCAAATAACACTGTCTCTTGGTACACTGGGCTT 412 | ACCCAACACGGGAAAGTCCCTCTTACCTTTCCACCTGGGCAGGGTGTACCTCTTAATGCCAATTCCACCC 413 | CTGCGCAAAATGCTGGGTATTGGCGGAGACAGGACAGAAAAATTAATACCGGGAATGGAATTAAGCAACT 414 | GGCTCCCAGGTGGTACTTCTACTACACTGGAACTGGACCCGAAGCAGCACTCCCATTCCGGGCTGTTAAG 415 | GATGGCATCGTTTGGGTCCATGAAGATGGCGCCACTGATGCTCCTTCAACTTTTGGGACGCGGAACCCTA 416 | ACAATGATTCAGCTATTGTTACACAATTCGCGCCCGGTACTAAGCTTCCTAAAAACTTCCACATTGAGGG 417 | GACTGGAGGCAATAGTCAATCATCTTCAAGAGCCTCTAGCGTAAGCAGAAACTCTTCCAGATCTAGTTCA 418 | CAAGGTTCAAGATCAGGAAACTCTACCCGCGGCACTTCTCCAGGTCCATCTGGAATCGGAGCAGTAGGAG 419 | GTGATCTACTTTACCTTGATCTTCTGAACAGACTACAAGCCCTTGAGTCTGGCAAAGTAAAGCAATCGCA 420 | GCCAAAAGTAATCACTAAGAAAGATGCTGCTGCTGCTAAAAATAAGATGCGCCACAAGCGCACTTCCACC 421 | AAAAGTTTCAACATGGTGCAAGCTTTTGGTCTTCGCGGACCAGGAGACCTCCAGGGAAACTTTGGTGATC 422 | TTCAATTGAATAAACTCGGCACTGAGGACCCACGTTGGCCCCAAATTGCTGAGCTTGCTCCTACAGCCAG 423 | TGCTTTTATGGGTATGTCGCAATTTAAACTTACCCATCAGAACAATGATGATCATGGCAACCCTGTGTAC 424 | TTCCTTCGGTACAGTGGAGCCATTAAACTTGACCCAAAGAATCCCAACTACAATAAGTGGTTGGAGCTTC 425 | TTGAGCAAAATATTGATGCCTACAAAACCTTCCCTAAGAAGGAAAAGAAACAAAAGGCACCAAAAGAAGA 426 | ATCAACAGACCAAATGTCTGAACCTCCAAAGGAGCAGCGTGTGCAAGGTAGCATCACTCAGCGCACTCGC 427 | ACCCGTCCAAGTGTTCAGCCTGGTCCAATGATTGATGTTAACACTGATTAGTGTCACTC 428 | 429 | -------------------------------------------------------------------------------- /Section1/Chap4/4.3.1.make_sequence.py: -------------------------------------------------------------------------------- 1 | # 4.3.1.make_sequence.py 2 | from Bio.Seq import Seq 3 | 4 | tatabox_seq = Seq("tataaaggcAATATGCAGTAG") 5 | print(tatabox_seq) # tataaaggcAATATGCAGTAG 가 출력된다. 
6 | print(type(tatabox_seq)) # 7 | 8 | -------------------------------------------------------------------------------- /Section1/Chap4/4.3.2.alphabet.py: -------------------------------------------------------------------------------- 1 | # 4.3.1.make_sequence.py 2 | from Bio.Seq import Seq 3 | 4 | tatabox_seq = Seq("tataaaggcAATATGCAGTAG") 5 | print(tatabox_seq) # tataaaggcAATATGCAGTAG 가 출력된다. 6 | print(type(tatabox_seq)) # 7 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.1.count.py: -------------------------------------------------------------------------------- 1 | # 4.4.1.count.py 2 | from Bio.Seq import Seq 3 | 4 | exon_seq = Seq("ATGCAGTAG") 5 | count_a = exon_seq.count("A") 6 | print(count_a) # 3 이 출력된다. 7 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.2.gc_contents.py: -------------------------------------------------------------------------------- 1 | # 4.4.2.gc_contents.py 2 | from Bio.Seq import Seq 3 | 4 | exon_seq = Seq("ATGCAGTAG") 5 | g_count = exon_seq.count("G") 6 | c_count = exon_seq.count("C") 7 | gc_contents = (g_count + c_count) / len(exon_seq) * 100 8 | print(gc_contents) # 44.44 9 | 10 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.3.case.py: -------------------------------------------------------------------------------- 1 | # 4.4.3.case.py 2 | from Bio.Seq import Seq 3 | 4 | tatabox_seq = Seq("tataaaggcAATATGCAGTAG") 5 | print(tatabox_seq.upper()) # TATAAAGGCAATATGCAGTAG 6 | print(tatabox_seq.lower()) # tataaaggcaatatgcagtag 7 | 8 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.4.translate_transcribe.py: -------------------------------------------------------------------------------- 1 | # 4.4.4.translate_transcribe.py 2 | 3 | from Bio.Seq import Seq 4 | 5 | dna = Seq("ATGCAGTAG") 6 | mrna = dna.transcribe() 7 | ptn = 
dna.translate() 8 | print(mrna) # AUGCAGUAG 9 | print(ptn) # MQ* 10 | 11 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.5.translate_stop_1.py: -------------------------------------------------------------------------------- 1 | # 4.4.5.translate_stop_1.py 2 | from Bio.Seq import Seq 3 | 4 | mRNA = Seq("AUGAACUAAGUUUAGAAU") 5 | ptn = mRNA.translate() 6 | print(ptn) ## MN*V*N 7 | 8 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.5.translate_stop_2.py: -------------------------------------------------------------------------------- 1 | # 4.4.5.translate_stop_2.py 2 | from Bio.Seq import Seq 3 | 4 | mRNA = Seq("AUGAACUAAGUUUAGAAU") 5 | ptn = mRNA.translate(to_stop=True) 6 | print(ptn) ## MN 7 | 8 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.6.split.py: -------------------------------------------------------------------------------- 1 | # 4.4.6.split.py 2 | from Bio.Seq import Seq 3 | 4 | mrna = Seq("AUGAACUAAGUUUAGAAU") 5 | ptn = mrna.translate() 6 | print(ptn) ## MN*V*N 7 | for seq in ptn.split("*"): 8 | print(seq) 9 | 10 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.7.complement.bio.py: -------------------------------------------------------------------------------- 1 | # 4.4.7.complement.bio.py 2 | from Bio.Seq import Seq 3 | 4 | seq = Seq("TATAAAGGCAATATGCAGTAG") 5 | comp_seq = seq.complement() 6 | rev_comp_seq = seq.reverse_complement() 7 | print(comp_seq) # ATATTTCCGTTATACGTCATC 8 | print(rev_comp_seq) # CTACTGCATATTGCCTTTATA 9 | 10 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.7.complement.py: -------------------------------------------------------------------------------- 1 | # 4.4.7.complement.py 2 | seq = "TATAAAGGCAATATGCAGTAG" 3 | comp_dic = { 'A':'T', 'C':'G', 'G':'C', 'T':'A' } 
# 상보적 염기를 키-값 으로 하는 사전을 만든다 4 | comp_seq = "" 5 | for s in seq: # 서열에서 하나씩 읽어서 6 | comp_seq += comp_dic[s] # 상보적 염기를 추가해준다 7 | revcomp_seq = comp_seq[::-1] # 파이썬 문자열을 뒤집어준다 8 | print(comp_seq) # ATATTTCCGTTATACGTCATC 9 | print(revcomp_seq) # CTACTGCATATTGCCTTTATA 10 | 11 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.8.codonTable.mitochondria.py: -------------------------------------------------------------------------------- 1 | # 4.4.8.codonTable.mitochondria.py 2 | from Bio.Data import CodonTable 3 | 4 | codon_table = CodonTable.unambiguous_dna_by_name["Vertebrate Mitochondrial"] 5 | print(codon_table) 6 | 7 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.8.codonTable.py: -------------------------------------------------------------------------------- 1 | # 4.4.8.codonTable.py 2 | from Bio.Data import CodonTable 3 | 4 | codon_table = CodonTable.unambiguous_dna_by_name["Standard"] 5 | print(codon_table) 6 | 7 | -------------------------------------------------------------------------------- /Section1/Chap4/4.4.9.orf_finder.py: -------------------------------------------------------------------------------- 1 | # 4.4.9.orf_finder.py 2 | 3 | from Bio.Seq import Seq 4 | 5 | tatabox_seq = Seq("tataaaggcAATATGCAGTAG") 6 | start_idx = tatabox_seq.find("ATG") 7 | end_idx = tatabox_seq.find("TAG", start_idx) # 예문의 편의상 TAG 로 하였다. 8 | orf = tatabox_seq[start_idx:end_idx+3] # 파이썬 문자열과 같은 방법으로 슬라이싱이 가능하다. 
9 | print(orf) # ATGCAGTAG 10 | 11 | -------------------------------------------------------------------------------- /Section1/Chap4/4.5.1.gc_contents.py: -------------------------------------------------------------------------------- 1 | # 4.5.1.gc_contents.py 2 | from Bio.Seq import Seq 3 | from Bio.SeqUtils import GC 4 | 5 | exon_seq = Seq("ATGCAGTAG") 6 | gc_contents = GC(exon_seq) 7 | print(gc_contents) # 44.44 8 | 9 | -------------------------------------------------------------------------------- /Section1/Chap4/4.5.2.calc_molecular_weight.py: -------------------------------------------------------------------------------- 1 | # 4.5.2.calc_molecular_weight.py 2 | from Bio.Seq import Seq 3 | from Bio.Alphabet import IUPAC 4 | from Bio.SeqUtils import molecular_weight 5 | 6 | seq1 = Seq("ATGCAGTAG") 7 | seq2 = Seq("ATGCAGTAG", IUPAC.unambiguous_dna) 8 | seq3 = Seq("ATGCAGTAG", IUPAC.protein) 9 | 10 | print(molecular_weight(seq1)) # 2842.82 가 출력된다 11 | print(molecular_weight(seq2)) # 2842.82 가 출력된다 12 | print(molecular_weight(seq3)) # 707.75 가 출력된다 13 | 14 | -------------------------------------------------------------------------------- /Section1/Chap4/4.5.3.make_six_frame_translations.py: -------------------------------------------------------------------------------- 1 | # 4.5.3.make_six_frame_translations.py 2 | from Bio.Seq import Seq 3 | from Bio.SeqUtils import six_frame_translations 4 | 5 | seq1 = Seq("ATGCCTTGAAATGTATAG") 6 | print(six_frame_translations(seq1)) 7 | 8 | -------------------------------------------------------------------------------- /Section1/Chap4/4.5.4.calc_melting_temperature.py: -------------------------------------------------------------------------------- 1 | # 4.5.4.calc_melting_temperature.py 2 | from Bio.SeqUtils import MeltingTemp as mt 3 | from Bio.Seq import Seq 4 | 5 | myseq = Seq("AGTCTGGGACGGCGCGGCAATCGCA") 6 | print(mt.Tm_Wallace(myseq)) # 84.0 이 출력된다. 
7 | 8 | -------------------------------------------------------------------------------- /Section1/Chap4/4.5.5.convert_aminoacid_1to3.py: -------------------------------------------------------------------------------- 1 | # 4.5.5.convert_aminoacid_1to3.py 2 | from Bio.SeqUtils import seq1 3 | 4 | essential_amino_acid_3 = "LeuLysMetValIleThrTrpPhe" 5 | print(seq1(essential_amino_acid_3)) ## LKMVITWF 가 출력된다. 6 | 7 | -------------------------------------------------------------------------------- /Section1/Chap4/4.5.5.convert_aminoacid_3to1.py: -------------------------------------------------------------------------------- 1 | # 4.5.5.convert_aminoacid_3to1.py 2 | from Bio.SeqUtils import seq3 3 | 4 | essential_amino_acid_1 = "LKMVITWF" 5 | print(seq3(essential_amino_acid_1)) ## LeuLysMetValIleThrTrpPhe 가 출력된다. 6 | 7 | -------------------------------------------------------------------------------- /Section1/Chap5/5.1.simple_seqrecord_object_example_1.py: -------------------------------------------------------------------------------- 1 | # 5.1.simple_seqrecord_object_example_1.py 2 | 3 | from Bio.Seq import Seq 4 | from Bio.SeqRecord import SeqRecord # Bio.SeqRecord에서 SeqRecord를 import 하였다. 5 | 6 | seq = Seq("ACGT") # 먼저 Sequence 객체를 만든다. 7 | seqRecord = SeqRecord(seq) # SeqRecord 객체를 만든다. 8 | 9 | print(seqRecord) 10 | 11 | -------------------------------------------------------------------------------- /Section1/Chap5/5.1.simple_seqrecord_object_example_2.py: -------------------------------------------------------------------------------- 1 | # 5.1.simple_seqrecord_object_example_2.py 2 | 3 | from Bio.Seq import Seq 4 | from Bio.SeqRecord import SeqRecord 5 | 6 | simple_seq = Seq("ACGT") 7 | simple_seqRecord = SeqRecord(simple_seq, id="NC_1111", name="Test") 8 | # SeqRecord 객체를 만들 때, id와 name에 각각 값을 집어넣었다. 
9 | 10 | print(simple_seqRecord) 11 | 12 | -------------------------------------------------------------------------------- /Section1/Chap5/5.1.simple_seqrecord_object_example_3.py: -------------------------------------------------------------------------------- 1 | # 5.1.simple_seqrecord_object_example_3.py 2 | 3 | from Bio.Seq import Seq 4 | from Bio.SeqRecord import SeqRecord 5 | 6 | simple_seq = Seq("ACGT") 7 | simple_seqRecord = SeqRecord(simple_seq, id="NC_1111", name="Test") 8 | simple_seqRecord.name = "Another name" # overwrite the name given at construction 9 | 10 | print(simple_seqRecord) 11 | 12 | -------------------------------------------------------------------------------- /Section1/Chap5/5.3.1.seqRecord_example.py: -------------------------------------------------------------------------------- 1 | # 5.3.1.seqRecord_example.py 2 | 3 | from Bio.Seq import Seq 4 | from Bio.SeqRecord import SeqRecord 5 | 6 | seq = Seq("ACGT") 7 | seqRecord = SeqRecord(seq) 8 | print(seqRecord) 9 | print("----------") 10 | 11 | # Fill in descriptive fields on the SeqRecord object. 12 | seqRecord.id = "NC_1111" 13 | seqRecord.name = "GeneA" 14 | seqRecord.description = "This is a description."
15 | seqRecord.annotations["Annotation_Key1"] = "Annotation_Value1" 16 | seqRecord.annotations["Annotation_Key2"] = "Annotation_Value2" 17 | print(seqRecord) 18 | 19 | -------------------------------------------------------------------------------- /Section1/Chap5/5.3.2.seqRecord_FASTA_example.py: -------------------------------------------------------------------------------- 1 | #5.3.2.seqRecord_FASTA_example.py 2 | 3 | from Bio import SeqIO 4 | 5 | record = SeqIO.read("J01636.1.fasta","fasta") 6 | print(type(record)) 7 | print(record) 8 | 9 | -------------------------------------------------------------------------------- /Section1/Chap5/5.3.3.seqRecord_GenBank_example.py: -------------------------------------------------------------------------------- 1 | #5.3.3.seqRecord_GenBank_example.py 2 | 3 | from Bio import SeqIO 4 | 5 | record = SeqIO.read("J01636.1.gbk","genbank") 6 | print(type(record)) 7 | print(record) 8 | 9 | -------------------------------------------------------------------------------- /Section1/Chap5/5.4.compare_example_1.py: -------------------------------------------------------------------------------- 1 | #5.4.compare_example_1.py 2 | 3 | str1 = "ACGT" 4 | str2 = "ACGT" 5 | 6 | print(str1) # prints ACGT 7 | print(str2) # prints ACGT 8 | print(str1 == str2) # prints True
9 | 10 | -------------------------------------------------------------------------------- /Section1/Chap5/5.4.compare_example_2.py: -------------------------------------------------------------------------------- 1 | #5.4.compare_example_2.py 2 | 3 | from Bio.Seq import Seq 4 | from Bio.SeqRecord import SeqRecord 5 | 6 | seq1 = Seq("ACGT") 7 | record1 = SeqRecord(seq1) 8 | print(record1) 9 | 10 | print("----------") 11 | 12 | seq2 = Seq("ACGT") 13 | record2 = SeqRecord(seq2) 14 | print(record2) 15 | 16 | -------------------------------------------------------------------------------- /Section1/Chap5/5.4.compare_example_3.py: -------------------------------------------------------------------------------- 1 | #5.4.compare_example_3.py 2 | 3 | from Bio.Seq import Seq 4 | from Bio.SeqRecord import SeqRecord 5 | 6 | seq1 = Seq("ACGT") 7 | record1 = SeqRecord(seq1) 8 | 9 | seq2 = Seq("ACGT") 10 | record2 = SeqRecord(seq2) 11 | 12 | print(record1 == record2) # raises NotImplementedError 13 | 14 | -------------------------------------------------------------------------------- /Section1/Chap5/5.4.compare_example_4.py: -------------------------------------------------------------------------------- 1 | #5.4.compare_example_4.py 2 | 3 | from Bio.Seq import Seq 4 | from Bio.SeqRecord import SeqRecord 5 | 6 | seq1 = Seq("ACGT") 7 | record1 = SeqRecord(seq1) 8 | 9 | seq2 = Seq("ACGT") 10 | record2 = SeqRecord(seq2) 11 | 12 | print(record1.seq == record2.seq) # prints True
13 | 14 | -------------------------------------------------------------------------------- /Section1/Chap5/J01636.1.fasta: -------------------------------------------------------------------------------- 1 | >J01636.1 E.coli lactose operon with lacI, lacZ, lacY and lacA genes 2 | GACACCATCGAATGGCGCAAAACCTTTCGCGGTATGGCATGATAGCGCCCGGAAGAGAGTCAATTCAGGG 3 | TGGTGAATGTGAAACCAGTAACGTTATACGATGTCGCAGAGTATGCCGGTGTCTCTTATCAGACCGTTTC 4 | CCGCGTGGTGAACCAGGCCAGCCACGTTTCTGCGAAAACGCGGGAAAAAGTGGAAGCGGCGATGGCGGAG 5 | CTGAATTACATTCCCAACCGCGTGGCACAACAACTGGCGGGCAAACAGTCGTTGCTGATTGGCGTTGCCA 6 | CCTCCAGTCTGGCCCTGCACGCGCCGTCGCAAATTGTCGCGGCGATTAAATCTCGCGCCGATCAACTGGG 7 | TGCCAGCGTGGTGGTGTCGATGGTAGAACGAAGCGGCGTCGAAGCCTGTAAAGCGGCGGTGCACAATCTT 8 | CTCGCGCAACGCGTCAGTGGGCTGATCATTAACTATCCGCTGGATGACCAGGATGCCATTGCTGTGGAAG 9 | CTGCCTGCACTAATGTTCCGGCGTTATTTCTTGATGTCTCTGACCAGACACCCATCAACAGTATTATTTT 10 | CTCCCATGAAGACGGTACGCGACTGGGCGTGGAGCATCTGGTCGCATTGGGTCACCAGCAAATCGCGCTG 11 | TTAGCGGGCCCATTAAGTTCTGTCTCGGCGCGTCTGCGTCTGGCTGGCTGGCATAAATATCTCACTCGCA 12 | ATCAAATTCAGCCGATAGCGGAACGGGAAGGCGACTGGAGTGCCATGTCCGGTTTTCAACAAACCATGCA 13 | AATGCTGAATGAGGGCATCGTTCCCACTGCGATGCTGGTTGCCAACGATCAGATGGCGCTGGGCGCAATG 14 | CGCGCCATTACCGAGTCCGGGCTGCGCGTTGGTGCGGATATCTCGGTAGTGGGATACGACGATACCGAAG 15 | ACAGCTCATGTTATATCCCGCCGTCAACCACCATCAAACAGGATTTTCGCCTGCTGGGGCAAACCAGCGT 16 | GGACCGCTTGCTGCAACTCTCTCAGGGCCAGGCGGTGAAGGGCAATCAGCTGTTGCCCGTCTCACTGGTG 17 | AAAAGAAAAACCACCCTGGCGCCCAATACGCAAACCGCCTCTCCCCGCGCGTTGGCCGATTCATTAATGC 18 | AGCTGGCACGACAGGTTTCCCGACTGGAAAGCGGGCAGTGAGCGCAACGCAATTAATGTGAGTTAGCTCA 19 | CTCATTAGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTATGTTGTGTGGAATTGTGAGCGGATA 20 | ACAATTTCACACAGGAAACAGCTATGACCATGATTACGGATTCACTGGCCGTCGTTTTACAACGTCGTGA 21 | CTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAAT 22 | AGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCTTTGCCT 23 | GGTTTCCGGCACCAGAAGCGGTGCCGGAAAGCTGGCTGGAGTGCGATCTTCCTGAGGCCGATACTGTCGT 24 | 
CGTCCCCTCAAACTGGCAGATGCACGGTTACGATGCGCCCATCTACACCAACGTAACCTATCCCATTACG 25 | GTCAATCCGCCGTTTGTTCCCACGGAGAATCCGACGGGTTGTTACTCGCTCACATTTAATGTTGATGAAA 26 | GCTGGCTACAGGAAGGCCAGACGCGAATTATTTTTGATGGCGTTAACTCGGCGTTTCATCTGTGGTGCAA 27 | CGGGCGCTGGGTCGGTTACGGCCAGGACAGTCGTTTGCCGTCTGAATTTGACCTGAGCGCATTTTTACGC 28 | GCCGGAGAAAACCGCCTCGCGGTGATGGTGCTGCGTTGGAGTGACGGCAGTTATCTGGAAGATCAGGATA 29 | TGTGGCGGATGAGCGGCATTTTCCGTGACGTCTCGTTGCTGCATAAACCGACTACACAAATCAGCGATTT 30 | CCATGTTGCCACTCGCTTTAATGATGATTTCAGCCGCGCTGTACTGGAGGCTGAAGTTCAGATGTGCGGC 31 | GAGTTGCGTGACTACCTACGGGTAACAGTTTCTTTATGGCAGGGTGAAACGCAGGTCGCCAGCGGCACCG 32 | CGCCTTTCGGCGGTGAAATTATCGATGAGCGTGGTGGTTATGCCGATCGCGTCACACTACGTCTGAACGT 33 | CGAAAACCCGAAACTGTGGAGCGCCGAAATCCCGAATCTCTATCGTGCGGTGGTTGAACTGCACACCGCC 34 | GACGGCACGCTGATTGAAGCAGAAGCCTGCGATGTCGGTTTCCGCGAGGTGCGGATTGAAAATGGTCTGC 35 | TGCTGCTGAACGGCAAGCCGTTGCTGATTCGAGGCGTTAACCGTCACGAGCATCATCCTCTGCATGGTCA 36 | GGTCATGGATGAGCAGACGATGGTGCAGGATATCCTGCTGATGAAGCAGAACAACTTTAACGCCGTGCGC 37 | TGTTCGCATTATCCGAACCATCCGCTGTGGTACACGCTGTGCGACCGCTACGGCCTGTATGTGGTGGATG 38 | AAGCCAATATTGAAACCCACGGCATGGTGCCAATGAATCGTCTGACCGATGATCCGCGCTGGCTACCGGC 39 | GATGAGCGAACGCGTAACGCGAATGGTGCAGCGCGATCGTAATCACCCGAGTGTGATCATCTGGTCGCTG 40 | GGGAATGAATCAGGCCACGGCGCTAATCACGACGCGCTGTATCGCTGGATCAAATCTGTCGATCCTTCCC 41 | GCCCGGTGCAGTATGAAGGCGGCGGAGCCGACACCACGGCCACCGATATTATTTGCCCGATGTACGCGCG 42 | CGTGGATGAAGACCAGCCCTTCCCGGCTGTGCCGAAATGGTCCATCAAAAAATGGCTTTCGCTACCTGGA 43 | GAGACGCGCCCGCTGATCCTTTGCGAATACGCCCACGCGATGGGTAACAGTCTTGGCGGTTTCGCTAAAT 44 | ACTGGCAGGCGTTTCGTCAGTATCCCCGTTTACAGGGCGGCTTCGTCTGGGACTGGGTGGATCAGTCGCT 45 | GATTAAATATGATGAAAACGGCAACCCGTGGTCGGCTTACGGCGGTGATTTTGGCGATACGCCGAACGAT 46 | CGCCAGTTCTGTATGAACGGTCTGGTCTTTGCCGACCGCACGCCGCATCCAGCGCTGACGGAAGCAAAAC 47 | ACCAGCAGCAGTTTTTCCAGTTCCGTTTATCCGGGCAAACCATCGAAGTGACCAGCGAATACCTGTTCCG 48 | TCATAGCGATAACGAGCTCCTGCACTGGATGGTGGCGCTGGATGGTAAGCCGCTGGCAAGCGGTGAAGTG 49 | CCTCTGGATGTCGCTCCACAAGGTAAACAGTTGATTGAACTGCCTGAACTACCGCAGCCGGAGAGCGCCG 50 | 
GGCAACTCTGGCTCACAGTACGCGTAGTGCAACCGAACGCGACCGCATGGTCAGAAGCCGGGCACATCAG 51 | CGCCTGGCAGCAGTGGCGTCTGGCGGAAAACCTCAGTGTGACGCTCCCCGCCGCGTCCCACGCCATCCCG 52 | CATCTGACCACCAGCGAAATGGATTTTTGCATCGAGCTGGGTAATAAGCGTTGGCAATTTAACCGCCAGT 53 | CAGGCTTTCTTTCACAGATGTGGATTGGCGATAAAAAACAACTGCTGACGCCGCTGCGCGATCAGTTCAC 54 | CCGTGCACCGCTGGATAACGACATTGGCGTAAGTGAAGCGACCCGCATTGACCCTAACGCCTGGGTCGAA 55 | CGCTGGAAGGCGGCGGGCCATTACCAGGCCGAAGCAGCGTTGTTGCAGTGCACGGCAGATACACTTGCTG 56 | ATGCGGTGCTGATTACGACCGCTCACGCGTGGCAGCATCAGGGGAAAACCTTATTTATCAGCCGGAAAAC 57 | CTACCGGATTGATGGTAGTGGTCAAATGGCGATTACCGTTGATGTTGAAGTGGCGAGCGATACACCGCAT 58 | CCGGCGCGGATTGGCCTGAACTGCCAGCTGGCGCAGGTAGCAGAGCGGGTAAACTGGCTCGGATTAGGGC 59 | CGCAAGAAAACTATCCCGACCGCCTTACTGCCGCCTGTTTTGACCGCTGGGATCTGCCATTGTCAGACAT 60 | GTATACCCCGTACGTCTTCCCGAGCGAAAACGGTCTGCGCTGCGGGACGCGCGAATTGAATTATGGCCCA 61 | CACCAGTGGCGCGGCGACTTCCAGTTCAACATCAGCCGCTACAGTCAACAGCAACTGATGGAAACCAGCC 62 | ATCGCCATCTGCTGCACGCGGAAGAAGGCACATGGCTGAATATCGACGGTTTCCATATGGGGATTGGTGG 63 | CGACGACTCCTGGAGCCCGTCAGTATCGGCGGAATTCCAGCTGAGCGCCGGTCGCTACCATTACCAGTTG 64 | GTCTGGTGTCAAAAATAATAATAACCGGGCAGGCCATGTCTGCCCGTATTTCGCGTAAGGAAATCCATTA 65 | TGTACTATTTAAAAAACACAAACTTTTGGATGTTCGGTTTATTCTTTTTCTTTTACTTTTTTATCATGGG 66 | AGCCTACTTCCCGTTTTTCCCGATTTGGCTACATGACATCAACCATATCAGCAAAAGTGATACGGGTATT 67 | ATTTTTGCCGCTATTTCTCTGTTCTCGCTATTATTCCAACCGCTGTTTGGTCTGCTTTCTGACAAACTCG 68 | GGCTGCGCAAATACCTGCTGTGGATTATTACCGGCATGTTAGTGATGTTTGCGCCGTTCTTTATTTTTAT 69 | CTTCGGGCCACTGTTACAATACAACATTTTAGTAGGATCGATTGTTGGTGGTATTTATCTAGGCTTTTGT 70 | TTTAACGCCGGTGCGCCAGCAGTAGAGGCATTTATTGAGAAAGTCAGCCGTCGCAGTAATTTCGAATTTG 71 | GTCGCGCGCGGATGTTTGGCTGTGTTGGCTGGGCGCTGTGTGCCTCGATTGTCGGCATCATGTTCACCAT 72 | CAATAATCAGTTTGTTTTCTGGCTGGGCTCTGGCTGTGCACTCATCCTCGCCGTTTTACTCTTTTTCGCC 73 | AAAACGGATGCGCCCTCTTCTGCCACGGTTGCCAATGCGGTAGGTGCCAACCATTCGGCATTTAGCCTTA 74 | AGCTGGCACTGGAACTGTTCAGACAGCCAAAACTGTGGTTTTTGTCACTGTATGTTATTGGCGTTTCCTG 75 | CACCTACGATGTTTTTGACCAACAGTTTGCTAATTTCTTTACTTCGTTCTTTGCTACCGGTGAACAGGGT 76 | 
ACGCGGGTATTTGGCTACGTAACGACAATGGGCGAATTACTTAACGCCTCGATTATGTTCTTTGCGCCAC 77 | TGATCATTAATCGCATCGGTGGGAAAAACGCCCTGCTGCTGGCTGGCACTATTATGTCTGTACGTATTAT 78 | TGGCTCATCGTTCGCCACCTCAGCGCTGGAAGTGGTTATTCTGAAAACGCTGCATATGTTTGAAGTACCG 79 | TTCCTGCTGGTGGGCTGCTTTAAATATATTACCAGCCAGTTTGAAGTGCGTTTTTCAGCGACGATTTATC 80 | TGGTCTGTTTCTGCTTCTTTAAGCAACTGGCGATGATTTTTATGTCTGTACTGGCGGGCAATATGTATGA 81 | AAGCATCGGTTTCCAGGGCGCTTATCTGGTGCTGGGTCTGGTGGCGCTGGGCTTCACCTTAATTTCCGTG 82 | TTCACGCTTAGCGGCCCCGGCCCGCTTTCCCTGCTGCGTCGTCAGGTGAATGAAGTCGCTTAAGCAATCA 83 | ATGTCGGATGCGGCGCGACGCTTATCCGACCAACATATCATAACGGAGTGATCGCATTGAACATGCCAAT 84 | GACCGAAAGAATAAGAGCAGGCAAGCTATTTACCGATATGTGCGAAGGCTTACCGGAAAAAAGACTTCGT 85 | GGGAAAACGTTAATGTATGAGTTTAATCACTCGCATCCATCAGAAGTTGAAAAAAGAGAAAGCCTGATTA 86 | AAGAAATGTTTGCCACGGTAGGGGAAAACGCCTGGGTAGAACCGCCTGTCTATTTCTCTTACGGTTCCAA 87 | CATCCATATAGGCCGCAATTTTTATGCAAATTTCAATTTAACCATTGTCGATGACTACACGGTAACAATC 88 | GGTGATAACGTACTGATTGCACCCAACGTTACTCTTTCCGTTACGGGACACCCTGTACACCATGAATTGA 89 | GAAAAAACGGCGAGATGTACTCTTTTCCGATAACGATTGGCAATAACGTCTGGATCGGAAGTCATGTGGT 90 | TATTAATCCAGGCGTCACCATCGGGGATAATTCTGTTATTGGCGCGGGTAGTATCGTCACAAAAGACATT 91 | CCACCAAACGTCGTGGCGGCTGGCGTTCCTTGTCGGGTTATTCGCGAAATAAACGACCGGGATAAGCACT 92 | ATTATTTCAAAGATTATAAAGTTGAATCGTCAGTTTAAATTATAAAAATTGCCTGATACGCTGCGCTTAT 93 | CAGGCCTACAAGTTCAGCGATCTACATTAGCCGCATCCGGCATGAACAAAGCGCAGGAACAAGCGTCGCA 94 | TCATGCCTCTTTGACCCACAGCTGCGGAAAACGTACTGGTGCAAAACGCAGGGTTATGATCATCAGCCCA 95 | ACGACGCACAGCGCATGAAATGCCCAGTCCATCAGGTAATTGCCGCTGATACTACGCAGCACGCCAGAAA 96 | ACCACGGGGCAAGCCCGGCGATGATAAAACCGATTCCCTGCATAAACGCCACCAGCTTGCCAGCAATAGC 97 | CGGTTGCACAGAGTGATCGAGCGCCAGCAGCAAACAGAGCGGAAACGCGCCGCCCAGACCTAACCCACAC 98 | ACCATCGCCCACAATACCGGCAATTGCATCGGCAGCCAGATAAAGCCGCAGAACCCCACCAGTTGTAACA 99 | CCAGCGCCAGCATTAACAGTTTGCGCCGATCCTGATGGCGAGCCATAGCAGGCATCAGCAAAGCTCCTGC 100 | GGCTTGCCCAAGCGTCATCAATGCCAGTAAGGAACCGCTGTACTGCGCGCTGGCACCAATCTCAATATAG 101 | AAAGCGGGTAACCAGGCAATCAGGCTGGCGTAACCGCCGTTAATCAGACCGAAGTAAACACCCAGCGTCC 102 | 
ACGCGCGGGGAGTGAATACCACGCGAACCGGAGTGGTTGTTGTCTTGTGGGAAGAGGCGACCTCGCGGGC 103 | GCTTTGCCACCACCAGGCAAAGAGCGCAACAACGGCAGGCAGCGCCACCAGGCGAGTGTTTGATACCAGG 104 | TTTCGCTATGTTGAACTAACCAGGGCGTTATGGCGGCACCAAGCCCACCGCCGCCCATCAGAGCCGCGGA 105 | CCACAGCCCCATCACCAGTGGCGTGCGCTGCTGAAACCGCCGTTTAATCACCGAAGCATCACCGCCTGAA 106 | TGATGCCGATCCCCACCCCACCAAGCAGTGCGCTGCTAAGCAGCAGCGCACTTTGCGGGTAAAGCTCACG 107 | CATCAATGCACCGACGGCAATCAGCAACAGACTGATGGCGACACTGCGACGTTCGCTGACATGCTGATGA 108 | AGCCAGCTTCCGGCCAGCGCCAGCCCGCCCATGGTAACCACCGGCAGAGCGGTCGAC 109 | 110 | -------------------------------------------------------------------------------- /Section1/Chap5/J01636.1.gbk: -------------------------------------------------------------------------------- 1 | LOCUS ECOLAC 7477 bp DNA linear BCT 05-MAY-1993 2 | DEFINITION E.coli lactose operon with lacI, lacZ, lacY and lacA genes. 3 | ACCESSION J01636 J01637 K01483 K01793 4 | VERSION J01636.1 5 | KEYWORDS acetyltransferase; beta-D-galactosidase; galactosidase; lac operon; 6 | lac repressor protein; lacA gene; lacI gene; lacY gene; lacZ gene; 7 | lactose permease; mutagenesis; palindrome; promoter region; 8 | thiogalactoside acetyltransferase. 9 | SOURCE Escherichia coli 10 | ORGANISM Escherichia coli 11 | Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; 12 | Enterobacteriaceae; Escherichia. 13 | REFERENCE 1 (bases 1243 to 1266) 14 | AUTHORS Gilbert,W. and Maxam,A. 15 | TITLE The nucleotide sequence of the lac operator 16 | JOURNAL Proc. Natl. Acad. Sci. U.S.A. 70 (12), 3581-3584 (1973) 17 | PUBMED 4587255 18 | REFERENCE 2 (bases 1246 to 1308) 19 | AUTHORS Maizels,N.M. 20 | TITLE The nucleotide sequence of the lactose messenger ribonucleic acid 21 | transcribed from the UV5 promoter mutant of Escherichia coli 22 | JOURNAL Proc. Natl. Acad. Sci. U.S.A. 70 (12), 3585-3589 (1973) 23 | PUBMED 4587256 24 | REFERENCE 3 (sites) 25 | AUTHORS Gilbert,W., Maizels,N. and Maxam,A. 
26 | TITLE Sequences of controlling regions of the lactose operon 27 | JOURNAL Cold Spring Harb. Symp. Quant. Biol. 38, 845-855 (1974) 28 | PUBMED 4598642 29 | REFERENCE 4 (sites) 30 | AUTHORS Gilbert,W., Gralla,J., Majors,A.J. and Maxam,A. 31 | TITLE Lactose operator sequences and the action of lac repressor 32 | JOURNAL (in) Sund,H. and Blauer,G. (Eds.); 33 | PROTEIN-LIGAND INTERACTIONS: 193-207; 34 | Walter de Gruyter, New York (1975) 35 | REFERENCE 5 (bases 1146 to 1282) 36 | AUTHORS Dickson,R.C., Abelson,J., Barnes,W.M. and Reznikoff,W.S. 37 | TITLE Genetic regulation: the Lac control region 38 | JOURNAL Science 187 (4171), 27-35 (1975) 39 | PUBMED 1088926 40 | REFERENCE 6 (bases 1227 to 1271) 41 | AUTHORS Gilbert,W., Maxam,A. and Mirzabekov,A. 42 | TITLE Contacts between the lac repressor and DNA revealed by methylation 43 | JOURNAL (in) Kjeldgaard,N.C. and Maaloe,O. (Eds.); 44 | CONTROL OF RIBOSOME SYNTHESIS: 138-143; 45 | Academic Press, New York (1976) 46 | REFERENCE 7 (sites) 47 | AUTHORS Marians,K.J. and Brooker,J.D. 48 | TITLE Structure of the lactose operator 49 | JOURNAL Nature 260 (5549), 360-363 (1976) 50 | PUBMED 768781 51 | REFERENCE 8 (bases 1242 to 1268) 52 | AUTHORS Heyneker,H.L., Shine,J., Goodman,H.M., Boyer,H.W., Rosenberg,J., 53 | Dickerson,R.E., Narang,S.A., Itakura,K., Lin,S. and Riggs,A.D. 54 | TITLE Synthetic lac operator DNA is functional in vivo 55 | JOURNAL Nature 263 (5580), 748-752 (1976) 56 | PUBMED 1069185 57 | REFERENCE 9 (sites) 58 | AUTHORS Dickson,R.C., Abelson,J. and Johnson,P. 59 | TITLE Nucleotide sequence changes produced by mutations in the lac 60 | promoter of Escherichia coli 61 | JOURNAL J. Mol. Biol. 111 (1), 65-75 (1977) 62 | PUBMED 323498 63 | REFERENCE 10 (bases 51 to 264) 64 | AUTHORS Steege,D.A. 65 | TITLE 5'-Terminal nucleotide sequence of Escherichia coli lactose 66 | repressor mRNA: features of translational initiation and 67 | reinitiation sites 68 | JOURNAL Proc. Natl. Acad. Sci. U.S.A. 
74 (10), 4163-4167 (1977) 69 | PUBMED 337294 70 | REFERENCE 11 (bases 1 to 81) 71 | AUTHORS Calos,M.P. 72 | TITLE DNA sequence for a low-level promoter of the lac repressor gene and 73 | an 'up' promoter mutation 74 | JOURNAL Nature 274 (5673), 762-765 (1978) 75 | PUBMED 355890 76 | REFERENCE 12 (bases 49 to 1161) 77 | AUTHORS Farabaugh,P.J. 78 | TITLE Sequence of the lacI gene 79 | JOURNAL Nature 274 (5673), 765-769 (1978) 80 | PUBMED 355891 81 | REFERENCE 13 (sites) 82 | AUTHORS Miller,J.H., Coulondre,C. and Farabaugh,P.J. 83 | TITLE Correlation of nonsense sites in the lacI gene with specific codons 84 | in the nucleotide sequence 85 | JOURNAL Nature 274 (5673), 770-775 (1978) 86 | PUBMED 355892 87 | REFERENCE 14 (sites) 88 | AUTHORS Calos,M.P. and Miller,J.H. 89 | TITLE DNA sequence alteration resulting from a mutation impairing 90 | promoter function in the lac repressor gene 91 | JOURNAL Mol. Gen. Genet. 178 (1), 225-227 (1980) 92 | PUBMED 6770231 93 | REFERENCE 15 (bases 4306 to 5804) 94 | AUTHORS Buchel,D.E., Gronenborn,B. and Muller-Hill,B. 95 | TITLE Sequence of the lactose permease gene 96 | JOURNAL Nature 283 (5747), 541-545 (1980) 97 | PUBMED 6444453 98 | REFERENCE 16 (sites) 99 | AUTHORS Miller,J.H., Calos,M.P. and Galas,D.J. 100 | TITLE Genetic and sequencing studies of the specificity of transposition 101 | into the lac region of E. coli 102 | JOURNAL Cold Spring Harb. Symp. Quant. Biol. 45 (PT 1), 243-257 (1981) 103 | PUBMED 6271472 104 | REFERENCE 17 (sites) 105 | AUTHORS Chenchick,A., Beabealashvilli,R. and Mirzabekov,A. 106 | TITLE Topography of interaction of Escherichia coli RNA polymerase 107 | subunits with lac UV5 promoter 108 | JOURNAL FEBS Lett. 128 (1), 46-50 (1981) 109 | PUBMED 7023981 110 | REFERENCE 18 (sites) 111 | AUTHORS Betz,J.L. and Sadler,J.R. 112 | TITLE Variants of a cloned synthetic lactose operator. I. 
A palindromic 113 | dimer lactose operator derived from one stand of the cloned 40-base 114 | pair operator 115 | JOURNAL Gene 13 (1), 1-12 (1981) 116 | PUBMED 7016667 117 | REFERENCE 19 (sites) 118 | AUTHORS Sadler,J.R. and Tecklenburg,M. 119 | TITLE Cloning and characterization of the natural lactose operator 120 | JOURNAL Gene 13 (1), 13-23 (1981) 121 | PUBMED 6263752 122 | REFERENCE 20 (sites) 123 | AUTHORS Betz,J.L. and Sadler,J.R. 124 | TITLE Variants of a cloned synthetic lactose operator. II. 125 | Chloramphenicol-resistant revertants retaining a lactose operator 126 | in the CAT gene of plasmid pBR325 127 | JOURNAL Gene 15 (2-3), 187-200 (1981) 128 | PUBMED 6271642 129 | REFERENCE 21 (sites) 130 | AUTHORS Calos,M.P. and Miller,J.H. 131 | TITLE The DNA sequence change resulting from the IQ1 mutation, which 132 | greatly increases promoter strength 133 | JOURNAL Mol. Gen. Genet. 183 (3), 559-560 (1981) 134 | PUBMED 7038381 135 | REFERENCE 22 (sites) 136 | AUTHORS Mieschendahl,M., Buchel,D., Bocklage,H. and Muller-Hill,B. 137 | TITLE Mutations in the lacY gene of Escherichia coli define functional 138 | organization of lactose permease 139 | JOURNAL Proc. Natl. Acad. Sci. U.S.A. 78 (12), 7652-7656 (1981) 140 | PUBMED 6278484 141 | REFERENCE 23 (sites) 142 | AUTHORS Russell,D.R. and Bennett,G.N. 143 | TITLE Construction and analysis of in vivo activity of E. coli promoter 144 | hybrids and promoter mutants that alter the -35 to -10 spacing 145 | JOURNAL Gene 20 (2), 231-243 (1982) 146 | PUBMED 6299890 147 | REFERENCE 24 (sites) 148 | AUTHORS Horowitz,H. and Platt,T. 149 | TITLE A termination site for LacI transcription is between the CAP site 150 | and the lac promoter 151 | JOURNAL J. Biol. Chem. 257 (19), 11740-11746 (1982) 152 | PUBMED 6288696 153 | REFERENCE 25 (sites) 154 | AUTHORS Klein,R.D. and Wells,R.D. 155 | TITLE Effects of neighboring DNA homopolymers on the biochemical and 156 | physical properties of the Escherichia coli lactose promoter. I. 
157 | Cloning and characterization studies 158 | JOURNAL J. Biol. Chem. 257 (21), 12954-12961 (1982) 159 | PUBMED 6290487 160 | REFERENCE 26 (bases 1183 to 1291) 161 | AUTHORS Weiher,H. and Schaller,H. 162 | TITLE Segment-specific mutagenesis: extensive mutagenesis of a lac 163 | promoter/operator element 164 | JOURNAL Proc. Natl. Acad. Sci. U.S.A. 79 (5), 1408-1412 (1982) 165 | PUBMED 7041119 166 | REFERENCE 27 (sites) 167 | AUTHORS Van Dyke,M.W. and Dervan,P.B. 168 | TITLE Footprinting with MPE.Fe(II). Complementary-strand analyses of 169 | distamycin- and actinomycin-binding sites on heterogeneous DNA 170 | JOURNAL Cold Spring Harb. Symp. Quant. Biol. 47 (PT 1), 347-353 (1983) 171 | PUBMED 6305557 172 | REFERENCE 28 (bases 1287 to 4364) 173 | AUTHORS Kalnins,A., Otto,K., Ruther,U. and Muller-Hill,B. 174 | TITLE Sequence of the lacZ gene of Escherichia coli 175 | JOURNAL EMBO J. 2 (4), 593-597 (1983) 176 | PUBMED 6313347 177 | REFERENCE 29 (sites) 178 | AUTHORS Cone,K.C., Sellitti,M.A. and Steege,D.A. 179 | TITLE Lac repressor mRNA transcription terminates in vivo in the lac 180 | control region 181 | JOURNAL J. Biol. Chem. 258 (18), 11296-11304 (1983) 182 | PUBMED 6309841 183 | REFERENCE 30 (sites) 184 | AUTHORS Sadler,J.R., Sasmor,H. and Betz,J.L. 185 | TITLE A perfectly symmetric lac operator binds the lac repressor very 186 | tightly 187 | JOURNAL Proc. Natl. Acad. Sci. U.S.A. 80 (22), 6785-6789 (1983) 188 | PUBMED 6316325 189 | REFERENCE 31 (sites) 190 | AUTHORS Glickman,B.W. and Ripley,L.S. 191 | TITLE Structural intermediates of deletion mutagenesis: a role for 192 | palindromic DNA 193 | JOURNAL Proc. Natl. Acad. Sci. U.S.A. 81 (2), 512-516 (1984) 194 | PUBMED 6582506 195 | REFERENCE 32 (sites) 196 | AUTHORS Spassky,A., Kirkegaard,K. and Buc,H. 
197 | TITLE Changes in the DNA structure of the lac UV5 promoter during 198 | formation of an open complex with Escherichia coli RNA polymerase 199 | JOURNAL Biochemistry 24 (11), 2723-2731 (1985) 200 | PUBMED 3896305 201 | REFERENCE 33 (sites) 202 | AUTHORS Straney,D.C. and Crothers,D.M. 203 | TITLE Intermediates in transcription initiation from the E. coli lac UV5 204 | promoter 205 | JOURNAL Cell 43 (2 PT 1), 449-459 (1985) 206 | PUBMED 2416465 207 | REFERENCE 34 (sites) 208 | AUTHORS Looman,A.C., de Gruyter,M., Vogelaar,A. and van Knippenberg,P.H. 209 | TITLE Effects of heterologous ribosomal binding sites on the 210 | transcription and translation of the lacZ gene of Escherichia coli 211 | JOURNAL Gene 37 (1-3), 145-154 (1985) 212 | PUBMED 3932130 213 | REFERENCE 35 (sites) 214 | AUTHORS Mandecki,W., Goldman,R.A., Powell,B.S. and Caruthers,M.H. 215 | TITLE lac Up-promoter mutants with increased homology to the consensus 216 | promoter sequence 217 | JOURNAL J. Bacteriol. 164 (3), 1353-1355 (1985) 218 | PUBMED 2999082 219 | REFERENCE 36 (sites) 220 | AUTHORS Malamy,M.H., Rahaim,P.T., Hoffman,C.S., Baghdoyan,D., O'Connor,M.B. 221 | and Miller,J.F. 222 | TITLE A frameshift mutation at the junction of an IS1 insertion within 223 | lacZ restores beta-galactosidase activity via formation of an 224 | active lacZ-IS1 fusion protein 225 | JOURNAL J. Mol. Biol. 181 (4), 551-555 (1985) 226 | PUBMED 2987506 227 | REFERENCE 37 (bases 5646 to 7477) 228 | AUTHORS Hediger,M.A., Johnson,D.F., Nierlich,D.P. and Zabin,I. 229 | TITLE DNA sequence of the lactose operon: the lacA gene and the 230 | transcriptional termination region 231 | JOURNAL Proc. Natl. Acad. Sci. U.S.A. 82 (19), 6414-6418 (1985) 232 | PUBMED 3901000 233 | COMMENT Original source text: Escherichia coli DNA; mRNA; clone 234 | lambda-h80dlac DNA; clone puk217; pgm8 (see comment). 235 | [3] sites; UV5 mRNA transcripts and operator mutants. [(in) 236 | Sund,H. and Blauer,G. 
(eds.);Protein-Ligand Interactions: 237 | 193-207;Walter de] sites; operator mutational analysis. [7] 238 | sites; S1 and mung bean nuclease action on operator DNA. [9] 239 | sites; class I, II and III promoter mutant analysis. [13] sites; 240 | lacI mutant analysis. 241 | [16] sites; Tn5, Tn9 and Tn10 insertion sites in lac region. [14] 242 | sites; lacI promoter mutation UJ177. 243 | [18] sites; palindromic dimer operator;. 244 | [19] sites; natural operator sequence. 245 | [20] sites; operator mutational analysis. 246 | [21] sites; lacI-Q deletion. 247 | [17] sites; RNA polymerase UV5 promoter interaction. [22] sites; 248 | lacY mutational analysis. 249 | [24] sites; lacI-promoted transcription termination. [25] sites; 250 | wt and UV5 promoter sequence studies. [23] sites; UV5 promoter 251 | mutational analysis. 252 | [30] sites; perfectly symmetric operator sequence. [29] sites; 253 | lacI mRNA termination site. 254 | [27] sites; distamycin and actinomycin binding to promoter. [31] 255 | sites; lacI deletion studies. 256 | [35] sites; promoter mutational studies. 257 | [33] sites; DNAase I studies with promoter sequence. [34] sites; 258 | ribosomal binding and translation initiation for lacZ. [36] sites; 259 | insertion sequence IS1 integration in lacZ;. [32] sites; DNAase I 260 | studies with promoter. 261 | [1] first reports a 27 bp operator(sites 1240-1266) with two-fold 262 | symmetries; the operator has also been defined to be bases 263 | 1246-1266 or bases 1239-1273 [8]. [(in) Kjeldgaard,N.C. and Maaloe, 264 | O.(eds);Control of ribosome synthesis: 138-143;A] explores the 265 | ability of lac 266 | repressor protein to affect methylation of operator DNA. [8] 267 | argues that DNA on both sides of the 21 bp operator (bases 268 | 1246-1266) affects repressor binding but that the sequences of this 269 | DNA are probably not critical. 
[5] gives a larger sequence known as 270 | the promoter-operator region for the wild-type, whereas [2] and 271 | [26] give portions of this region for the mutant strain UV5. Within 272 | the promoter region, bases 1162-1199 are identified as the 273 | catabolite gene activator protein binding site (cap) and bases 274 | 1200-1245 are the RNA polymerase interaction site. [10] reports a 275 | sequence for the 5'end of the lacI (repressor) gene and discusses 276 | restart in mutant strains. [11] presents a sequence for the lacI 277 | promoter region and identifies an I-Q mutation which enhances lacI 278 | transcription approximately ten-fold. [12] gives a complete 279 | sequence 280 | for lacI which agrees with the known lac repressor sequence. [26] 281 | examines the promoter-operator region in the UV5 strain (lac109) 282 | and studies 23 mutant derivatives of this sequence. This sequence 283 | agrees with known protein sequences for the lacZ, lacY and lacA 284 | enzymes. [15] notes that the fMet codon is not present 285 | for lacA and suggests that the 'ttg' codon (5727-5729), which 286 | immediately precedes the mature N-terminal asparagine codon, is the 287 | start codon. The cds for lacZ, lacY and lacA are included on a 288 | single mRNA transcript. 289 | Complete source information: 290 | Escherichia coli DNA [1],[(in) Kjeldgaard,N.C. and Maaloe,O.(eds); 291 | Control of ribosome synthesis: 138-143;A],[8],[12],[26]; mRNA [2], 292 | [5],[10]; clone 293 | lambda-h80dlac DNA [11],[15]; clone puk217 [28]; pgm8 [37]. 
294 | FEATURES Location/Qualifiers 295 | source 1..7477 296 | /organism="Escherichia coli" 297 | /mol_type="genomic DNA" 298 | /db_xref="taxon:562" 299 | variation 16 300 | /note="c in wild-type; t in 'up' promoter mutant I-Q [11]" 301 | mRNA 51..1230 302 | /note="lacI (repressor) mRNA; preferred in vivo 3' end 303 | [12],[29]" 304 | gene 79..1161 305 | /gene="lacI" 306 | CDS 79..1161 307 | /gene="lacI" 308 | /note="lac repressor protein (gtg start codon)" 309 | /codon_start=1 310 | /transl_table=11 311 | /protein_id="AAA24052.1" 312 | /translation="MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAE 313 | LNYIPNRVAQQLAGKQSLLIGVATSSLALHAPSQIVAAIKSRADQLGASVVVSMVERS 314 | GVEACKAAVHNLLAQRVSGLIINYPLDDQDAIAVEAACTNVPALFLDVSDQTPINSII 315 | FSHEDGTRLGVEHLVALGHQQIALLAGPLSSVSARLRLAGWHKYLTRNQIQPIAEREG 316 | DWSAMSGFQQTMQMLNEGIVPTAMLVANDQMALGAMRAITESGLRVGADISVVGYDDT 317 | EDSSCYIPPSTTIKQDFRLLGQTSVDRLLQLSQGQAVKGNQLLPVSLVKRKTTLAPNT 318 | QTASPRALADSLMQLARQVSRLESGQ" 319 | regulatory 1162..1199 320 | /regulatory_class="other" 321 | /note="cap protein binding site" 322 | variation 1183..1186 323 | /note="ttag in wild-type; aatt in strain UV5 [26]" 324 | variation 1209..1211 325 | /note="gct in wild-type; gt in mutant l305 [5]" 326 | variation 1212 327 | /note="t in wild-type; a in mutant l241 [5]" 328 | variation 1230 329 | /note="c in wild-type; a in mutant p-r-1a [5]" 330 | variation 1237..1238 331 | /note="gt in wild-type; aa in strain UV5 [26]" 332 | variation 1242..1245 333 | /note="gtgg in wild-type; ttca in synthetic operator [8]" 334 | mRNA 1246..>4358 335 | /note="lacZ mRNA [2],[5]" 336 | regulatory 1246..1266 337 | /regulatory_class="other" 338 | /note="lac repressor protein binding site" 339 | variation 1267..1268 340 | /note="tc in wild-type; tg in synthetic operator [8]" 341 | variation 1282..1291 342 | /note="ctatgaccat in wild-type; gatccggcca in strain UV5 343 | [26]" 344 | gene 1284..4358 345 | /gene="lacZ" 346 | CDS 1284..4358 347 | /gene="lacZ" 348 | 
/note="beta-d-galactosidase" 349 | /codon_start=1 350 | /transl_table=11 351 | /protein_id="AAA24053.1" 352 | /translation="MTMITDSLAVVLQRRDWENPGVTQLNRLAAHPPFASWRNSEEAR 353 | TDRPSQQLRSLNGEWRFAWFPAPEAVPESWLECDLPEADTVVVPSNWQMHGYDAPIYT 354 | NVTYPITVNPPFVPTENPTGCYSLTFNVDESWLQEGQTRIIFDGVNSAFHLWCNGRWV 355 | GYGQDSRLPSEFDLSAFLRAGENRLAVMVLRWSDGSYLEDQDMWRMSGIFRDVSLLHK 356 | PTTQISDFHVATRFNDDFSRAVLEAEVQMCGELRDYLRVTVSLWQGETQVASGTAPFG 357 | GEIIDERGGYADRVTLRLNVENPKLWSAEIPNLYRAVVELHTADGTLIEAEACDVGFR 358 | EVRIENGLLLLNGKPLLIRGVNRHEHHPLHGQVMDEQTMVQDILLMKQNNFNAVRCSH 359 | YPNHPLWYTLCDRYGLYVVDEANIETHGMVPMNRLTDDPRWLPAMSERVTRMVQRDRN 360 | HPSVIIWSLGNESGHGANHDALYRWIKSVDPSRPVQYEGGGADTTATDIICPMYARVD 361 | EDQPFPAVPKWSIKKWLSLPGETRPLILCEYAHAMGNSLGGFAKYWQAFRQYPRLQGG 362 | FVWDWVDQSLIKYDENGNPWSAYGGDFGDTPNDRQFCMNGLVFADRTPHPALTEAKHQ 363 | QQFFQFRLSGQTIEVTSEYLFRHSDNELLHWMVALDGKPLASGEVPLDVAPQGKQLIE 364 | LPELPQPESAGQLWLTVRVVQPNATAWSEAGHISAWQQWRLAENLSVTLPAASHAIPH 365 | LTTSEMDFCIELGNKRWQFNRQSGFLSQMWIGDKKQLLTPLRDQFTRAPLDNDIGVSE 366 | ATRIDPNAWVERWKAAGHYQAEAALLQCTADTLADAVLITTAHAWQHQGKTLFISRKT 367 | YRIDGSGQMAITVDVEVASDTPHPARIGLNCQLAQVAERVNWLGLGPQENYPDRLTAA 368 | CFDRWDLPLSDMYTPYVFPSENGLRCGTRELNYGPHQWRGDFQFNISRYSQQQLMETS 369 | HRHLLHAEEGTWLNIDGFHMGIGGDDSWSPSVSAEFQLSAGRYHYQLVWCQK" 370 | gene 4410..5663 371 | /gene="lacY" 372 | CDS 4410..5663 373 | /gene="lacY" 374 | /note="lactose permease" 375 | /codon_start=1 376 | /transl_table=11 377 | /protein_id="AAA24054.1" 378 | /translation="MYYLKNTNFWMFGLFFFFYFFIMGAYFPFFPIWLHDINHISKSD 379 | TGIIFAAISLFSLLFQPLFGLLSDKLGLRKYLLWIITGMLVMFAPFFIFIFGPLLQYN 380 | ILVGSIVGGIYLGFCFNAGAPAVEAFIEKVSRRSNFEFGRARMFGCVGWALCASIVGI 381 | MFTINNQFVFWLGSGCALILAVLLFFAKTDAPSSATVANAVGANHSAFSLKLALELFR 382 | QPKLWFLSLYVIGVSCTYDVFDQQFANFFTSFFATGEQGTRVFGYVTTMGELLNASIM 383 | FFAPLIINRIGGKNALLLAGTIMSVRIIGSSFATSALEVVILKTLHMFEVPFLLVGCF 384 | KYITSQFEVRFSATIYLVCFCFFKQLAMIFMSVLAGNMYESIGFQGAYLVLGLVALGF 385 | TLISVFTLSGPGPLSLLRRQVNEVA" 386 | gene 5727..6338 387 | /gene="lacA" 388 | CDS 
5727..6338 389 | /gene="lacA" 390 | /note="thiogalactoside acetyltransferase (ttg start 391 | codon)" 392 | /codon_start=1 393 | /transl_table=11 394 | /protein_id="AAA24055.1" 395 | /translation="MNMPMTERIRAGKLFTDMCEGLPEKRLRGKTLMYEFNHSHPSEV 396 | EKRESLIKEMFATVGENAWVEPPVYFSYGSNIHIGRNFYANFNLTIVDDYTVTIGDNV 397 | LIAPNVTLSVTGHPVHHELRKNGEMYSFPITIGNNVWIGSHVVINPGVTIGDNSVIGA 398 | GSIVTKDIPPNVVAAGVPCRVIREINDRDKHYYFKDYKVESSV" 399 | ORIGIN HindII site [Nature 274, 762-765 (1978)]. 400 | 1 gacaccatcg aatggcgcaa aacctttcgc ggtatggcat gatagcgccc ggaagagagt 401 | 61 caattcaggg tggtgaatgt gaaaccagta acgttatacg atgtcgcaga gtatgccggt 402 | 121 gtctcttatc agaccgtttc ccgcgtggtg aaccaggcca gccacgtttc tgcgaaaacg 403 | 181 cgggaaaaag tggaagcggc gatggcggag ctgaattaca ttcccaaccg cgtggcacaa 404 | 241 caactggcgg gcaaacagtc gttgctgatt ggcgttgcca cctccagtct ggccctgcac 405 | 301 gcgccgtcgc aaattgtcgc ggcgattaaa tctcgcgccg atcaactggg tgccagcgtg 406 | 361 gtggtgtcga tggtagaacg aagcggcgtc gaagcctgta aagcggcggt gcacaatctt 407 | 421 ctcgcgcaac gcgtcagtgg gctgatcatt aactatccgc tggatgacca ggatgccatt 408 | 481 gctgtggaag ctgcctgcac taatgttccg gcgttatttc ttgatgtctc tgaccagaca 409 | 541 cccatcaaca gtattatttt ctcccatgaa gacggtacgc gactgggcgt ggagcatctg 410 | 601 gtcgcattgg gtcaccagca aatcgcgctg ttagcgggcc cattaagttc tgtctcggcg 411 | 661 cgtctgcgtc tggctggctg gcataaatat ctcactcgca atcaaattca gccgatagcg 412 | 721 gaacgggaag gcgactggag tgccatgtcc ggttttcaac aaaccatgca aatgctgaat 413 | 781 gagggcatcg ttcccactgc gatgctggtt gccaacgatc agatggcgct gggcgcaatg 414 | 841 cgcgccatta ccgagtccgg gctgcgcgtt ggtgcggata tctcggtagt gggatacgac 415 | 901 gataccgaag acagctcatg ttatatcccg ccgtcaacca ccatcaaaca ggattttcgc 416 | 961 ctgctggggc aaaccagcgt ggaccgcttg ctgcaactct ctcagggcca ggcggtgaag 417 | 1021 ggcaatcagc tgttgcccgt ctcactggtg aaaagaaaaa ccaccctggc gcccaatacg 418 | 1081 caaaccgcct ctccccgcgc gttggccgat tcattaatgc agctggcacg acaggtttcc 419 | 1141 cgactggaaa gcgggcagtg agcgcaacgc aattaatgtg agttagctca ctcattaggc 
420 | 1201 accccaggct ttacacttta tgcttccggc tcgtatgttg tgtggaattg tgagcggata 421 | 1261 acaatttcac acaggaaaca gctatgacca tgattacgga ttcactggcc gtcgttttac 422 | 1321 aacgtcgtga ctgggaaaac cctggcgtta cccaacttaa tcgccttgca gcacatcccc 423 | 1381 ctttcgccag ctggcgtaat agcgaagagg cccgcaccga tcgcccttcc caacagttgc 424 | 1441 gcagcctgaa tggcgaatgg cgctttgcct ggtttccggc accagaagcg gtgccggaaa 425 | 1501 gctggctgga gtgcgatctt cctgaggccg atactgtcgt cgtcccctca aactggcaga 426 | 1561 tgcacggtta cgatgcgccc atctacacca acgtaaccta tcccattacg gtcaatccgc 427 | 1621 cgtttgttcc cacggagaat ccgacgggtt gttactcgct cacatttaat gttgatgaaa 428 | 1681 gctggctaca ggaaggccag acgcgaatta tttttgatgg cgttaactcg gcgtttcatc 429 | 1741 tgtggtgcaa cgggcgctgg gtcggttacg gccaggacag tcgtttgccg tctgaatttg 430 | 1801 acctgagcgc atttttacgc gccggagaaa accgcctcgc ggtgatggtg ctgcgttgga 431 | 1861 gtgacggcag ttatctggaa gatcaggata tgtggcggat gagcggcatt ttccgtgacg 432 | 1921 tctcgttgct gcataaaccg actacacaaa tcagcgattt ccatgttgcc actcgcttta 433 | 1981 atgatgattt cagccgcgct gtactggagg ctgaagttca gatgtgcggc gagttgcgtg 434 | 2041 actacctacg ggtaacagtt tctttatggc agggtgaaac gcaggtcgcc agcggcaccg 435 | 2101 cgcctttcgg cggtgaaatt atcgatgagc gtggtggtta tgccgatcgc gtcacactac 436 | 2161 gtctgaacgt cgaaaacccg aaactgtgga gcgccgaaat cccgaatctc tatcgtgcgg 437 | 2221 tggttgaact gcacaccgcc gacggcacgc tgattgaagc agaagcctgc gatgtcggtt 438 | 2281 tccgcgaggt gcggattgaa aatggtctgc tgctgctgaa cggcaagccg ttgctgattc 439 | 2341 gaggcgttaa ccgtcacgag catcatcctc tgcatggtca ggtcatggat gagcagacga 440 | 2401 tggtgcagga tatcctgctg atgaagcaga acaactttaa cgccgtgcgc tgttcgcatt 441 | 2461 atccgaacca tccgctgtgg tacacgctgt gcgaccgcta cggcctgtat gtggtggatg 442 | 2521 aagccaatat tgaaacccac ggcatggtgc caatgaatcg tctgaccgat gatccgcgct 443 | 2581 ggctaccggc gatgagcgaa cgcgtaacgc gaatggtgca gcgcgatcgt aatcacccga 444 | 2641 gtgtgatcat ctggtcgctg gggaatgaat caggccacgg cgctaatcac gacgcgctgt 445 | 2701 atcgctggat caaatctgtc gatccttccc gcccggtgca gtatgaaggc 
ggcggagccg 446 | 2761 acaccacggc caccgatatt atttgcccga tgtacgcgcg cgtggatgaa gaccagccct 447 | 2821 tcccggctgt gccgaaatgg tccatcaaaa aatggctttc gctacctgga gagacgcgcc 448 | 2881 cgctgatcct ttgcgaatac gcccacgcga tgggtaacag tcttggcggt ttcgctaaat 449 | 2941 actggcaggc gtttcgtcag tatccccgtt tacagggcgg cttcgtctgg gactgggtgg 450 | 3001 atcagtcgct gattaaatat gatgaaaacg gcaacccgtg gtcggcttac ggcggtgatt 451 | 3061 ttggcgatac gccgaacgat cgccagttct gtatgaacgg tctggtcttt gccgaccgca 452 | 3121 cgccgcatcc agcgctgacg gaagcaaaac accagcagca gtttttccag ttccgtttat 453 | 3181 ccgggcaaac catcgaagtg accagcgaat acctgttccg tcatagcgat aacgagctcc 454 | 3241 tgcactggat ggtggcgctg gatggtaagc cgctggcaag cggtgaagtg cctctggatg 455 | 3301 tcgctccaca aggtaaacag ttgattgaac tgcctgaact accgcagccg gagagcgccg 456 | 3361 ggcaactctg gctcacagta cgcgtagtgc aaccgaacgc gaccgcatgg tcagaagccg 457 | 3421 ggcacatcag cgcctggcag cagtggcgtc tggcggaaaa cctcagtgtg acgctccccg 458 | 3481 ccgcgtccca cgccatcccg catctgacca ccagcgaaat ggatttttgc atcgagctgg 459 | 3541 gtaataagcg ttggcaattt aaccgccagt caggctttct ttcacagatg tggattggcg 460 | 3601 ataaaaaaca actgctgacg ccgctgcgcg atcagttcac ccgtgcaccg ctggataacg 461 | 3661 acattggcgt aagtgaagcg acccgcattg accctaacgc ctgggtcgaa cgctggaagg 462 | 3721 cggcgggcca ttaccaggcc gaagcagcgt tgttgcagtg cacggcagat acacttgctg 463 | 3781 atgcggtgct gattacgacc gctcacgcgt ggcagcatca ggggaaaacc ttatttatca 464 | 3841 gccggaaaac ctaccggatt gatggtagtg gtcaaatggc gattaccgtt gatgttgaag 465 | 3901 tggcgagcga tacaccgcat ccggcgcgga ttggcctgaa ctgccagctg gcgcaggtag 466 | 3961 cagagcgggt aaactggctc ggattagggc cgcaagaaaa ctatcccgac cgccttactg 467 | 4021 ccgcctgttt tgaccgctgg gatctgccat tgtcagacat gtataccccg tacgtcttcc 468 | 4081 cgagcgaaaa cggtctgcgc tgcgggacgc gcgaattgaa ttatggccca caccagtggc 469 | 4141 gcggcgactt ccagttcaac atcagccgct acagtcaaca gcaactgatg gaaaccagcc 470 | 4201 atcgccatct gctgcacgcg gaagaaggca catggctgaa tatcgacggt ttccatatgg 471 | 4261 ggattggtgg cgacgactcc tggagcccgt cagtatcggc 
ggaattccag ctgagcgccg 472 | 4321 gtcgctacca ttaccagttg gtctggtgtc aaaaataata ataaccgggc aggccatgtc 473 | 4381 tgcccgtatt tcgcgtaagg aaatccatta tgtactattt aaaaaacaca aacttttgga 474 | 4441 tgttcggttt attctttttc ttttactttt ttatcatggg agcctacttc ccgtttttcc 475 | 4501 cgatttggct acatgacatc aaccatatca gcaaaagtga tacgggtatt atttttgccg 476 | 4561 ctatttctct gttctcgcta ttattccaac cgctgtttgg tctgctttct gacaaactcg 477 | 4621 ggctgcgcaa atacctgctg tggattatta ccggcatgtt agtgatgttt gcgccgttct 478 | 4681 ttatttttat cttcgggcca ctgttacaat acaacatttt agtaggatcg attgttggtg 479 | 4741 gtatttatct aggcttttgt tttaacgccg gtgcgccagc agtagaggca tttattgaga 480 | 4801 aagtcagccg tcgcagtaat ttcgaatttg gtcgcgcgcg gatgtttggc tgtgttggct 481 | 4861 gggcgctgtg tgcctcgatt gtcggcatca tgttcaccat caataatcag tttgttttct 482 | 4921 ggctgggctc tggctgtgca ctcatcctcg ccgttttact ctttttcgcc aaaacggatg 483 | 4981 cgccctcttc tgccacggtt gccaatgcgg taggtgccaa ccattcggca tttagcctta 484 | 5041 agctggcact ggaactgttc agacagccaa aactgtggtt tttgtcactg tatgttattg 485 | 5101 gcgtttcctg cacctacgat gtttttgacc aacagtttgc taatttcttt acttcgttct 486 | 5161 ttgctaccgg tgaacagggt acgcgggtat ttggctacgt aacgacaatg ggcgaattac 487 | 5221 ttaacgcctc gattatgttc tttgcgccac tgatcattaa tcgcatcggt gggaaaaacg 488 | 5281 ccctgctgct ggctggcact attatgtctg tacgtattat tggctcatcg ttcgccacct 489 | 5341 cagcgctgga agtggttatt ctgaaaacgc tgcatatgtt tgaagtaccg ttcctgctgg 490 | 5401 tgggctgctt taaatatatt accagccagt ttgaagtgcg tttttcagcg acgatttatc 491 | 5461 tggtctgttt ctgcttcttt aagcaactgg cgatgatttt tatgtctgta ctggcgggca 492 | 5521 atatgtatga aagcatcggt ttccagggcg cttatctggt gctgggtctg gtggcgctgg 493 | 5581 gcttcacctt aatttccgtg ttcacgctta gcggccccgg cccgctttcc ctgctgcgtc 494 | 5641 gtcaggtgaa tgaagtcgct taagcaatca atgtcggatg cggcgcgacg cttatccgac 495 | 5701 caacatatca taacggagtg atcgcattga acatgccaat gaccgaaaga ataagagcag 496 | 5761 gcaagctatt taccgatatg tgcgaaggct taccggaaaa aagacttcgt gggaaaacgt 497 | 5821 taatgtatga gtttaatcac tcgcatccat 
cagaagttga aaaaagagaa agcctgatta 498 | 5881 aagaaatgtt tgccacggta ggggaaaacg cctgggtaga accgcctgtc tatttctctt 499 | 5941 acggttccaa catccatata ggccgcaatt tttatgcaaa tttcaattta accattgtcg 500 | 6001 atgactacac ggtaacaatc ggtgataacg tactgattgc acccaacgtt actctttccg 501 | 6061 ttacgggaca ccctgtacac catgaattga gaaaaaacgg cgagatgtac tcttttccga 502 | 6121 taacgattgg caataacgtc tggatcggaa gtcatgtggt tattaatcca ggcgtcacca 503 | 6181 tcggggataa ttctgttatt ggcgcgggta gtatcgtcac aaaagacatt ccaccaaacg 504 | 6241 tcgtggcggc tggcgttcct tgtcgggtta ttcgcgaaat aaacgaccgg gataagcact 505 | 6301 attatttcaa agattataaa gttgaatcgt cagtttaaat tataaaaatt gcctgatacg 506 | 6361 ctgcgcttat caggcctaca agttcagcga tctacattag ccgcatccgg catgaacaaa 507 | 6421 gcgcaggaac aagcgtcgca tcatgcctct ttgacccaca gctgcggaaa acgtactggt 508 | 6481 gcaaaacgca gggttatgat catcagccca acgacgcaca gcgcatgaaa tgcccagtcc 509 | 6541 atcaggtaat tgccgctgat actacgcagc acgccagaaa accacggggc aagcccggcg 510 | 6601 atgataaaac cgattccctg cataaacgcc accagcttgc cagcaatagc cggttgcaca 511 | 6661 gagtgatcga gcgccagcag caaacagagc ggaaacgcgc cgcccagacc taacccacac 512 | 6721 accatcgccc acaataccgg caattgcatc ggcagccaga taaagccgca gaaccccacc 513 | 6781 agttgtaaca ccagcgccag cattaacagt ttgcgccgat cctgatggcg agccatagca 514 | 6841 ggcatcagca aagctcctgc ggcttgccca agcgtcatca atgccagtaa ggaaccgctg 515 | 6901 tactgcgcgc tggcaccaat ctcaatatag aaagcgggta accaggcaat caggctggcg 516 | 6961 taaccgccgt taatcagacc gaagtaaaca cccagcgtcc acgcgcgggg agtgaatacc 517 | 7021 acgcgaaccg gagtggttgt tgtcttgtgg gaagaggcga cctcgcgggc gctttgccac 518 | 7081 caccaggcaa agagcgcaac aacggcaggc agcgccacca ggcgagtgtt tgataccagg 519 | 7141 tttcgctatg ttgaactaac cagggcgtta tggcggcacc aagcccaccg ccgcccatca 520 | 7201 gagccgcgga ccacagcccc atcaccagtg gcgtgcgctg ctgaaaccgc cgtttaatca 521 | 7261 ccgaagcatc accgcctgaa tgatgccgat ccccacccca ccaagcagtg cgctgctaag 522 | 7321 cagcagcgca ctttgcgggt aaagctcacg catcaatgca ccgacggcaa tcagcaacag 523 | 7381 actgatggcg acactgcgac 
gttcgctgac atgctgatga agccagcttc cggccagcgc 524 | 7441 cagcccgccc atggtaacca ccggcagagc ggtcgac 525 | // 526 | 527 | -------------------------------------------------------------------------------- /Section1/Chap6/6.1.1.parse_example_1.py: -------------------------------------------------------------------------------- 1 | #6.1.1.parse_example_1.py 2 | 3 | from Bio import SeqIO 4 | 5 | seq = SeqIO.parse("sample_1.fasta", "fasta") 6 | print(type(seq)) 7 | for s in seq: 8 | print(type(s)) 9 | print(s) 10 | print("") # 줄 구분을 위해 넣었다. 11 | 12 | -------------------------------------------------------------------------------- /Section1/Chap6/6.1.1.parse_example_2.py: -------------------------------------------------------------------------------- 1 | #6.1.1.parse_example_2.py 2 | 3 | from Bio import SeqIO 4 | 5 | seq = SeqIO.parse("sample_2.fasta", "fasta") 6 | print(type(seq)) 7 | for s in seq: 8 | print(type(s)) 9 | print(s) 10 | print("") # 줄 구분을 위해 넣었다. 11 | 12 | -------------------------------------------------------------------------------- /Section1/Chap6/6.1.2.read_example_1.py: -------------------------------------------------------------------------------- 1 | #6.1.2.read_example_1.py 2 | 3 | from Bio import SeqIO 4 | 5 | seq = SeqIO.read("sample_1.fasta", "fasta") 6 | print(type(seq)) 7 | print(seq) 8 | -------------------------------------------------------------------------------- /Section1/Chap6/6.1.2.read_example_2.py: -------------------------------------------------------------------------------- 1 | #6.1.2.read_example_2.py 2 | 3 | from Bio import SeqIO 4 | 5 | seq = SeqIO.read("sample_2.fasta", "fasta") 6 | print(type(seq)) 7 | print(seq) 8 | 9 | -------------------------------------------------------------------------------- /Section1/Chap6/6.2.1.read_example_1.py: -------------------------------------------------------------------------------- 1 | #6.2.1.read_example_1.py 2 | 3 | from Bio import SeqIO 4 | 5 | seq = SeqIO.parse("sample_1.fastq", 
"fastq") 6 | print(type(seq)) 7 | for s in seq: 8 | print(type(s)) 9 | print(s) 10 | print("") # 줄 구분을 위해 넣었다. 11 | 12 | -------------------------------------------------------------------------------- /Section1/Chap6/6.2.1.read_example_2.py: -------------------------------------------------------------------------------- 1 | #6.2.1.read_example_2.py 2 | 3 | from Bio import SeqIO 4 | 5 | seq = SeqIO.parse("sample_1.fastq", "fastq") 6 | for s in seq: 7 | print(s.seq) 8 | 9 | -------------------------------------------------------------------------------- /Section1/Chap6/6.2.2.read_example_1.py: -------------------------------------------------------------------------------- 1 | #6.2.2.read_example_1.py 2 | import gzip 3 | from Bio import SeqIO 4 | handle = gzip.open("sample_1.fastq.gz","rt") 5 | seq = SeqIO.parse(handle, "fastq") 6 | for s in seq: 7 | print(s.seq) 8 | 9 | -------------------------------------------------------------------------------- /Section1/Chap6/6.2.2.read_example_2.py: -------------------------------------------------------------------------------- 1 | #6.2.2.read_example2.py 2 | 3 | import gzip 4 | from Bio import SeqIO 5 | 6 | with gzip.open("sample_1.fastq.gz","rt") as handle: 7 | seq = SeqIO.parse(handle, "fastq") 8 | for s in seq: 9 | print(s.seq) 10 | 11 | -------------------------------------------------------------------------------- /Section1/Chap6/6.3.read_example_1.py: -------------------------------------------------------------------------------- 1 | #6.3.read_example_1.py 2 | 3 | from Bio import SeqIO 4 | 5 | gbk = SeqIO.read("KT225476.2.gbk","genbank") 6 | print(type(gbk)) 7 | print(gbk) 8 | 9 | -------------------------------------------------------------------------------- /Section1/Chap6/6.3.read_example_2.py: -------------------------------------------------------------------------------- 1 | #6.3.read_example_2.py 2 | 3 | from Bio import SeqIO 4 | 5 | gbk = SeqIO.read("KT225476.2.gbk","genbank") 6 | print(gbk.id) 7 | 
print(gbk.description) 8 | print(gbk.annotations['molecule_type']) 9 | print(gbk.annotations['organism']) 10 | 11 | -------------------------------------------------------------------------------- /Section1/Chap6/6.4.2.entrez_example.py: -------------------------------------------------------------------------------- 1 | #6.4.2.entrez_example.py 2 | 3 | from Bio import Entrez 4 | 5 | Entrez.email = "kenneth.jh.han@gmail.com" 6 | handle = Entrez.efetch(db="nucleotide", rettype="gb", id="AY463215", retmode="text") 7 | 8 | for s in handle: 9 | print(s.strip()) 10 | -------------------------------------------------------------------------------- /Section1/Chap6/6.4.3.read_example_GenBank_1.py: -------------------------------------------------------------------------------- 1 | #6.4.3.read_example_GenBank_1.py 2 | 3 | from Bio import Entrez 4 | from Bio import SeqIO 5 | 6 | Entrez.email = "kenneth.jh.han@gmail.com" 7 | 8 | with Entrez.efetch(db="nucleotide", rettype="fasta", retmode="text", id="42540826") as handle: 9 | seq = SeqIO.read(handle, "fasta") 10 | 11 | print(seq) 12 | print(len(seq)) 13 | 14 | -------------------------------------------------------------------------------- /Section1/Chap6/6.4.3.read_example_GenBank_2.py: -------------------------------------------------------------------------------- 1 | #6.4.3.read_example_GenBank_2.py 2 | 3 | from Bio import Entrez 4 | from Bio import SeqIO 5 | 6 | Entrez.email = "your@email.com" 7 | 8 | with Entrez.efetch(db="nucleotide", rettype="fasta", retmode="text", id="1575550") as handle: 9 | seq = SeqIO.read(handle, "fasta") 10 | 11 | print(seq) 12 | print(len(seq)) 13 | 14 | -------------------------------------------------------------------------------- /Section1/Chap6/HM624086.1.gbk: -------------------------------------------------------------------------------- 1 | LOCUS HM624086 1701 bp cRNA linear VRL 18-OCT-2011 2 | DEFINITION Influenza A virus (A/Perth/260/2009(H1N1)) segment 4 hemagglutinin 3 | (HA) gene, 
complete cds. 4 | ACCESSION HM624086 5 | VERSION HM624086.1 6 | DBLINK BioProject: PRJNA37813 7 | KEYWORDS . 8 | SOURCE Influenza A virus (A/Perth/260/2009(H1N1)) 9 | ORGANISM Influenza A virus (A/Perth/260/2009(H1N1)) 10 | Viruses; ssRNA viruses; ssRNA negative-strand viruses; 11 | Orthomyxoviridae; Influenzavirus A. 12 | REFERENCE 1 (bases 1 to 1701) 13 | AUTHORS Deng,Y.M., Caldwell,N., Hurt,A., Shaw,T., Kelso,A., Chidlow,G., 14 | Williams,S., Smith,D. and Barr,I. 15 | TITLE A comparison of pyrosequencing and neuraminidase inhibition assays 16 | for the detection of oseltamivir-resistant pandemic influenza 17 | A(H1N1) 2009 viruses 18 | JOURNAL Antiviral Res. 90 (1), 87-91 (2011) 19 | PUBMED 21376084 20 | REFERENCE 2 (bases 1 to 1701) 21 | AUTHORS Deng,Y.-M., Iannello,P., Ernest,J., Caldwell,N. and Komadina,N. 22 | TITLE Direct Submission 23 | JOURNAL Submitted (02-JUL-2010) Reference and Research on Influenza, WHO 24 | Collaborating Centre for Reference and Research on Influenza, 10 25 | Wreckyn Street, North Melbourne, Victoria 3051, Australia 26 | COMMENT Swine influenza A (H1N1) virus isolated during human swine flu 27 | outbreak of 2009. 
28 | 29 | ##GISAID_EpiFlu(TM)Data-START## 30 | Isolate :: A/Perth/260/2009 31 | Subtype :: H1N1 32 | Segment_name :: HA 33 | Host_gender :: M 34 | Host_age :: 38 35 | Passage_history :: MDCKX 36 | Antigen_character :: A/California/07/2009-like 37 | Adamantane_resistance :: RESISTANT 38 | Oseltamivir_resistance :: RESISTANT 39 | Country :: Australia 40 | State/Province :: Western Australia 41 | Localization_detail :: Ocean Reef 42 | Collection_day :: 13 43 | Collection_month :: 08 44 | Collection_year :: 2009 45 | Isolate_note :: comment: H275Y change, patient treated 46 | with oseltamivir Originating Laboratory: 47 | Pathwest QE II Medical Centre, Hospital 48 | Avenue, J Block, 6009, Nedlands, Western 49 | Australia, Australia 50 | EPI_accession :: EPI270024 51 | Lineage :: swl 52 | ##GISAID_EpiFlu(TM)Data-END## 53 | FEATURES Location/Qualifiers 54 | source 1..1701 55 | /organism="Influenza A virus (A/Perth/260/2009(H1N1))" 56 | /mol_type="viral cRNA" 57 | /strain="A/Perth/260/2009" 58 | /serotype="H1N1" 59 | /host="Homo sapiens; gender M; age 38" 60 | /db_xref="taxon:861191" 61 | /segment="4" 62 | /country="Australia" 63 | /collection_date="13-Aug-2009" 64 | /note="lineage: swl" 65 | gene 1..1701 66 | /gene="HA" 67 | CDS 1..1701 68 | /gene="HA" 69 | /codon_start=1 70 | /product="hemagglutinin" 71 | /protein_id="ADJ67984.1" 72 | /translation="MKAILVVLLYTFATANADTLCIGYHANNSTDTVDTVLEKNVTVT 73 | HSVNLLEDKHNGKLCKLRGVAPLHLGKCNIAGWILGNPECESLSTASSWSYIVETSSS 74 | DNGTCYPGDFIDYEELREQLSSVSSFERFEIFPKTSSWPNHDSNKGVTAACPHAGAKS 75 | FYKNLIWLVKKGNSYPKLSKSYINDKGKEVLVLWGIHHPSTSADQQSLYQNADAYVFV 76 | GTSRYSKKFKPEIAIRPKVRDQEGRMNYYWTLVEPGDKITFEATGNLVVPRYAFAMER 77 | NAGSGIIISDTPVHDCNTTCQTPKGAINTSLPFQNIHPITIGKCPKYVKSTKLRLATG 78 | LRNVPSIQSRGLFGAIAGFIEGGWTGMVDGWYGYHHQNEQGSGYAADLKSTQNAIDEI 79 | TNKVNSVIEKMNTQFTAVGKEFNHLEKRIENLNKKIDDGFLDIWTYNAELLVLLENER 80 | TLDYHDSNVKNLYEKVRSQLKNNAKEIGNGCFEFYHKCDNTCMESVKNGTYDYPKYSE 81 | EAKLNREEIDGVKLESTRIYQILAIYSTVASSLVLVVSLGAISFWMCSNGSLQCRICI 82 | " 83 | 
ORIGIN 84 | 1 atgaaggcaa tactagtagt tctgctatat acatttgcaa ccgcaaatgc agacacatta 85 | 61 tgtataggtt atcatgcgaa caattcaaca gacactgtag acacagtact agaaaagaat 86 | 121 gtaacagtaa cacactctgt taaccttcta gaagacaagc ataacgggaa actatgcaaa 87 | 181 ctaagagggg tagccccatt gcatttgggt aaatgtaaca ttgctggctg gatcctggga 88 | 241 aacccagagt gtgaatcact ctccacagca agctcatggt cctacattgt ggaaacatct 89 | 301 agttcagaca atggaacgtg ttacccagga gatttcatcg attatgagga gctaagagag 90 | 361 caattgagct cagtgtcatc atttgaaagg tttgagatat tccccaaaac aagttcatgg 91 | 421 cccaatcatg actcgaacaa aggtgtaacg gcagcatgtc ctcatgctgg agcaaaaagc 92 | 481 ttctacaaaa atttaatatg gctagttaaa aaaggaaatt catacccaaa gctcagcaaa 93 | 541 tcctacatta atgataaagg gaaagaagtc ctcgtgctat ggggcattca ccatccatct 94 | 601 actagtgctg accaacaaag tctctatcag aatgcagatg catatgtttt tgtggggaca 95 | 661 tcaagataca gcaagaagtt caagccggaa atagcaataa gacccaaagt gagggatcaa 96 | 721 gaagggagaa tgaactatta ctggacacta gtagagccgg gagacaaaat aacattcgaa 97 | 781 gcaactggaa atctagtggt accgagatat gcattcgcaa tggaaagaaa tgctggatct 98 | 841 ggtattatca tttcagatac accagtccac gattgcaata caacttgtca gacacccaag 99 | 901 ggtgctataa acaccagcct cccatttcag aatatacatc cgatcacaat tggaaaatgt 100 | 961 ccaaaatatg taaaaagcac aaaattgaga ctggccacag gattgaggaa tgtcccgtct 101 | 1021 attcaatcta gaggcctatt tggggccatt gccggtttca ttgaaggggg gtggacaggg 102 | 1081 atggtagatg gatggtacgg ttatcaccat caaaatgagc aggggtcagg atatgcagcc 103 | 1141 gacctgaaga gcacacagaa tgccattgac gagattacta acaaagtaaa ttctgttatt 104 | 1201 gaaaagatga atacacagtt cacagcagta ggtaaagagt tcaaccacct ggaaaaaaga 105 | 1261 atagagaatt taaataaaaa aattgatgat ggtttcctgg acatttggac ttacaatgca 106 | 1321 gaactgttgg ttctattgga aaatgaaaga actttggact accacgattc aaatgtgaag 107 | 1381 aacttatatg aaaaggtaag aagccagtta aaaaacaatg ccaaggaaat tggaaacggc 108 | 1441 tgctttgaat tttaccacaa atgcgataac acgtgcatgg aaagtgtcaa aaatgggact 109 | 1501 tatgactacc caaaatactc agaggaagca aaattaaaca gagaagaaat agatggggta 110 | 1561 aagctggaat 
caacaaggat ttaccagatt ttggcgatct attcaactgt cgccagttca 111 | 1621 ttggtactgg tagtctccct gggggcaatc agtttctgga tgtgctctaa tgggtctcta 112 | 1681 cagtgtagaa tatgtattta a 113 | // 114 | 115 | -------------------------------------------------------------------------------- /Section1/Chap6/sample_1.fasta: -------------------------------------------------------------------------------- 1 | >AF501235.1 Influenzavirus A (A/duck/Shanghai/1/2000) hemagglutinin gene, complete cds 2 | ATGGAGAAAATAGTGCTTCTTCTTGCAATAGTCAGTCTTGTTAAAAGTGATCAGATTTGCATTGGTTACC 3 | ATGCAAACAACTCGACAGAGCAGGTTGACACAATAATGGAAAAGAACGTTACTGTTACACATGCCCAAGA 4 | -------------------------------------------------------------------------------- /Section1/Chap6/sample_1.fastq: -------------------------------------------------------------------------------- 1 | @SRR000982.5E745RJU01DDHJ6length=113 2 | AAGGCACCATGCAGAGATGCAAGGCCCCTTTCTAAGCCCTAGACTTCTGGATGACACTTCTAGAAACACCCTGGGCCAGAAGTGAACCTGCTGCCTTGAAGGGAATAACTCAG 3 | + 4 | DDDDDDDDDDDDDDDDDDFFDDBB::::@@DDDDDDDDDDFEDDAAADDDDDDDDDDDDDDA8666@DD@@866AAADDDDDDDDDDDDDDDDDDDDDCCCAAAACDDDDDDD 5 | @SRR000982.35E745RJU01DLQBClength=53 6 | ATCTCTACCCAAAGATTAATGGGGATTGGTGTGATATACGGCTGAATTGTACC 7 | + 8 | FFFFFFFFFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFF 9 | -------------------------------------------------------------------------------- /Section1/Chap6/sample_1.fastq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjpublic/biopython/1ee610b4a5a0d4bedb8318b9b86b90a7dc3d1e1f/Section1/Chap6/sample_1.fastq.gz -------------------------------------------------------------------------------- /Section1/Chap6/sample_2.fasta: -------------------------------------------------------------------------------- 1 | >MH464856.1 Hepatitis B virus isolate MA134, complete genome 2 | TTCCACAACATTCCACCAAGCTCTGCAGGATCCCAGAGTAAGAGGCCTGTATTTTCCTGCTGGTGGCTCC 3 | 
AGTTCCGGAACAGTGAACCCTGTTCCGACTACTGCCTCACTCATCTCGTCAATCTTCTCGAGGATTGGGG 4 | >CP002925.1 Streptococcus pseudopneumoniae IS7493, complete genome 5 | TTGAAAGAAAAACAATTTTGGAATCGTATATTAGAATTTGCTCAAGAAAGACTGACTCGATCCATGTATG 6 | ATTTCTATGCTATTCAAGCTGAACTCATCAAGGTAGAGGAAAATGTTGCCACTATATTTTTACCACGATC 7 | -------------------------------------------------------------------------------- /Section1/Chap7/7.2.read_MSA_example.py: -------------------------------------------------------------------------------- 1 | #7.2.read_MSA_example.py 2 | 3 | from Bio import AlignIO 4 | 5 | alignment = AlignIO.read("example.aln","clustal") 6 | print(alignment) 7 | -------------------------------------------------------------------------------- /Section1/Chap7/7.3.read_MSA_example_1.py: -------------------------------------------------------------------------------- 1 | #7.3.read_MSA_example_1.py 2 | from Bio import AlignIO 3 | 4 | alignment = AlignIO.read("example.aln","clustal") 5 | for record in alignment: 6 | print("%s - %s" % (record.seq, record.id)) 7 | -------------------------------------------------------------------------------- /Section1/Chap7/7.3.read_MSA_example_2.py: -------------------------------------------------------------------------------- 1 | #7.3.read_MSA_example_2.py 2 | 3 | from Bio import AlignIO 4 | 5 | alignment = AlignIO.read("example.aln","clustal") 6 | for record in alignment: 7 | print("%s - %s" % (record.seq[:10], record.id)) 8 | -------------------------------------------------------------------------------- /Section1/Chap7/7.4.3.muscle_cmd_example.py: -------------------------------------------------------------------------------- 1 | #7.4.3.muscle_cmd_example.py 2 | 3 | from Bio.Align.Applications import MuscleCommandline 4 | 5 | muscle_exe = "/Users/jhan/etc/muscle/muscle3.8.31_i86darwin64" # MUSCLE의 실행경로를 알려준다. MUSCLE 프로그램의 경로는 독자여러분들의 경로에 따라 다르므로 자신의 환경에 맞게 설정하자. 
6 | cmd_line = MuscleCommandline(muscle_exe, input="HBA.all.fasta", out="HBA.aln", clw=" ") # clw 옵션을 주기위해 clw=" " 를 넣었다. 7 | 8 | print(cmd_line) 9 | 10 | stdout, stderr = cmd_line() 11 | -------------------------------------------------------------------------------- /Section1/Chap7/7.5.1.example.fasta: -------------------------------------------------------------------------------- 1 | >sample1 2 | TACAA 3 | >sample2 4 | TACGC 5 | >sample3 6 | TACAC 7 | >sample4 8 | TACCC 9 | >sample5 10 | AACCC 11 | >sample6 12 | AATGC 13 | >sample7 14 | AATGC 15 | -------------------------------------------------------------------------------- /Section1/Chap7/7.5.2.WebLogo_example_1.py: -------------------------------------------------------------------------------- 1 | #7.5.2.WebLogo_example_1.py 2 | 3 | from Bio.motifs import Motif 4 | from Bio import motifs 5 | from Bio.Seq import Seq 6 | 7 | instances = [Seq("TACAA"), 8 | Seq("TACGC"), 9 | Seq("TACAC"), 10 | Seq("TACCC"), 11 | Seq("AACCC"), 12 | Seq("AATGC"), 13 | Seq("AATGC"), 14 | ] 15 | 16 | m = motifs.create(instances) 17 | 18 | print(m.counts) 19 | Motif.weblogo(m,'test.png') 20 | -------------------------------------------------------------------------------- /Section1/Chap7/7.5.2.WebLogo_example_2.py: -------------------------------------------------------------------------------- 1 | #7.5.2.WebLogo_example_2.py 2 | 3 | from Bio import AlignIO 4 | from Bio.motifs import Motif 5 | from Bio import motifs 6 | from Bio.Seq import Seq 7 | from Bio.Alphabet import IUPAC 8 | 9 | alignment = AlignIO.read("HBA.aln","clustal") 10 | instance = [] 11 | for record in alignment: 12 | s = Seq(str(record.seq), IUPAC.protein) 13 | instance.append(s) 14 | m = motifs.create(instance) 15 | Motif.weblogo(m,'HBA_WebLogo.png') 16 | 17 | -------------------------------------------------------------------------------- /Section1/Chap7/7.6.phylo_example.py: -------------------------------------------------------------------------------- 1 | 
#7.6.phylo_example.py 2 | 3 | from Bio import Phylo 4 | 5 | tree = Phylo.read('HBA.newick', 'newick') 6 | print(tree) 7 | 8 | Phylo.draw(tree) 9 | 10 | -------------------------------------------------------------------------------- /Section1/Chap7/HBA.all.fasta: -------------------------------------------------------------------------------- 1 | >sp|P01994|HBA_CHICK Hemoglobin subunit alpha-A OS=Gallus gallus OX=9031 GN=HBAA PE=1 SV=2 2 | MVLSAADKNNVKGIFTKIAGHAEEYGAETLERMFTTYPPTKTYFPHFDLSHGSAQIKGHG 3 | KKVVAALIEAANHIDDIAGTLSKLSDLHAHKLRVDPVNFKLLGQCFLVVVAIHHPAALTP 4 | EVHASLDKFLCAVGTVLTAKYR 5 | >sp|P69907|HBA_PANTR Hemoglobin subunit alpha OS=Pan troglodytes OX=9598 GN=HBA1 PE=1 SV=2 6 | MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG 7 | KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP 8 | AVHASLDKFLASVSTVLTSKYR 9 | >sp|P01958|HBA_HORSE Hemoglobin subunit alpha OS=Equus caballus OX=9796 GN=HBA PE=1 SV=2 10 | MVLSAADKTNVKAAWSKVGGHAGEYGAEALERMFLGFPTTKTYFPHFDLSHGSAQVKAHG 11 | KKVGDALTLAVGHLDDLPGALSNLSDLHAHKLRVDPVNFKLLSHCLLSTLAVHLPNDFTP 12 | AVHASLDKFLSSVSTVLTSKYR 13 | >sp|P69905|HBA_HUMAN Hemoglobin subunit alpha OS=Homo sapiens OX=9606 GN=HBA1 PE=1 SV=2 14 | MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG 15 | KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP 16 | AVHASLDKFLASVSTVLTSKYR 17 | >sp|P01942|HBA_MOUSE Hemoglobin subunit alpha OS=Mus musculus OX=10090 GN=Hba PE=1 SV=2 18 | MVLSGEDKSNIKAAWGKIGGHGAEYGAEALERMFASFPTTKTYFPHFDVSHGSAQVKGHG 19 | KKVADALASAAGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLASHHPADFTP 20 | AVHASLDKFLASVSTVLTSKYR 21 | >sp|P01948|HBA_RABIT Hemoglobin subunit alpha-1/2 OS=Oryctolagus cuniculus OX=9986 PE=1 SV=2 22 | MVLSPADKTNIKTAWEKIGSHGGEYGAEAVERMFLGFPTTKTYFPHFDFTHGSEQIKAHG 23 | KKVSEALTKAVGHLDDLPGALSTLSDLHAHKLRVDPVNFKLLSHCLLVTLANHHPSEFTP 24 | AVHASLDKFLANVSTVLTSKYR 25 | >sp|P18971|HBA_BALAC Hemoglobin subunit alpha OS=Balaenoptera acutorostrata OX=9767 GN=HBA PE=1 SV=2 26 | 
MVLSPTDKSNVKATWAKIGNHGAEYGAEALERMFMNFPSTKTYFPHFDLGHDSAQVKGHG 27 | KKVADALTKAVGHMDNLLDALSDLSDLHAHKLRVDPANFKLLSHCLLVTLALHLPAEFTP 28 | SVHASLDKFLASVSTVLTSKYR 29 | -------------------------------------------------------------------------------- /Section1/Chap7/HBA.aln: -------------------------------------------------------------------------------- 1 | MUSCLE (3.8) multiple sequence alignment 2 | 3 | 4 | sp|P01994|HBA_CHICK MVLSAADKNNVKGIFTKIAGHAEEYGAETLERMFTTYPPTKTYFPHFDLSHGSAQIKGHG 5 | sp|P18971|HBA_BALAC MVLSPTDKSNVKATWAKIGNHGAEYGAEALERMFMNFPSTKTYFPHFDLGHDSAQVKGHG 6 | sp|P01948|HBA_RABIT MVLSPADKTNIKTAWEKIGSHGGEYGAEAVERMFLGFPTTKTYFPHFDFTHGSEQIKAHG 7 | sp|P01942|HBA_MOUSE MVLSGEDKSNIKAAWGKIGGHGAEYGAEALERMFASFPTTKTYFPHFDVSHGSAQVKGHG 8 | sp|P69907|HBA_PANTR MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG 9 | sp|P69905|HBA_HUMAN MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG 10 | sp|P01958|HBA_HORSE MVLSAADKTNVKAAWSKVGGHAGEYGAEALERMFLGFPTTKTYFPHFDLSHGSAQVKAHG 11 | **** **.*:* : *:. *. *****::**** :*.*********. *.* *:*.** 12 | 13 | sp|P01994|HBA_CHICK KKVVAALIEAANHIDDIAGTLSKLSDLHAHKLRVDPVNFKLLGQCFLVVVAIHHPAALTP 14 | sp|P18971|HBA_BALAC KKVADALTKAVGHMDNLLDALSDLSDLHAHKLRVDPANFKLLSHCLLVTLALHLPAEFTP 15 | sp|P01948|HBA_RABIT KKVSEALTKAVGHLDDLPGALSTLSDLHAHKLRVDPVNFKLLSHCLLVTLANHHPSEFTP 16 | sp|P01942|HBA_MOUSE KKVADALASAAGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLASHHPADFTP 17 | sp|P69907|HBA_PANTR KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP 18 | sp|P69905|HBA_HUMAN KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP 19 | sp|P01958|HBA_HORSE KKVGDALTLAVGHLDDLPGALSNLSDLHAHKLRVDPVNFKLLSHCLLSTLAVHLPNDFTP 20 | *** ** *. 
*:*:: .:** *************.*****.:*:* .:* * * :** 21 | 22 | sp|P01994|HBA_CHICK EVHASLDKFLCAVGTVLTAKYR 23 | sp|P18971|HBA_BALAC SVHASLDKFLASVSTVLTSKYR 24 | sp|P01948|HBA_RABIT AVHASLDKFLANVSTVLTSKYR 25 | sp|P01942|HBA_MOUSE AVHASLDKFLASVSTVLTSKYR 26 | sp|P69907|HBA_PANTR AVHASLDKFLASVSTVLTSKYR 27 | sp|P69905|HBA_HUMAN AVHASLDKFLASVSTVLTSKYR 28 | sp|P01958|HBA_HORSE AVHASLDKFLSSVSTVLTSKYR 29 | *********. *.****:*** 30 | -------------------------------------------------------------------------------- /Section1/Chap7/HBA.newick: -------------------------------------------------------------------------------- 1 | ( 2 | ( 3 | sp|P01994|HBA_CHICK:0.29387, 4 | ( 5 | ( 6 | sp|P60529|HBA_CANLF:0.11810, 7 | sp|P01948|HBA_RABIT:0.10357) 8 | :0.01282, 9 | ( 10 | ( 11 | sp|P69907|HBA_PANTR:0.00000, 12 | sp|P69905|HBA_HUMAN:0.00000) 13 | :0.05673, 14 | sp|P01958|HBA_HORSE:0.07405) 15 | :0.00617) 16 | :0.01203) 17 | :0.00537, 18 | sp|P18971|HBA_BALAC:0.13313, 19 | sp|P01942|HBA_MOUSE:0.06808); 20 | -------------------------------------------------------------------------------- /Section1/Chap7/HBA_WebLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjpublic/biopython/1ee610b4a5a0d4bedb8318b9b86b90a7dc3d1e1f/Section1/Chap7/HBA_WebLogo.png -------------------------------------------------------------------------------- /Section1/Chap7/example.aln: -------------------------------------------------------------------------------- 1 | MUSCLE (3.8) multiple sequence alignment 2 | 3 | 4 | sp|P69905|HBA_HUMAN MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG 5 | sp|P69907|HBA_PANTR MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSHGSAQVKGHG 6 | sp|P01942|HBA_MOUSE MVLSGEDKSNIKAAWGKIGGHGAEYGAEALERMFASFPTTKTYFPHFDVSHGSAQVKGHG 7 | **** **:*:******:*.*..*********** *************:*********** 8 | 9 | sp|P69905|HBA_HUMAN KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP 10 | sp|P69907|HBA_PANTR 
KKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTP 11 | sp|P01942|HBA_MOUSE KKVADALASAAGHLDDLPGALSALSDLHAHKLRVDPVNFKLLSHCLLVTLASHHPADFTP 12 | *******:.*..*:**:*.********************************:* **:*** 13 | 14 | sp|P69905|HBA_HUMAN AVHASLDKFLASVSTVLTSKYR 15 | sp|P69907|HBA_PANTR AVHASLDKFLASVSTVLTSKYR 16 | sp|P01942|HBA_MOUSE AVHASLDKFLASVSTVLTSKYR 17 | ********************** 18 | -------------------------------------------------------------------------------- /Section1/Chap7/test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjpublic/biopython/1ee610b4a5a0d4bedb8318b9b86b90a7dc3d1e1f/Section1/Chap7/test.png -------------------------------------------------------------------------------- /Section1/Chap8/8.3.1.blast_example_1.py: -------------------------------------------------------------------------------- 1 | #8.3.1.blast_example_1.py 2 | 3 | from Bio.Blast import NCBIWWW 4 | from Bio import SeqIO 5 | 6 | record = SeqIO.read("buccal_swab.unmapped1.fasta", format="fasta") 7 | handle = NCBIWWW.qblast("blastn","nt",record.format("fasta")) 8 | result = handle.readlines() 9 | for s in result: 10 | print(s) 11 | 12 | -------------------------------------------------------------------------------- /Section1/Chap8/8.3.2.blast_example.py: -------------------------------------------------------------------------------- 1 | #8.3.2.blast_example.py 2 | 3 | from Bio.Blast import NCBIWWW 4 | from Bio.Blast import NCBIXML 5 | from Bio import SeqIO 6 | 7 | record = SeqIO.read("buccal_swab.unmapped1.fasta", format="fasta") 8 | handle = NCBIWWW.qblast("blastn","nt",record.format("fasta")) 9 | 10 | blast_records = NCBIXML.parse(handle) 11 | E_VALUE_THRESHOLD = 0.05 12 | for blast_record in blast_records: 13 | for alignment in blast_record.alignments: 14 | for hsp in alignment.hsps: 15 | if hsp.expect < E_VALUE_THRESHOLD: 16 | print(alignment.title) 17 | print(alignment.length) 18 | 
print(hsp.expect) 19 | print(hsp.query[0:75]) 20 | print(hsp.match[0:75]) 21 | print(hsp.sbjct[0:75]) 22 | 23 | -------------------------------------------------------------------------------- /Section1/Chap8/buccal_swab.unmapped1.fasta: -------------------------------------------------------------------------------- 1 | >buccal_swab.unmapped1 2 | CTTTTGTTAATCGATGATATACAGTCACTCAGCGGAAAAAAAGTCGCAACTCAGGAAGAA 3 | TTTTTCAATACCTTTAACGCCCTTCATG 4 | -------------------------------------------------------------------------------- /Section1/Chap8/buccal_swab.unmapped2.fasta: -------------------------------------------------------------------------------- 1 | >buccal_swab.unmapped2 2 | CCAGCCCCCCAGCCTCCCGATCACGGTTTACTACGCCGTGTTGGAGCGCGCCTGCCGCAG 3 | CGTGCTCCTAAACGCACCGTCGGAGGCCCCCCAGATTGTCCGC 4 | -------------------------------------------------------------------------------- /Section1/Chap9/9.2.1.efetch_example.py: -------------------------------------------------------------------------------- 1 | #9.2.1.efetch_example.py 2 | 3 | from Bio import Entrez 4 | 5 | Entrez.email = "your@email.com" # 자신의 이메일을 사용해야 한다. 
6 | handle = Entrez.efetch(db="nucleotide", id="NC_002058.3", rettype="gb", retmode="text") 7 | print(handle.read()) 8 | -------------------------------------------------------------------------------- /Section1/Chap9/9.2.2.entrez_parse_example.py: -------------------------------------------------------------------------------- 1 | #9.2.2.entrez_parse_example.py 2 | 3 | from Bio import Entrez 4 | 5 | Entrez.email = "your@email.com" 6 | handle = Entrez.efetch(db="nucleotide", id="NC_002058.3", rettype="gb", retmode="xml") 7 | records = Entrez.parse(handle) 8 | for record in records: 9 | for journal in record["GBSeq_references"]: 10 | print(journal["GBReference_title"]) 11 | 12 | -------------------------------------------------------------------------------- /Section1/Chap9/9.2.2.entrez_read_example.py: -------------------------------------------------------------------------------- 1 | #9.2.2.entrez_read_example.py 2 | 3 | from Bio import Entrez 4 | 5 | Entrez.email = "your@email.com" 6 | handle = Entrez.efetch(db="nucleotide", id="NC_002058.3", rettype="gb", retmode="xml") 7 | records = Entrez.read(handle) 8 | for record in records: 9 | print(record["GBSeq_locus"]) 10 | print(record["GBSeq_definition"]) 11 | print(record["GBSeq_strandedness"], record["GBSeq_moltype"]) 12 | print(record["GBSeq_length"], "bp") 13 | print(len(record["GBSeq_references"]), "journals") 14 | 15 | -------------------------------------------------------------------------------- /Section1/Chap9/9.3.entrez.einfo_example.py: -------------------------------------------------------------------------------- 1 | #9.3.entrez.einfo_example.py 2 | 3 | from Bio import Entrez 4 | 5 | Entrez.email = "your@email.com" 6 | handle = Entrez.einfo() 7 | record = Entrez.read(handle) 8 | 9 | print(record) 10 | 11 | print(len(record["DbList"])) 12 | -------------------------------------------------------------------------------- /Section1/Chap9/9.3.entrez_example_1.py: 
-------------------------------------------------------------------------------- 1 | #9.3.entrez_example_1.py 2 | 3 | from Bio import Entrez 4 | 5 | Entrez.email = "your@email.com" 6 | handle = Entrez.einfo() 7 | result = handle.read() 8 | print(result) 9 | 10 | -------------------------------------------------------------------------------- /Section1/Chap9/9.4.entrez.esearch_example.py: -------------------------------------------------------------------------------- 1 | #9.4.entrez.esearch_example.py 2 | 3 | from Bio import Entrez 4 | 5 | Entrez.email = "your@email.com" 6 | handle = Entrez.esearch(db="pubmed", term="metagenome") 7 | record = Entrez.read(handle) 8 | print(record["Count"]) # 결과로 7020이 출력된다. 9 | 10 | -------------------------------------------------------------------------------- /Section2/001.py: -------------------------------------------------------------------------------- 1 | #001.py 2 | print("Hello, Bioinformatics") 3 | -------------------------------------------------------------------------------- /Section2/002.py: -------------------------------------------------------------------------------- 1 | #002.py 2 | r = 3 3 | PI = 3.14 4 | area = r * r * PI 5 | print(area) 6 | -------------------------------------------------------------------------------- /Section2/003.py: -------------------------------------------------------------------------------- 1 | #003.py 2 | num1 = 3 3 | num2 = 5 4 | print(num1 + num2) 5 | print(num1 - num2) 6 | print(num1 * num2) 7 | print(num1 / num2) 8 | print(num1 % num2) 9 | print(num1 ** num2) 10 | -------------------------------------------------------------------------------- /Section2/004.py: -------------------------------------------------------------------------------- 1 | #004.py 2 | num1 = 3 3 | if num1 % 2 == 1: 4 | print(num1, "은 홀수다.") 5 | else: 6 | print(num1, "은 짝수다.") 7 | -------------------------------------------------------------------------------- /Section2/005.py: 
-------------------------------------------------------------------------------- 1 | #005.py 2 | num1 = 21 3 | if num1 % 3 ==0 and num1 % 7 == 0: 4 | print(num1, "은 3과 7의 배수다.") 5 | elif num1 % 3 == 0: 6 | print(num1, "은 3의 배수다.") 7 | elif num1 % 7 == 0: 8 | print(num1, "은 7의 배수다.") 9 | else: 10 | print(num1, "은 3 또는 7의 배수가 아니다.") 11 | -------------------------------------------------------------------------------- /Section2/006.py: -------------------------------------------------------------------------------- 1 | #006.py 2 | s = 0 3 | for i in range(1,11,1): 4 | s += i 5 | print(s) 6 | -------------------------------------------------------------------------------- /Section2/007.py: -------------------------------------------------------------------------------- 1 | #007.py 2 | for i in range(2,9,2): 3 | for j in range(1,10,1): 4 | print(i, "*", j, "=", i*j) 5 | -------------------------------------------------------------------------------- /Section2/008.py: -------------------------------------------------------------------------------- 1 | #008.py 2 | num = 5 3 | result = 1 4 | 5 | while num > 0: 6 | result *= num 7 | num -= 1 8 | 9 | print(result) 10 | -------------------------------------------------------------------------------- /Section2/009.py: -------------------------------------------------------------------------------- 1 | #009.py 2 | 3 | def greet(): 4 | print("Hello, Bioinformatics") 5 | 6 | greet() 7 | greet() 8 | -------------------------------------------------------------------------------- /Section2/010.py: -------------------------------------------------------------------------------- 1 | #010.py 2 | 3 | def mySum(num1, num2): 4 | print("%s + %s = %s" %(num1, num2, num1+num2)) 5 | 6 | mySum(2, 3) 7 | mySum(5, 7) 8 | mySum(10,15) 9 | -------------------------------------------------------------------------------- /Section2/011.py: -------------------------------------------------------------------------------- 1 | #011.py 2 | 3 | def 
Factorial(): 4 | result = 1 5 | num = 5 6 | 7 | while num > 0: 8 | result *= num 9 | num -= 1 10 | 11 | return result 12 | 13 | 14 | result = Factorial() 15 | print(result) 16 | -------------------------------------------------------------------------------- /Section2/012.py: -------------------------------------------------------------------------------- 1 | #012.py 2 | 3 | def Factorial(num): 4 | result = 1 5 | 6 | while num > 0: 7 | result *= num 8 | num -= 1 9 | 10 | return result 11 | 12 | num = 3 13 | result = Factorial(num) 14 | print(result) 15 | -------------------------------------------------------------------------------- /Section2/013.py: -------------------------------------------------------------------------------- 1 | #013.py 2 | 3 | name = input("이름 입력: ") 4 | print("Hello %s." % name) 5 | -------------------------------------------------------------------------------- /Section2/014.py: -------------------------------------------------------------------------------- 1 | #014.py 2 | 3 | s = input("입력: ") 4 | if s.isalpha(): 5 | print("%s는 문자." % s) 6 | else: 7 | print("%s는 숫자." 
% s) 8 | -------------------------------------------------------------------------------- /Section2/015.py: -------------------------------------------------------------------------------- 1 | #015.py 2 | 3 | import sys 4 | 5 | s = sys.argv[1] 6 | print("Hello %s" % s) 7 | -------------------------------------------------------------------------------- /Section2/016-1.py: -------------------------------------------------------------------------------- 1 | #016-1.py 2 | f = open("read_sample.txt",'r') 3 | r = f.readlines() 4 | f.close() 5 | for s in r: 6 | print(s.strip()) 7 | -------------------------------------------------------------------------------- /Section2/016-2.py: -------------------------------------------------------------------------------- 1 | #016-2.py 2 | with open("read_sample.txt",'r') as handle: 3 | for line in handle: 4 | print(line.strip()) 5 | -------------------------------------------------------------------------------- /Section2/017-1.py: -------------------------------------------------------------------------------- 1 | #017-1.py 2 | f = open("write_sample.txt",'w') 3 | f.write("Hello\n") 4 | f.write("write_sample text file\n") 5 | f.close() 6 | -------------------------------------------------------------------------------- /Section2/017-2.py: -------------------------------------------------------------------------------- 1 | #017-2.py 2 | with open("write_sample.txt",'w') as handle: 3 | handle.write("Hello\n") 4 | handle.write("write_sample text file\n") 5 | 6 | -------------------------------------------------------------------------------- /Section2/019_after.py: -------------------------------------------------------------------------------- 1 | #019_after.py 2 | try: 3 | with open("noname.txt",'r') as fr: 4 | read = fr.readlines() 5 | print(read) 6 | except FileNotFoundError: 7 | print("파일이 없습니다.") 8 | -------------------------------------------------------------------------------- /Section2/019_before.py: 
-------------------------------------------------------------------------------- 1 | #019_before.py 2 | with open("noname.txt",'r') as fr: 3 | read = fr.readlines() 4 | print(read) 5 | 6 | -------------------------------------------------------------------------------- /Section2/020_after.py: -------------------------------------------------------------------------------- 1 | #020_after.py 2 | try: 3 | num = int(input("Enter: ")) 4 | print(10 / num) 5 | except ZeroDivisionError: 6 | print("0으로는 나눌 수 없습니다.") 7 | except ValueError: 8 | print("값을 입력해주세요.") 9 | -------------------------------------------------------------------------------- /Section2/020_before.py: -------------------------------------------------------------------------------- 1 | #020_before.py 2 | num = int(input("Enter: ")) 3 | print(10 / num) 4 | -------------------------------------------------------------------------------- /Section2/021.py: -------------------------------------------------------------------------------- 1 | #021.py 2 | 3 | a = "Bio" 4 | b = "Informatics" 5 | c = a + b 6 | 7 | print(c) 8 | -------------------------------------------------------------------------------- /Section2/022.py: -------------------------------------------------------------------------------- 1 | #022.py 2 | 3 | Met = "ATG" 4 | Trp = "TGG" * 10 5 | His = "CAT" 6 | 7 | seq = Met + Trp + His 8 | print(seq) 9 | -------------------------------------------------------------------------------- /Section2/023.py: -------------------------------------------------------------------------------- 1 | #023.py 2 | 3 | seq = "AGTTTATAG" 4 | print(seq[5]) 5 | -------------------------------------------------------------------------------- /Section2/024.py: -------------------------------------------------------------------------------- 1 | #024.py 2 | 3 | seq = "AGTTTATAG" 4 | print(seq[3:6]) 5 | -------------------------------------------------------------------------------- /Section2/025.py: 
-------------------------------------------------------------------------------- 1 | #025.py 2 | 3 | seq = "AGTTTATAG" 4 | print(len(seq)) 5 | -------------------------------------------------------------------------------- /Section2/026.py: -------------------------------------------------------------------------------- 1 | #026.py 2 | 3 | seq = "ATGttATaG" 4 | print(seq.upper()) 5 | print(seq.lower()) 6 | -------------------------------------------------------------------------------- /Section2/027-1.py: -------------------------------------------------------------------------------- 1 | #027-1.py 2 | 3 | seq = "AGTTTATAG" 4 | for s in seq[::3]: 5 | #for s in seq[0:len(seq):3]: # seq[::3]과 같은 표현이다. 6 | print(s) 7 | -------------------------------------------------------------------------------- /Section2/027-2.py: -------------------------------------------------------------------------------- 1 | #027-2.py 2 | 3 | seq = "AGTTTATAG" 4 | for i in range(0,len(seq),3): 5 | print(seq[i]) 6 | -------------------------------------------------------------------------------- /Section2/028.py: -------------------------------------------------------------------------------- 1 | #028.py 2 | 3 | seq = "AGTTTATAG" 4 | for i in range(0,len(seq),3): 5 | print(seq[i:i+3]) 6 | -------------------------------------------------------------------------------- /Section2/029-1.py: -------------------------------------------------------------------------------- 1 | #029-1.py 2 | seq = "AGTTTATAG" 3 | rev_seq = "" 4 | for i in range(len(seq)-1,-1,-1): 5 | rev_seq += seq[i] 6 | print(rev_seq) 7 | -------------------------------------------------------------------------------- /Section2/029-2.py: -------------------------------------------------------------------------------- 1 | #029-2.py 2 | seq = "AGTTTATAG" 3 | print(seq[::-1]) 4 | -------------------------------------------------------------------------------- /Section2/029-3.py: 
-------------------------------------------------------------------------------- 1 | #029-3.py 2 | seq = "AGTTTATAG" 3 | print(''.join(reversed(seq))) 4 | -------------------------------------------------------------------------------- /Section2/030.py: -------------------------------------------------------------------------------- 1 | #030.py 2 | seq = "AGTTTATAG" 3 | new_seq = "" 4 | for s in seq: 5 | if s == "A": 6 | new_seq += "T" 7 | elif s == "C": 8 | new_seq += "G" 9 | elif s == "G": 10 | new_seq += "C" 11 | elif s == "T": 12 | new_seq += "A" 13 | print(new_seq) 14 | -------------------------------------------------------------------------------- /Section2/031-1.py: -------------------------------------------------------------------------------- 1 | #031-1.py 2 | 3 | seq = "AGTTTATAG" 4 | rev_seq = seq[::-1] 5 | revcomp_seq = "" 6 | for s in rev_seq: 7 | if s == "A": 8 | revcomp_seq += "T" 9 | elif s == "C": 10 | revcomp_seq += "G" 11 | elif s == "G": 12 | revcomp_seq += "C" 13 | elif s == "T": 14 | revcomp_seq += "A" 15 | print(seq) 16 | print(revcomp_seq) 17 | -------------------------------------------------------------------------------- /Section2/031-2.py: -------------------------------------------------------------------------------- 1 | #031-2.py 2 | from Bio.Seq import Seq 3 | 4 | seq = Seq("AGTTTATAG") 5 | print(seq) 6 | print(seq.reverse_complement()) 7 | -------------------------------------------------------------------------------- /Section2/032.py: -------------------------------------------------------------------------------- 1 | #032.py 2 | 3 | seq = "AGTTTATAG" 4 | print("C" in seq) 5 | print("T" in seq) 6 | -------------------------------------------------------------------------------- /Section2/033.py: -------------------------------------------------------------------------------- 1 | #033.py 2 | 3 | seq = "AGTTTATAG" 4 | motif = "TT" 5 | for i in range(len(seq)): 6 | if seq[i:i+len(motif)] == motif: 7 | print(i) 8 | 
-------------------------------------------------------------------------------- /Section2/034-1.py: -------------------------------------------------------------------------------- 1 | #034-1.py 2 | 3 | seq = "AGTTTATAG" 4 | A = seq.count("A") 5 | C = seq.count("C") 6 | G = seq.count("G") 7 | T = seq.count("T") 8 | 9 | print("A:", A) 10 | print("C:", C) 11 | print("G:", G) 12 | print("T:", T) 13 | 14 | -------------------------------------------------------------------------------- /Section2/034-2.py: -------------------------------------------------------------------------------- 1 | #034-2.py 2 | 3 | seq = "AGTTTATAG" 4 | count_dic = {} 5 | 6 | for s in seq: 7 | if s in count_dic: 8 | count_dic[s] += 1 9 | else: 10 | count_dic[s] = 1 11 | 12 | for k, v in count_dic.items(): 13 | print("%s: %s" % (k, v)) 14 | -------------------------------------------------------------------------------- /Section2/035-1.py: -------------------------------------------------------------------------------- 1 | #035-1.py 2 | 3 | seq = "AGTTTATAG" 4 | print(seq.replace("T", "U")) 5 | -------------------------------------------------------------------------------- /Section2/035-2.py: -------------------------------------------------------------------------------- 1 | #035-2.py 2 | 3 | from Bio.Seq import Seq 4 | 5 | seq = Seq("AGTTTATAG") 6 | print(seq.transcribe()) 7 | -------------------------------------------------------------------------------- /Section2/036.py: -------------------------------------------------------------------------------- 1 | #036.py 2 | 3 | s = "Welcome to the Bioinformatics World!" 
4 | arr = s.split() 5 | print(len(arr)) 6 | -------------------------------------------------------------------------------- /Section2/037-1.py: -------------------------------------------------------------------------------- 1 | #037-1.py 2 | 3 | import math 4 | 5 | n = 144 6 | print(math.sqrt(144)) 7 | -------------------------------------------------------------------------------- /Section2/037-2.py: -------------------------------------------------------------------------------- 1 | #037-2.py 2 | 3 | import math 4 | 5 | n = 144 6 | print(math.pow(144, 0.5)) 7 | -------------------------------------------------------------------------------- /Section2/037-3.py: -------------------------------------------------------------------------------- 1 | #037-3.py 2 | 3 | n = 144 4 | print(n**0.5) 5 | -------------------------------------------------------------------------------- /Section2/038-1.py: -------------------------------------------------------------------------------- 1 | #038-1.py 2 | 3 | print(abs(10)) 4 | print(abs(-15)) 5 | -------------------------------------------------------------------------------- /Section2/038-2.py: -------------------------------------------------------------------------------- 1 | #038-2.py 2 | 3 | def get_absolute(n): 4 | if n >= 0: 5 | return n 6 | else: 7 | return -n 8 | 9 | print(get_absolute(10)) 10 | print(get_absolute(-15)) 11 | -------------------------------------------------------------------------------- /Section2/039.py: -------------------------------------------------------------------------------- 1 | #039.py 2 | 3 | import math 4 | 5 | print(math.log10(2)) 6 | -------------------------------------------------------------------------------- /Section2/040.py: -------------------------------------------------------------------------------- 1 | #040.py 2 | 3 | import math 4 | 5 | print(math.log(2)) 6 | -------------------------------------------------------------------------------- /Section2/041.py: 
-------------------------------------------------------------------------------- 1 | #041.py 2 | 3 | import math 4 | 5 | print(math.log(81, 3)) 6 | -------------------------------------------------------------------------------- /Section2/042.py: -------------------------------------------------------------------------------- 1 | #042.py 2 | 3 | print(round(62.77779, 2)) 4 | -------------------------------------------------------------------------------- /Section2/043.py: -------------------------------------------------------------------------------- 1 | #043.py 2 | 3 | print(round(78564, -3)) 4 | -------------------------------------------------------------------------------- /Section2/044-1.py: -------------------------------------------------------------------------------- 1 | #044-1.py 2 | 3 | from random import randint 4 | 5 | for i in range(6): 6 | print(randint(1, 45)) 7 | -------------------------------------------------------------------------------- /Section2/044-2.py: -------------------------------------------------------------------------------- 1 | #044-2.py 2 | 3 | from random import randrange 4 | 5 | for i in range(6): 6 | print(randrange(1,45+1)) 7 | -------------------------------------------------------------------------------- /Section2/045.py: -------------------------------------------------------------------------------- 1 | #045.py 2 | 3 | from random import randint 4 | 5 | arr_lotto = [] 6 | 7 | for i in range(6): 8 | n = randint(1, 45) 9 | if n not in arr_lotto: 10 | arr_lotto.append(n) 11 | 12 | for i in sorted(arr_lotto): 13 | print(i) 14 | -------------------------------------------------------------------------------- /Section2/046-1.py: -------------------------------------------------------------------------------- 1 | #046-1.py 2 | 3 | seq = "11A2TG3TT000AT1A2G" 4 | new_seq = "" 5 | 6 | for s in seq: 7 | if s.isalpha(): 8 | new_seq += s 9 | 10 | print(new_seq) 11 | 12 | 
-------------------------------------------------------------------------------- /Section2/046-2.py: -------------------------------------------------------------------------------- 1 | #046-2.py 2 | 3 | import re 4 | seq = "11A2TG3TT000AT1A2G" 5 | match = re.findall(r'[a-zA-Z]', seq) 6 | 7 | if match: 8 | print(''.join(match)) 9 | -------------------------------------------------------------------------------- /Section2/047.py: -------------------------------------------------------------------------------- 1 | #047.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | b = [8, 10, 7, 6, 9] 5 | 6 | print(len(a)) 7 | print(len(b)) 8 | -------------------------------------------------------------------------------- /Section2/048.py: -------------------------------------------------------------------------------- 1 | #048.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | b = [8, 10, 7, 6, 9] 5 | 6 | print(a[1]) 7 | print(b[1]) 8 | -------------------------------------------------------------------------------- /Section2/049.py: -------------------------------------------------------------------------------- 1 | #049.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | b = [8, 10, 7, 6, 9] 5 | 6 | print(a[1:4]) 7 | print(b[1:4]) 8 | -------------------------------------------------------------------------------- /Section2/050.py: -------------------------------------------------------------------------------- 1 | #050.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | b = [8, 10, 7, 6, 9] 5 | 6 | print(a[::2]) 7 | print(b[1::2]) 8 | -------------------------------------------------------------------------------- /Section2/051.py: -------------------------------------------------------------------------------- 1 | #051.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | b = [8, 10, 7, 6, 9] 5 | 6 | print(a[::-1]) 7 | print(b[::-2]) 8 | -------------------------------------------------------------------------------- /Section2/052.py: -------------------------------------------------------------------------------- 1 | #052.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 
| b = [8, 10, 7, 6, 9] 5 | 6 | print("sorted(a)") 7 | print(sorted(a)) 8 | print("a") 9 | print(a) 10 | 11 | print("") 12 | b.sort() 13 | print("b.sort()") 14 | print(b) 15 | -------------------------------------------------------------------------------- /Section2/053.py: -------------------------------------------------------------------------------- 1 | #053.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | b = [8, 10, 7, 6, 9] 5 | 6 | print("sorted(a)") 7 | print(sorted(a, reverse=True)) 8 | print("a") 9 | print(a) 10 | 11 | print("") 12 | b.sort(reverse=True) 13 | print("b.sort()") 14 | print(b) 15 | -------------------------------------------------------------------------------- /Section2/054-1.py: -------------------------------------------------------------------------------- 1 | #054-1.py 2 | 3 | a = ["tree", "lake", "park"] 4 | a.append("goose") 5 | 6 | print(a) 7 | -------------------------------------------------------------------------------- /Section2/054-2.py: -------------------------------------------------------------------------------- 1 | #054-2.py 2 | 3 | a = ["tree", "lake", "park"] 4 | a += ["goose"] 5 | 6 | print(a) 7 | -------------------------------------------------------------------------------- /Section2/055.py: -------------------------------------------------------------------------------- 1 | #055.py 2 | 3 | a = ["tree", "lake", "park"] 4 | a.insert(2, "goose") 5 | 6 | print(a) 7 | -------------------------------------------------------------------------------- /Section2/056-1.py: -------------------------------------------------------------------------------- 1 | #056-1.py 2 | 3 | a = ["tree", "lake", "park"] 4 | a.remove("lake") 5 | 6 | print(a) 7 | -------------------------------------------------------------------------------- /Section2/056-2.py: -------------------------------------------------------------------------------- 1 | #056-2.py 2 | 3 | a = ["tree", "lake", "park"] 4 | idx = a.index("lake") 5 | a.pop(idx) 6 | 7 | print(a) 8 | 
-------------------------------------------------------------------------------- /Section2/057-1.py: -------------------------------------------------------------------------------- 1 | #057-1.py 2 | 3 | a = ["tree", "lake", "park", "park", "lake", "lake"] 4 | 5 | print(a.count("lake")) 6 | -------------------------------------------------------------------------------- /Section2/057-2.py: -------------------------------------------------------------------------------- 1 | #057-2.py 2 | 3 | a = ["tree", "lake", "park", "park", "lake", "lake"] 4 | 5 | count = 0 6 | 7 | for s in a: 8 | if s == "lake": 9 | count += 1 10 | 11 | print(count) 12 | -------------------------------------------------------------------------------- /Section2/058-1.py: -------------------------------------------------------------------------------- 1 | #058-1.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | 5 | print(max(a)) 6 | -------------------------------------------------------------------------------- /Section2/058-2.py: -------------------------------------------------------------------------------- 1 | #058-2.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | 5 | max_val = a[0] 6 | 7 | for i in range(1,len(a)): 8 | if max_val < a[i]: 9 | max_val = a[i] 10 | 11 | print(max_val) 12 | -------------------------------------------------------------------------------- /Section2/059-1.py: -------------------------------------------------------------------------------- 1 | #059-1.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | 5 | print(min(a)) 6 | -------------------------------------------------------------------------------- /Section2/059-2.py: -------------------------------------------------------------------------------- 1 | #059-2.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | 5 | min_val = a[0] 6 | 7 | for i in range(1,len(a)): 8 | if min_val > a[i]: 9 | min_val = a[i] 10 | 11 | print(min_val) 12 | -------------------------------------------------------------------------------- /Section2/060-1.py: 
-------------------------------------------------------------------------------- 1 | #060-1.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | 5 | sum_val = sum(a) 6 | 7 | print(sum_val) 8 | -------------------------------------------------------------------------------- /Section2/060-2.py: -------------------------------------------------------------------------------- 1 | #060-2.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | 5 | sum_val = 0 6 | 7 | for i in a: 8 | sum_val += i 9 | 10 | print(sum_val) 11 | -------------------------------------------------------------------------------- /Section2/061.py: -------------------------------------------------------------------------------- 1 | #061.py 2 | 3 | a = [3, 5, 2, 1, 4] 4 | 5 | sum_val = sum(a) 6 | 7 | print(sum_val/len(a)) 8 | -------------------------------------------------------------------------------- /Section2/062.py: -------------------------------------------------------------------------------- 1 | #062.py 2 | s = "a;b;c;d;e" 3 | 4 | arr = s.split(";") 5 | 6 | print(arr) 7 | -------------------------------------------------------------------------------- /Section2/063.py: -------------------------------------------------------------------------------- 1 | #063.py 2 | 3 | arr = ['a', 'b', 'c', 'd', 'e'] 4 | 5 | s = ";".join(arr) 6 | 7 | print(s) 8 | -------------------------------------------------------------------------------- /Section2/064.py: -------------------------------------------------------------------------------- 1 | #064.py 2 | 3 | from random import shuffle 4 | 5 | a = [1, 2, 3, 4, 5] 6 | 7 | shuffle(a) 8 | 9 | print(a) 10 | -------------------------------------------------------------------------------- /Section2/065.py: -------------------------------------------------------------------------------- 1 | #065.py 2 | 3 | d = {"Leu": "L", "Met": "M", "Ser": "S"} 4 | 5 | print(d) 6 | -------------------------------------------------------------------------------- /Section2/066-1.py: 
-------------------------------------------------------------------------------- 1 | #066-1.py 2 | 3 | d = {"Leu": "L", "Met": "M", "Ser": "S"} 4 | 5 | print(d) 6 | -------------------------------------------------------------------------------- /Section2/066-2.py: -------------------------------------------------------------------------------- 1 | #066-2.py 2 | 3 | d = {} 4 | 5 | key_list = ["Leu", "Met", "Ser"] 6 | value_list = ["L", "M", "S"] 7 | 8 | for i in range(len(key_list)): 9 | d[key_list[i]] = value_list[i] 10 | 11 | print(d) 12 | -------------------------------------------------------------------------------- /Section2/067.py: -------------------------------------------------------------------------------- 1 | #067.py 2 | 3 | d = {"Leu": "L", "Met": "M", "Ser": "S"} 4 | 5 | del d["Met"] 6 | print(d) 7 | -------------------------------------------------------------------------------- /Section2/068.py: -------------------------------------------------------------------------------- 1 | #068.py 2 | 3 | d = {"Leu": "L", "Met": "M", "Ser": "S"} 4 | 5 | print("Met" in d) 6 | print("Pro" in d) 7 | -------------------------------------------------------------------------------- /Section2/069.py: -------------------------------------------------------------------------------- 1 | #069.py 2 | 3 | seq = "MLSSSMPPGGLACHADDDII" 4 | 5 | d = {} 6 | 7 | for s in seq: 8 | if s in d: 9 | d[s] += 1 10 | else: 11 | d[s] = 1 12 | 13 | print(d) 14 | -------------------------------------------------------------------------------- /Section2/070.py: -------------------------------------------------------------------------------- 1 | #070.py 2 | 3 | d = {'M': 2, 'L': 2, 'S': 3, 'P': 2, 'G': 2, 'A': 2, 'C': 1, 'H': 1, 'D': 3, 'I': 2} 4 | 5 | for k in d.keys(): 6 | print(k) 7 | -------------------------------------------------------------------------------- /Section2/071.py: -------------------------------------------------------------------------------- 1 | #071.py 2 | 3 | d = 
{'M': 2, 'L': 2, 'S': 3, 'P': 2, 'G': 2, 'A': 2, 'C': 1, 'H': 1, 'D': 3, 'I': 2} 4 | 5 | for v in d.values(): 6 | print(v) 7 | -------------------------------------------------------------------------------- /Section2/072.py: -------------------------------------------------------------------------------- 1 | #072.py 2 | 3 | d = {'M': 2, 'L': 2, 'S': 3, 'P': 2, 'G': 2, 'A': 2, 'C': 1, 'H': 1, 'D': 3, 'I': 2} 4 | 5 | for k, v in d.items(): 6 | print(k, v) 7 | -------------------------------------------------------------------------------- /Section2/073.py: -------------------------------------------------------------------------------- 1 | #073.py 2 | 3 | d = {'M': 2, 'L': 2, 'S': 3, 'P': 2, 'G': 2, 'A': 2, 'C': 1, 'H': 1, 'D': 3, 'I': 2} 4 | 5 | d_sorted = sorted(d.items(), key=lambda v: v[1]) 6 | 7 | for k, v in d_sorted: 8 | print(k, v) 9 | -------------------------------------------------------------------------------- /Section2/074.py: -------------------------------------------------------------------------------- 1 | #074.py 2 | 3 | arr1 = ["Ala", "Phe", "Phe", "Cys", "Ala", "Gly"] 4 | arr2 = ["Phe", "Gly", "Gly", "Val", "Val", "Phe"] 5 | 6 | s1 = set(arr1) 7 | print(s1) 8 | 9 | s2 = set(arr2) 10 | print(s2) 11 | 12 | -------------------------------------------------------------------------------- /Section2/075.py: -------------------------------------------------------------------------------- 1 | #075.py 2 | 3 | s1 = {'Phe', 'Gly', 'Cys', 'Ala'} 4 | s2 = {'Gly', 'Val', 'Phe'} 5 | 6 | print(s1.union(s2)) 7 | -------------------------------------------------------------------------------- /Section2/076.py: -------------------------------------------------------------------------------- 1 | #076.py 2 | 3 | s1 = {'Phe', 'Gly', 'Cys', 'Ala'} 4 | s2 = {'Gly', 'Val', 'Phe'} 5 | 6 | print(s1.intersection(s2)) 7 | -------------------------------------------------------------------------------- /Section2/077.py: 
# --------------------------------------------------------------------------------
#077.py
# Difference: members of s1 that are not in s2.

s1 = {'Phe', 'Gly', 'Cys', 'Ala'}
s2 = {'Gly', 'Val', 'Phe'}

print(s1 - s2)

# --------------------------------------------------------------------------------
# /Section2/078.py:
# --------------------------------------------------------------------------------
#078.py
# Convert a list into a tuple.

essential_aminoacids = ["Val", "Leu", "Ile", "Met", "Thr", "Lys", "Phe", "Trp"]
t = tuple(essential_aminoacids)

print(t)

# --------------------------------------------------------------------------------
# /Section2/079.py:
# --------------------------------------------------------------------------------
#079.py
# Slice a tuple and look up the position of an element.

t = ('Val', 'Leu', 'Ile', 'Met', 'Thr', 'Lys', 'Phe', 'Trp')

print(t[1:4])
print(t.index("Lys"))

# --------------------------------------------------------------------------------
# /Section2/080.py:
# --------------------------------------------------------------------------------
#080.py
# Define an empty class and show the type of an instance.


class MyClass:
    """Empty placeholder class."""


obj = MyClass()
print(type(obj))

# --------------------------------------------------------------------------------
# /Section2/081.py:
# --------------------------------------------------------------------------------
#081.py
# Read a class attribute through an instance.


class MyClass:
    """Class holding the four DNA bases as a class attribute."""

    base = ["A", "C", "G", "T"]


obj = MyClass()
print(obj.base)

# --------------------------------------------------------------------------------
# /Section2/082.py:
# --------------------------------------------------------------------------------
#082.py
# Define and call a method.


class MyClass:
    """Class with a single method that measures a sequence."""

    def get_length(self, seq):
        """Return the length of *seq*."""
        return len(seq)


obj = MyClass()
seq = "ACGTACGT"
print(obj.get_length(seq))

# --------------------------------------------------------------------------------
# /Section2/083.py:
# --------------------------------------------------------------------------------
#083.py
# Use __init__ to set up instance state.


class MyClass:
    """Class that stores a sequence on the instance."""

    def __init__(self):
        """Announce creation and start with an empty sequence."""
        print("object created!")
        self.seq = ""

    def get_length(self):
        """Return the length of the stored sequence."""
        return len(self.seq)


obj = MyClass()
obj.seq = "ACGTACGT"
print(obj.get_length())

# --------------------------------------------------------------------------------
# /Section2/084.py:
# --------------------------------------------------------------------------------
#084.py
# Use __del__ to observe object destruction.


class MyClass:
    """Class that reports both its creation and its deletion."""

    def __init__(self):
        """Announce creation and start with an empty sequence."""
        print("object created!")
        self.seq = ""

    def __del__(self):
        """Announce that the object is being destroyed."""
        print("object deleted!")

    def get_length(self):
        """Return the length of the stored sequence."""
        return len(self.seq)


obj = MyClass()
del obj

# --------------------------------------------------------------------------------
# /Section2/085.py:
# --------------------------------------------------------------------------------
#085.py
# Overload `+` so two objects concatenate their sequences.


class MyClass:
    """Sequence holder that supports `+`."""

    def __init__(self):
        """Start with an empty sequence."""
        self.seq = ""

    def __add__(self, other):
        """Return the two stored sequences joined as a plain string."""
        return self.seq + other.seq


obj1 = MyClass()
obj2 = MyClass()

obj1.seq = "AAA"
obj2.seq = "TTT"

print(obj1 + obj2)

# --------------------------------------------------------------------------------
# /Section2/086.py:
# --------------------------------------------------------------------------------
#086.py
# Overload `>` to compare objects by sequence length.


class MyClass:
    """Sequence holder whose `>` describes a length comparison."""

    def __init__(self):
        """Start with an empty sequence."""
        self.seq = ""

    def __gt__(self, other):
        """Return a sentence comparing the two sequence lengths.

        The result is a message string rather than a bool.
        """
        own, theirs = len(self.seq), len(other.seq)
        if own > theirs:
            return "%s is longer than %s." % (self.seq, other.seq)
        if own < theirs:
            return "%s is not longer than %s." % (self.seq, other.seq)
        return "The length is same."


obj1 = MyClass()
obj2 = MyClass()
obj3 = MyClass()
obj4 = MyClass()

obj1.seq = "AAAA"
obj2.seq = "TTT"
obj3.seq = "GGG"
obj4.seq = "CC"

print(obj1 > obj2)
print(obj2 > obj3)
print(obj4 > obj3)

# --------------------------------------------------------------------------------
# /Section2/087.py:
# --------------------------------------------------------------------------------
#087.py
# Recursive factorial.


def factorial(n):
    """Return n! computed recursively.

    Raises:
        ValueError: if *n* is negative. Without this guard the recursion
            would never reach the n == 0 base case.
    """
    if n < 0:
        raise ValueError("factorial() is not defined for negative numbers")
    if n == 0:
        return 1
    return n * factorial(n - 1)


print(factorial(5))

# --------------------------------------------------------------------------------
# /Section2/088.py:
# --------------------------------------------------------------------------------
#088.py
# Fibonacci series.

arr = [0, 1]


def fibo(n):
    """Return the Fibonacci series from F(0) through F(n) as a list.

    The two seed values are always present, so n <= 1 yields [0, 1].
    """
    # Work on a copy of the seed: appending to the module-level list would
    # make every later call return a longer series than requested.
    series = list(arr)
    for _ in range(n - 1):
        series.append(series[-2] + series[-1])
    return series


print(fibo(10))

# --------------------------------------------------------------------------------
# /Section2/089.py:
# --------------------------------------------------------------------------------
#089.py
# Enumerate all n-mers over an alphabet.


def mer(n, arr1, arr2):
    """Return every string made of n-1 symbols from *arr1* plus one from *arr2*.

    Each recursion level prefixes every element of *arr2* with every symbol
    of *arr1*, so the result has len(arr1) ** (n - 1) * len(arr2) entries.
    With n == 1, *arr2* itself is returned unchanged.

    Raises:
        ValueError: if *n* is smaller than 1. Without this guard the
            recursion would never reach the n == 1 base case.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    if n == 1:
        return arr2
    longer = [head + tail for head in arr1 for tail in arr2]
    return mer(n - 1, arr1, longer)


arr1 = ["A", "C", "G", "T"]
arr2 = ["A", "C", "G", "T"]
print(mer(3, arr1, arr2))

# --------------------------------------------------------------------------------
# /Section2/090-1.py:
# --------------------------------------------------------------------------------
#090-1.py
# Palindrome check by comparing characters from both ends.


def palindrome_checker(s):
    """Return True if *s* reads the same forwards and backwards."""
    # Only the first half needs checking; each index is compared with its
    # mirror position counted from the end.
    for i in range(len(s) // 2):
        if s[i] != s[len(s) - 1 - i]:
            return False
    return True


s1 = "ACACA"
s2 = "ATTCA"
print(palindrome_checker(s1))
print(palindrome_checker(s2))
# --------------------------------------------------------------------------------
# /Section2/090-2.py:
# --------------------------------------------------------------------------------
#090-2.py
# Palindrome check by comparing the string with its reverse.


def palindrome_checker(s):
    """Return True if *s* equals its own reverse."""
    return s == s[::-1]


s1 = "ACACA"
s2 = "ATTCA"
print(palindrome_checker(s1))
print(palindrome_checker(s2))

# --------------------------------------------------------------------------------
# /Section2/091.py:
# --------------------------------------------------------------------------------
#091.py
# Count the four bases in a FASTA file.


def count_bases(path="sample1.fasta"):
    """Return a dict with the number of A, C, G and T characters.

    Lines starting with ">" are skipped; every other line is counted.
    Only upper-case letters are counted.
    """
    counts = {"A": 0, "C": 0, "G": 0, "T": 0}
    with open(path, "r") as fr:
        for line in fr:
            if line.startswith(">"):
                continue
            for base in counts:
                counts[base] += line.count(base)
    return counts


if __name__ == "__main__":
    for base, total in count_bases().items():
        print(base, total)

# --------------------------------------------------------------------------------
# /Section2/092.py:
# --------------------------------------------------------------------------------
#092.py
# Count the records in a FASTA file.


def count_records(path="sample1.fasta"):
    """Return the number of lines that start with ">"."""
    with open(path, "r") as fr:
        return sum(1 for line in fr if line.startswith(">"))


if __name__ == "__main__":
    print(count_records())

# --------------------------------------------------------------------------------
# /Section2/093.py:
# --------------------------------------------------------------------------------
#093.py
# Split a VCF file into its header and its data part.


def split_vcf(path="sample1.vcf"):
    """Return (header, data) as two strings.

    Every line starting with "#" goes to the header, every other line to
    the data part. Line endings are kept.
    """
    header_lines = []
    data_lines = []
    with open(path, "r") as fr:
        for line in fr:
            if line.startswith("#"):
                header_lines.append(line)
            else:
                data_lines.append(line)
    # Join once at the end instead of growing a string line by line.
    return "".join(header_lines), "".join(data_lines)


if __name__ == "__main__":
    header, data = split_vcf()
    print(header)
    print("")
    print(data)

# --------------------------------------------------------------------------------
# /Section2/094.py:
# --------------------------------------------------------------------------------
#094.py
# Count the samples listed in a VCF header.


def count_samples(path="sample1.vcf"):
    """Return the number of sample columns named on the "#CHROM" line.

    Nine columns (CHROM through FORMAT) come before the samples. A header
    with nine or fewer columns reports 0 samples rather than a negative
    number. Returns None when the file has no "#CHROM" line.
    """
    with open(path, "r") as fr:
        for line in fr:
            if line.startswith("#CHROM"):
                return max(len(line.split()) - 9, 0)
    return None


if __name__ == "__main__":
    n_samples = count_samples()
    if n_samples is not None:
        print(n_samples)

# --------------------------------------------------------------------------------
# /Section2/095.py:
# --------------------------------------------------------------------------------
#095.py
# Count the VCF records whose FILTER column is PASS.


def count_pass(path="sample1.vcf"):
    """Return the number of data records with "PASS" in column 7."""
    cnt = 0
    with open(path, "r") as fr:
        for line in fr:
            if line.startswith("#"):
                continue
            fields = line.split()
            if not fields:
                # A blank line (e.g. at end of file) is not a record.
                continue
            if fields[6] == "PASS":
                cnt += 1
    return cnt


if __name__ == "__main__":
    print(count_pass())

# --------------------------------------------------------------------------------
# /Section2/096.py:
# --------------------------------------------------------------------------------
#096.py
# Count the variant records in a VCF file.


def count_variants(path="sample1.vcf"):
    """Return the number of non-header, non-blank lines."""
    with open(path, "r") as fr:
        return sum(
            1 for line in fr
            if not line.startswith("#") and line.strip()
        )


if __name__ == "__main__":
    print(count_variants())

# --------------------------------------------------------------------------------
# /Section2/097.py:
# --------------------------------------------------------------------------------
#097.py
# Classify VCF records as SNP, insertion or deletion.


def classify_variants(path="sample1.vcf"):
    """Return a dict with "SNP", "Insertion" and "Deletion" counts.

    A record is classified purely by comparing the lengths of its REF and
    ALT strings: equal -> SNP, REF longer -> Deletion, ALT longer ->
    Insertion.
    """
    # NOTE(review): a comma-separated multi-allelic ALT is measured as one
    # string, so "G -> A,T" counts as an insertion. Confirm whether the
    # input can contain such records.
    counts = {"SNP": 0, "Insertion": 0, "Deletion": 0}
    with open(path, "r") as fr:
        for line in fr:
            if line.startswith("#"):
                continue
            fields = line.split()
            if not fields:
                continue
            ref, alt = fields[3], fields[4]
            if len(ref) == len(alt):
                counts["SNP"] += 1
            elif len(ref) > len(alt):
                counts["Deletion"] += 1
            else:
                counts["Insertion"] += 1
    return counts


if __name__ == "__main__":
    for label, total in classify_variants().items():
        print(label + ":", total)

# --------------------------------------------------------------------------------
# /Section2/098.py:
# --------------------------------------------------------------------------------
#098.py
# Count the VCF records that carry an ID.


def count_rsids(path="sample1.vcf"):
    """Return the number of data records whose ID column is not "."."""
    rs = 0
    with open(path, "r") as fr:
        for line in fr:
            if line.startswith("#"):
                continue
            fields = line.split()
            if not fields:
                continue
            if fields[2] != ".":
                rs += 1
    return rs


if __name__ == "__main__":
    print(count_rsids())
# --------------------------------------------------------------------------------
# /Section2/099.py:
# --------------------------------------------------------------------------------
#099.py
# Count transitions and transversions in a VCF file.

_BASES = {"A", "C", "G", "T"}

# Transitions swap purine with purine or pyrimidine with pyrimidine:
# A <-> G, C <-> T. Any other single-base change is a transversion.
_TRANSITIONS = {("A", "G"), ("G", "A"), ("C", "T"), ("T", "C")}


def count_ts_tv(path="sample1.vcf"):
    """Return (transitions, transversions) over the records of a VCF file.

    Only records whose REF is exactly one of A, C, G, T and whose ALT is a
    single character are scored; header lines, blank lines and all other
    records are skipped.
    """
    ts = 0
    tv = 0
    with open(path, "r") as fr:
        for line in fr:
            if line.startswith("#"):
                continue
            fields = line.split()
            if not fields:
                continue
            ref, alt = fields[3], fields[4]
            if ref not in _BASES or len(alt) != 1:
                continue
            if (ref, alt) in _TRANSITIONS:
                ts += 1
            else:
                tv += 1
    return ts, tv


def ts_tv_ratio(ts, tv):
    """Return ts / tv, or None when there are no transversions."""
    if tv == 0:
        return None
    return ts / tv


if __name__ == "__main__":
    ts, tv = count_ts_tv()
    ratio = ts_tv_ratio(ts, tv)
    print("transition:", ts)
    print("transversion:", tv)
    # Report "NA" instead of raising ZeroDivisionError when tv is 0.
    print("ts/tv:", ratio if ratio is not None else "NA")

# --------------------------------------------------------------------------------
# /Section2/100.py:
# --------------------------------------------------------------------------------
#100.py
# Sum the interval lengths of a BED file.


def total_bed_length(path="sample1.bed"):
    """Return the sum of (end - start) over all interval lines.

    Columns 2 and 3 are read as integer start and end. Blank lines,
    "#" comments and "track"/"browser" lines are skipped.
    """
    length = 0
    with open(path, "r") as fr:
        for line in fr:
            fields = line.split()
            if not fields:
                continue
            if fields[0] in ("track", "browser") or fields[0].startswith("#"):
                continue
            length += int(fields[2]) - int(fields[1])
    return length


if __name__ == "__main__":
    print(total_bed_length())

# --------------------------------------------------------------------------------
# /Section2/read_sample.txt:
# --------------------------------------------------------------------------------
# 1 | Hello
# 2 | read_sample text file
# --------------------------------------------------------------------------------
# /Section2/sample1.bed:
# --------------------------------------------------------------------------------
# 1 | chr1 100 200
# 2 | chr1 300 350
# 3 | chr2 150 200
# --------------------------------------------------------------------------------
# /Section2/sample1.fasta:
# --------------------------------------------------------------------------------
# 1 | >sample1
# 2 | ACACGGGTAA
# 3 | TTTAATTTTT
-------------------------------------------------------------------------------- /Section2/sample1.vcf: -------------------------------------------------------------------------------- 1 | ##fileformat=VCFv4.1 2 | ##FILTER= 3 | ##FORMAT= 4 | ##FORMAT= 5 | ##FORMAT= 6 | ##FORMAT= 7 | ##INFO= 8 | ##INFO= 9 | ##INFO= 10 | ##INFO= 11 | ##INFO= 12 | ##INFO= 13 | ##INFO= 14 | ##INFO= 15 | ##INFO= 16 | ##INFO= 17 | ##INFO= 18 | ##INFO= 19 | ##INFO= 20 | ##INFO= 21 | ##INFO= 22 | ##INFO= 23 | ##INFO= 24 | ##INFO= 25 | ##INFO= 26 | ##INFO= 27 | ##contig= 28 | ##reference=file:///reference/ucsc.hg19.fasta 29 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 Sample2 30 | chr20 14370 rs6054257 G A 29 PASS . GT:AD:DP 0/1:44,37:81 1/1:44,37:81 31 | chr20 17330 . T A 3 q10 . GT:AD:DP 1/1:44,37:81 0/0:44,37:81 32 | chr20 1110696 rs6040355 A G 29 PASS . GT:AD:DP 0/0:44,37:81 1/2:44,37:81 33 | chr20 1230237 . TT T 29 PASS . GT:AD:DP 0/1:44,37:81 1/1:44,37:81 34 | chr20 1234567 . GT GTC 29 PASS . GT:AD:DP 0/1:44,37:81 0/2:44,37:81 35 | -------------------------------------------------------------------------------- /Section2/write_sample.txt: -------------------------------------------------------------------------------- 1 | Hello 2 | write_sample text file 3 | --------------------------------------------------------------------------------