├── data ├── myfile.txt ├── searchSRS.txt ├── datafile.txt ├── dna.txt ├── mydata.txt ├── genes.txt ├── sample_accessions.txt ├── glpa.fa └── mySeqFile.fa ├── scripts └── hello.py ├── my_first_module.py ├── img └── python_shell.png ├── solutions ├── ex2_3_1_a.py ├── ex1_1_3.py ├── ex1_1_2.py ├── ex1_2_3.py ├── ex2_2_2.py ├── ex1_3_1_b.py ├── ex2_3_1_b.py ├── ex1_3_1_a.py ├── ex1_1_1.py ├── ex2_1_1_b.py ├── ex2_1_2_a.py ├── ex1_4_1.py ├── ex2_1_2_b.py ├── ex1_4_2.py ├── ex2_1_1_a.py ├── ex2_2_4.py ├── ex1_2_2.py ├── ex2_2_3.py ├── ex2_1_1_c.py ├── ex1_2_4_extra.py ├── ex1_2_4.py ├── ex1_2_1.py ├── ex2_3_2_a.py ├── ex2_3_2_b.py ├── ex2_1_3.py ├── ex2_4_1.py ├── ex2_0_1.py └── ex2_2_1.py ├── .gitignore ├── install ├── Dockerfile ├── vbox_installer.sh └── 2to3_nb.py ├── README.md ├── feedback.md ├── Introduction_to_python_day_2_session_2.ipynb ├── Introduction_to_python_day_2_session_4.ipynb ├── planning.md ├── Introduction_to_python_day_1_session_3.ipynb ├── Introduction_to_python_day_2_introduction.ipynb ├── Introduction_to_python_day_1_session_4.ipynb ├── Introduction_to_python_day_1_session_1.ipynb ├── Introduction_to_python_day_1_introduction.ipynb └── Introduction_to_python_day_2_session_1.ipynb /data/myfile.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/hello.py: -------------------------------------------------------------------------------- 1 | print("Hello world!") 2 | -------------------------------------------------------------------------------- /data/searchSRS.txt: -------------------------------------------------------------------------------- 1 | SRS006837 2 | SRS003875 3 | SRS009999 -------------------------------------------------------------------------------- /data/datafile.txt: -------------------------------------------------------------------------------- 1 | Header 2 | First line 3 | Second line 4 | -------------------------------------------------------------------------------- /data/dna.txt: -------------------------------------------------------------------------------- 1 | CGGCTAGATCCAGAT 2 | CGTGTAA 3 | GTACACCCA 4 | GTCAACACTTA 5 | -------------------------------------------------------------------------------- /my_first_module.py: -------------------------------------------------------------------------------- 1 | def say_hello(user): 2 | print('Hello', user, '!') 3 | -------------------------------------------------------------------------------- /img/python_shell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pycam/python-intro/master/img/python_shell.png -------------------------------------------------------------------------------- /data/mydata.txt: -------------------------------------------------------------------------------- 1 | Index Organism Score 2 | 1 Human 1.076 3 | 2 Mouse 1.202 4 | 3 Frog 2.2362 5 | 4 Fly 0.9853 6 | -------------------------------------------------------------------------------- /data/genes.txt: -------------------------------------------------------------------------------- 1 | gene chrom start end 2 | BRCA2 13 32889611 32973805 3 | TNFAIP3 6 138188351 138204449 4 | TCF7 5 133450402 133487556 5 | -------------------------------------------------------------------------------- /data/sample_accessions.txt: -------------------------------------------------------------------------------- 1 | SRS006837 2 | SRS006838 3 | SRS006839 4 | SRS106839 5 | SRS006840 6 | SRS006841 7 | SRS506841 8 | SRS006842 9 | SRS006843 10 | SRS206853 11 | SRS006844 12 | SRS006845 13 | SRS006846 14 | -------------------------------------------------------------------------------- /data/glpa.fa: -------------------------------------------------------------------------------- 1 | >swissprot|P02724|GLPA_HUMAN Glycophorin-A; 2 | MYGKIIFVLLLSEIVSISASSTTGVAMHTSTSSSVTKSYISSQTNDTHKRDTYAATPRAH 3 | EVSEISVRTVYPPEEETGERVQLAHHFSEPEITLIIFGVMAGVIGTILLISYGIRRLIKK 4 | SPSDVKPLPSPDTDVPLSSVEIENPETSDQ 5 | 6 | -------------------------------------------------------------------------------- /solutions/ex2_3_1_a.py: -------------------------------------------------------------------------------- 1 | # Script that writes the values of a list of numbers to a file, 2 | # with each number on a seperate line. 3 | 4 | data = [2, 4, 6, 8, 10] 5 | 6 | with open("numbers.txt", "w") as f: 7 | for d in data: 8 | f.write(str(d) + "\n") 9 | -------------------------------------------------------------------------------- /solutions/ex1_1_3.py: -------------------------------------------------------------------------------- 1 | # Experimenting with python variables in a file 2 | 3 | S = "TCT" 4 | L = "CTT" 5 | Y = "TAT" 6 | C = "TGT" 7 | 8 | # possible DNA sequence for the protein sequence SYLYC 9 | dna = S + Y + L + Y + C 10 | 11 | # print the DNA sequence 12 | print("DNA sequence of SYLYC:", dna) 13 | -------------------------------------------------------------------------------- /solutions/ex1_1_2.py: -------------------------------------------------------------------------------- 1 | # This exercise should be done in the interpreter 2 | 3 | # Assign numerical values to 2 variables, calculate the mean of these two variables 4 | # and store the result in another variable. Print out the result to the screen 5 | 6 | v1 = 5.0 7 | v2 = 10.0 8 | mean = (v1 + v2) / 2 9 | print(mean) 10 | -------------------------------------------------------------------------------- /solutions/ex1_2_3.py: -------------------------------------------------------------------------------- 1 | # Protein sequence given 2 | seq = "MPISEPTFFEIF" 3 | 4 | # Split the sequence into its component amino acids 5 | seq_list = list(seq) 6 | 7 | # Use a set to establish the unique amino acids 8 | unique_amino_acids = set(seq_list) 9 | 10 | # Print out the unique amino acids 11 | print(unique_amino_acids) 12 | -------------------------------------------------------------------------------- /solutions/ex2_2_2.py: -------------------------------------------------------------------------------- 1 | def gc_content(sequence): 2 | """Calculate the GC content of a DNA sequence 3 | """ 4 | gc = 0 5 | for base in sequence: 6 | if (base == 'G') or (base == 'C'): 7 | gc += 1 8 | return 100 * (gc / len(sequence)) 9 | 10 | 11 | #print('GC%', gc_content('ATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTG')) 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | .idea 7 | .ipynb_checkpoints 8 | 9 | .DS_Store 10 | 11 | venv 12 | 13 | biopython.fa 14 | 15 | csvdata.tsv 16 | 17 | csvdictdata.tsv 18 | 19 | data/mydata.csv 20 | 21 | gene_lengths_csv.tsv 22 | 23 | gene_lengths.tsv 24 | 25 | out.txt 26 | 27 | sample.long.fa 28 | 29 | mySeqFile.fa 30 | -------------------------------------------------------------------------------- /install/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu 2 | MAINTAINER Mark Dunning 3 | 4 | RUN sudo apt-get update 5 | RUN apt-get install -y ipython ipython-notebook git 6 | RUN git clone https://github.com/pycam/python-intro.git 7 | 8 | EXPOSE 8888 9 | ENV USE_HTTP 0 10 | 11 | WORKDIR python-intro/ 12 | RUN ipython notebook --no-browser --port 8888 --ip=* Introduction_to_python_session_1.ipynb 13 | -------------------------------------------------------------------------------- /solutions/ex1_3_1_b.py: -------------------------------------------------------------------------------- 1 | # Check if a DNA sequence contains a stop codon 2 | 3 | # DNA sequence given 4 | dna = "GTT GCA CCA CAA CCG TAG TAA TGA" 5 | 6 | # Check if the sequence contains one possible stop codon 7 | if "TAG" in dna: 8 | print("TAG found") 9 | 10 | # Check if the sequence contains any of the 3 stop codons 11 | if ("TAG" in dna) or ("TAA" in dna) or ("TGA" in dna): 12 | print("Stop codon found") 13 | -------------------------------------------------------------------------------- /solutions/ex2_3_1_b.py: -------------------------------------------------------------------------------- 1 | # Write a script that reads a file containing many lines of nucleotide sequence 2 | # For each line in the file, print out the line number, 3 | # the length of the sequence and the sequence 4 | 5 | import sys 6 | 7 | with open('data/dna.txt', "r") as f: 8 | line_num = 0 9 | for line in f: 10 | line = line.rstrip() 11 | line_num += 1 12 | print(line_num, ":", len(line), "\t", line) 13 | -------------------------------------------------------------------------------- /solutions/ex1_3_1_a.py: -------------------------------------------------------------------------------- 1 | # Compare the age of two persons 2 | 3 | # Variables containing your name and your age 4 | name_1 = "James" 5 | age_1 = 31 6 | 7 | # Variables containing another person name and age 8 | name_2 = "Mark" 9 | age_2 = 29 10 | 11 | # Print a statement which says if you are younger, older or the same age 12 | if age_1 > age_2: 13 | print(name_1, 'is older than', name_2) 14 | elif age_1 < age_2: 15 | print(name_1, 'is younger than', name_2) 16 | else: 17 | print(name_1, 'and', name_2, 'have the same age') 18 | -------------------------------------------------------------------------------- /solutions/ex1_1_1.py: -------------------------------------------------------------------------------- 1 | # This exercise should be done in the interpreter 2 | 3 | # Create a variable and assign it the string value of your first name, 4 | # assign your age to another variable (you are free to lie!), print out a message saying how old you are 5 | 6 | name = "John" 7 | age = 21 8 | print("my name is", name, "and I am", age, "years old.") 9 | 10 | 11 | # Use the addition operator to add 10 to your age and print out a message saying how old you will be in 10 years time 12 | age += 10 13 | print(name, "will be", age, "in 10 years.") 14 | -------------------------------------------------------------------------------- /solutions/ex2_1_1_b.py: -------------------------------------------------------------------------------- 1 | def molecular_weight(sequence): 2 | """Function that takes a single DNA sequence as an argument and estimates 3 | the molecular weight of this sequence. 4 | """ 5 | sequence = sequence.upper() 6 | base_weights = {'A': 331, 'C': 307, 'G': 347, 'T': 306} 7 | total_weight = 0 8 | for base in sequence: 9 | total_weight += base_weights[base] 10 | return total_weight 11 | 12 | # Test your function using some example sequences. 13 | weight = molecular_weight("ACTTGGGCAGATAGTCGCG") 14 | print("Molecular weight:", weight, "g/mol") 15 | -------------------------------------------------------------------------------- /solutions/ex2_1_2_a.py: -------------------------------------------------------------------------------- 1 | def base_composition(sequence): 2 | """Write a function that counts the number of each base found 3 | in a DNA sequence. 4 | """ 5 | sequence = sequence.upper() 6 | num_As = sequence.count('A') 7 | num_Cs = sequence.count('C') 8 | num_Gs = sequence.count('G') 9 | num_Ts = sequence.count('T') 10 | # Return the result as a tuple of 4 numbers representing the counts of each base A, C, G and T. 11 | return (num_As, num_Cs, num_Gs, num_Ts) 12 | 13 | dna = "ACAGTGTCGTACAGATCAGTCAGATACA" 14 | print('base composition', base_composition(dna)) 15 | -------------------------------------------------------------------------------- /solutions/ex1_4_1.py: -------------------------------------------------------------------------------- 1 | # Create a list where each element is an individual base of DNA. 2 | # Make the array 15 bases long. 3 | bases = ['A', 'T', 'T', 'C', 'G', 'G', 'T', 'C', 'A', 'T', 'G', 'C', 'T', 'A', 'A'] 4 | 5 | # Print the length of the list 6 | print("DNA sequence length:", len(bases)) 7 | 8 | # Create a for loop to output every base of the sequence on a new line. 9 | print("All bases:") 10 | for base in bases: 11 | print(base) 12 | 13 | # Create a while loop that starts at the third base in the sequence 14 | # and outputs every third base until the 12th. 15 | print("Every 3rd base:") 16 | pos = 2 17 | while pos <= 12: 18 | print(pos, bases[pos]) 19 | pos += 3 20 | -------------------------------------------------------------------------------- /solutions/ex2_1_2_b.py: -------------------------------------------------------------------------------- 1 | def reverse_complement(sequence): 2 | """Write a function to return the reverse-complement of a nucleotide 3 | sequence. 4 | """ 5 | reverse_base = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'} 6 | sequence = sequence.upper() 7 | sequence = reversed(sequence) 8 | result = [] 9 | for base in sequence: 10 | # check if sequence is a DNA sequence or not 11 | if base not in 'ATCG': 12 | return base + " is NOT a known DNA base" 13 | result.append(reverse_base[base]) 14 | return "".join(result) 15 | 16 | print(reverse_complement('ATCGTAGCatgcAATTGGC')) 17 | print(reverse_complement('ATCGTAGCatgcxAATTGGC')) 18 | -------------------------------------------------------------------------------- /solutions/ex1_4_2.py: -------------------------------------------------------------------------------- 1 | # Calculate GC content of a DNA sequence 2 | 3 | # 15-base array you created for the previous exercise 4 | bases = ['A', 'T', 'T', 'C', 'G', 'G', 'T', 'C', 'A', 'T', 'G', 'C', 'T', 'A', 'A'] 5 | 6 | # Create a variable, gc, which we will use to count the number of Gs or Cs in our sequence 7 | gc = 0 8 | 9 | # Loop over the bases in your sequence. 10 | # If the base is a G or a C, add one to your gc variable. 11 | for base in bases: 12 | if (base == 'G') or (base == 'C'): 13 | gc += 1 14 | print("Frequency of GC in the sequence:", gc) 15 | 16 | # Calculate the GC percentage and print it 17 | gc_percent = 100 * (gc / len(bases)) 18 | print("%GC:", gc_percent) 19 | -------------------------------------------------------------------------------- /solutions/ex2_1_1_a.py: -------------------------------------------------------------------------------- 1 | def simple_mean(x, y): 2 | """Function that takes 2 numerical arguments and returns their mean. 3 | """ 4 | mean = (x + y) / 2 5 | return mean 6 | 7 | 8 | def advanced_mean(values): 9 | """Function that takes a list of numbers and returns the mean of all 10 | the numbers in the list. 11 | """ 12 | total = 0 13 | for v in values: 14 | total += v 15 | mean = total / len(values) 16 | return mean 17 | 18 | print("Mean of 2 & 3:", simple_mean(2, 3)) 19 | print("Mean of 8 & 10:", simple_mean(8, 10)) 20 | print("Mean of [2, 4, 6]", advanced_mean([2, 4, 6])) 21 | print("Mean of values even numbers under 20:", advanced_mean(list(range(0, 20, 2)))) 22 | -------------------------------------------------------------------------------- /solutions/ex2_2_4.py: -------------------------------------------------------------------------------- 1 | from ex2_2_2 import gc_content 2 | from ex2_2_3 import extract_sub_sequences 3 | 4 | 5 | def gc_content_along_the_chain(dna_sequence, window_size): 6 | """Returns a list of GC along the DNA sequence 7 | given a DNA sequence and the size of the sliding window 8 | """ 9 | sub_sequences = extract_sub_sequences(dna_sequence, window_size) 10 | gc_results = [] 11 | for sub_sequence in sub_sequences: 12 | gc_results.append(gc_content(sub_sequence)) 13 | return gc_results 14 | 15 | dna = 'ATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTG' 16 | print(gc_content(dna)) 17 | print(extract_sub_sequences(dna, 5)) 18 | print(gc_content_along_the_chain(dna, 5)) 19 | -------------------------------------------------------------------------------- /solutions/ex1_2_2.py: -------------------------------------------------------------------------------- 1 | # Create a string variable with your full name 2 | name = "Boris Johnson" 3 | 4 | # Split the string into a list 5 | names = name.split(" ") 6 | 7 | # Print out your surname 8 | surname = names[-1] 9 | print("Surname:", surname) 10 | 11 | # Check if your surname contains the letter 'e' 12 | pos = surname.find("e") 13 | print("Position of 'e':", pos) 14 | 15 | # or contains the letter 'o' 16 | pos = surname.find("o") 17 | print("Position of 'o':", pos) 18 | 19 | ### Optional -------------------------------------------------------- 20 | 21 | # Use a format string to print out your first name and the length of your first name 22 | print("{:s} is {:d} characters long".format(names[0], len(names[0]))) 23 | -------------------------------------------------------------------------------- /solutions/ex2_2_3.py: -------------------------------------------------------------------------------- 1 | def extract_sub_sequences(sequence, window_size): 2 | """Extract a list of overlaping sub-sequences for a given window size 3 | from a given sequence. 4 | """ 5 | if window_size <= 0: 6 | return "Window size must be a positive integer" 7 | if window_size > len(sequence): 8 | return "Window size is larger than sequence length" 9 | result = [] 10 | nr_windows = len(sequence) - window_size + 1 11 | for i in range(nr_windows): 12 | sub_sequence = sequence[i:i + window_size] 13 | result.append(sub_sequence) 14 | return result 15 | 16 | 17 | #dna = 'ATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTG' 18 | #print(extract_sub_sequences(dna, 5)) 19 | -------------------------------------------------------------------------------- /solutions/ex2_1_1_c.py: -------------------------------------------------------------------------------- 1 | def molecular_weight(sequence): 2 | """Function that takes a single DNA sequence as an argument and estimates 3 | the molecular weight of this sequence. 4 | If the sequence passed in above contains N bases, 5 | use the mean weight of the other bases as the weight. 6 | """ 7 | sequence = sequence.upper() 8 | base_weights = {'A': 331, 'C': 307, 'G': 347, 'T': 306} 9 | base_weights['N'] = sum(base_weights.values()) / len(base_weights) 10 | total_weight = 0 11 | for base in sequence: 12 | total_weight += base_weights[base] 13 | return total_weight 14 | 15 | weight = molecular_weight("AAGGACTGTCNCGTNNCGTAGGATNATAGNN") 16 | print("Moelacular weight:", weight, "g/mol") 17 | -------------------------------------------------------------------------------- /solutions/ex1_2_4_extra.py: -------------------------------------------------------------------------------- 1 | # 1-letter code lysozyme protein sequence given 2 | seq = "MKALIVLGLVLLSVTVQGKVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKTPGAVNACHLSCSALLQDNIADAVACAKRVVRDPQGIRAWVAWRNRCQNRDVRQYVQGCGV" 3 | 4 | # Count the abundance of different residue types and store the result in a dictionary 5 | aa_counts = {} 6 | aa_counts['A'] = seq.count('A') 7 | aa_counts['C'] = seq.count('C') 8 | aa_counts['D'] = seq.count('D') 9 | aa_counts['E'] = seq.count('E') 10 | # etc... 11 | 12 | # Print the results in alphabetical order 13 | print('A has', aa_counts['A'], 'occurrence(s)') 14 | print('C has', aa_counts['C'], 'occurrence(s)') 15 | print('D has', aa_counts['D'], 'occurrence(s)') 16 | print('E has', aa_counts['E'], 'occurrence(s)') 17 | -------------------------------------------------------------------------------- /solutions/ex1_2_4.py: -------------------------------------------------------------------------------- 1 | # DNA sequence given 2 | codon_string = "GTT GCA CCA CAA CCG" 3 | 4 | # Split this string into the individual codons 5 | codon_list = codon_string.split() 6 | 7 | # Dictionnary to map between codon sequences and amino acids they encode 8 | genetic_code = { 9 | "GTT": "Val", 10 | "GCA": "Ala", 11 | "CCA": "Pro", 12 | "CAA": "Glu", 13 | "CCG": "Pro" 14 | } 15 | 16 | # Print each codon and its corresponding amino acid 17 | print(codon_list[0], "codes for", genetic_code[codon_list[0]]) 18 | print(codon_list[1], "codes for", genetic_code[codon_list[1]]) 19 | print(codon_list[2], "codes for", genetic_code[codon_list[2]]) 20 | print(codon_list[3], "codes for", genetic_code[codon_list[3]]) 21 | print(codon_list[4], "codes for", genetic_code[codon_list[4]]) 22 | -------------------------------------------------------------------------------- /solutions/ex1_2_1.py: -------------------------------------------------------------------------------- 1 | # Use the codon variables you defined previously 2 | S = "TCT" 3 | L = "CTT" 4 | Y = "TAT" 5 | C = "TGT" 6 | 7 | # Create a list for the protein sequence CLYSY 8 | codons = [C, L, Y, S, Y] 9 | 10 | # Print the DNA sequence of the protein 11 | print("DNA sequence:", codons) 12 | 13 | # Print the DNA sequence of the last amino acid 14 | print("Last codon:", codons[-1]) 15 | 16 | # Create two more variables containing the DNA sequence for a stop codon and a start codon 17 | start = "ATG" 18 | stop = "TGA" 19 | 20 | # Replace the first element of the list with the start codon 21 | codons[0] = start 22 | 23 | # Append the stop codon to the end of the list 24 | codons.append(stop) 25 | 26 | # Print the resulting DNA sequence 27 | print("DNA sequence after alteration:", "".join(codons)) 28 | -------------------------------------------------------------------------------- /data/mySeqFile.fa: -------------------------------------------------------------------------------- 1 | >sp|P61626|LYSC_HUMAN Lysozyme C OS=Homo sapiens GN=LYZ PE=1 SV=1 2 | MKALIVLGLVLLSVTVQGKVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRA 3 | TNYNAGDRSTDYGIFQINSRYWCNDGKTPGAVNACHLSCSALLQDNIADAVACAKRVVRD 4 | PQGIRAWVAWRNRCQNRDVRQYVQGCGV 5 | >sp|P04421|LYSC_BOVIN Lysozyme C OS=Bos taurus GN=LYZ1 PE=1 SV=2 6 | MKALVILGFLFLSVAVQGKVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKA 7 | TNYNPSSESTDYGIFQINSKWWCNDGKTPNAVDGCHVSCRELMENDIAKAVACAKHIVSE 8 | QGITAWVAWKSHCRDHDVSSYVEGCTL 9 | >sp|P17897|LYZ1_MOUSE Lysozyme C-1 OS=Mus musculus GN=Lyz1 PE=1 SV=1 10 | MKALLTLGLLLLSVTAQAKVYNRCELARILKRNGMDGYRGVKLADWVCLAQHESNYNTRA 11 | TNYNRGDRSTDYGIFQINSRYWCNDGKTPRSKNACGINCSALLQDDITAAIQCAKRVVRD 12 | PQGIRAWVAWRTQCQNRDLSQYIRNCGV 13 | >sp|P00697|LYSC1_RAT Lysozyme C-1 OS=Rattus norvegicus GN=Lyz1 PE=1 SV=2 14 | MKALLVLGFLLLSASVQAKIYERCQFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQA 15 | RNYNPGDQSTDYGIFQINSRYWCNDGKTPRAKNACGIPCSALLQDDITQAIQCAKRVVRD 16 | PQGIRAWVAWQRHCKNRDLSGYIRNCGV 17 | >sp|Q8T1G5|LYSC_DICDI Lysozyme C OS=Dictyostelium discoideum GN=alyC PE=3 SV=1 18 | MRIAFFLLILSIIVGLAYGYSCPKPCYGNMCCSTSPDHKYYLTDFCGSTSACGPKPSCSG 19 | KLYFTADSQRFGCGKHLNLCRGKKCVKAKVYDAGPAEWVEKDAGKMIIDASPTICHELTG 20 | GSSCGWSDKFEITATVTSLTDSRPLGPFNVTEEEMDQLFIDHEIAMAQCEAEKTCNGFDL 21 | E 22 | 23 | -------------------------------------------------------------------------------- /solutions/ex2_3_2_a.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | # Read a tab delimited file which has 4 columns: gene, chromosome, start and end coordinates. 4 | # Check if the file exists, then compute the length of each gene and store 5 | # its name and corresponding length into a dictionary. 6 | # Write the results into a new tab separated file. 7 | 8 | gene_file = os.path.join('data', 'genes.txt') 9 | output_file = "gene_lengths.tsv" 10 | 11 | if os.path.exists(gene_file): 12 | results = [] 13 | with open(gene_file) as f: 14 | header = f.readline() 15 | for line in f: 16 | gene, chrom, start, end = line.strip().split("\t") 17 | row = {'gene': gene, 'length': int(end) - int(start) + 1} 18 | results.append(row) 19 | print(results) 20 | with open(output_file, "w") as out: 21 | out.write('gene' + "\t" + 'length' + "\n") # write header 22 | for record in results: 23 | out.write(record['gene'] + "\t" + str(record['length']) + "\n") 24 | else: 25 | print(gene_file, 'does not exists!') 26 | 27 | if os.path.exists(output_file): 28 | # print contents of output file 29 | with open(output_file) as f: 30 | print(f.read()) 31 | else: 32 | print(output_file, 'does not exists!') 33 | -------------------------------------------------------------------------------- /solutions/ex2_3_2_b.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import csv 3 | 4 | # Read a tab delimited file which has 4 columns: gene, chromosome, start and end coordinates. 5 | # Check if the file exists, then compute the length of each gene and store 6 | # its name and corresponding length into a dictionary. 7 | # Write the results into a new tab separated file and make use of the csv module. 8 | 9 | gene_file = os.path.join('data', 'genes.txt') 10 | output_file = "gene_lengths_csv.tsv" 11 | 12 | if os.path.exists(gene_file): 13 | results = [] 14 | with open(gene_file) as f: 15 | reader = csv.DictReader(f, delimiter='\t') 16 | for row in reader: 17 | record = {'gene': row['gene'], 'length': int(row['end']) - int(row['start']) + 1} 18 | results.append(record) 19 | print(results) 20 | with open(output_file, "w") as out: 21 | writer = csv.DictWriter(out, results[0].keys(), delimiter='\t') 22 | writer.writeheader() # write header 23 | for record in results: 24 | writer.writerow(record) 25 | else: 26 | print(gene_file, 'does not exists!') 27 | 28 | if os.path.exists(output_file): 29 | # print contents of output file 30 | with open(output_file) as f: 31 | print(f.read()) 32 | else: 33 | print(output_file, 'does not exists!') 34 | -------------------------------------------------------------------------------- /solutions/ex2_1_3.py: -------------------------------------------------------------------------------- 1 | def molecular_weight(sequence, molecule_type='DNA'): 2 | """Function that takes a single DNA or RNA sequence as an argument 3 | and estimates the molecular weight of this sequence. 4 | If the sequence passed in above contains N bases, 5 | use the mean weight of the other bases as the weight. 6 | Use an optional argument to specify the molecule type, but default to DNA. 7 | """ 8 | sequence = sequence.upper() 9 | molecule_type = molecule_type.upper() 10 | 11 | dna_weights = {'A': 331, 'C': 307, 'G': 347, 'T': 306} 12 | rna_weights = {'A': 347, 'C': 323, 'G': 363, 'U': 324} 13 | 14 | if molecule_type == 'DNA': 15 | base_weights = dna_weights 16 | elif molecule_type == 'RNA': 17 | base_weights = rna_weights 18 | else: 19 | return "Unrecognised molecule_type " + molecule_type 20 | 21 | total_weight = 0 22 | for base in sequence: 23 | # check if base is a DNA base or not 24 | if base not in base_weights: 25 | return base + " is NOT a known DNA base" 26 | total_weight += base_weights[base] 27 | return total_weight 28 | 29 | 30 | print("RNA weight:", molecular_weight("AACGUCGAAUCCUAGCGC", molecule_type="RNA"), "g/mol") 31 | print("DNA weight:", molecular_weight("AACGTCGAATCCTAGCGC"), "g/mol") 32 | print("Other sequence weight:", molecular_weight("AACGTCGAATXXXCCTAGCGC"), "g/mol") 33 | -------------------------------------------------------------------------------- /install/vbox_installer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # lubuntu LTS 14.04 VirtualBox installer based on lubuntu-14.04.2-desktop-i386 3 | # computer name: crukci-training-vm; user: training; password: admin123 4 | 5 | sudo su - 6 | apt-get install gedit 7 | apt-get install vim 8 | apt-get install git 9 | apt-get install python-pip 10 | apt-get install python-zmq 11 | apt-get install python-matplotlib 12 | apt-get install python-biopython 13 | apt-get install ncbi-blast+ 14 | 15 | # Install VirtualBox Additions 16 | # From the VirtualBox menu of lubuntu go to Devices > Insert Guest Additions CD image... and do 17 | cd /media/training/VBOXADDITIONS_4.3.26_98988 18 | sudo ./VBoxLinuxAdditions.run 19 | 20 | # To increase screen resolution 21 | # Start > Preferences > Additional Drivers: Using x86 virtualization solution... and click Apply Changes 22 | # Then Start > Preferences > Monitor Settings and select 1440x1050 and click Save and Apply 23 | 24 | pip install ipython[notebook] 25 | 26 | apt-get autoremove 27 | apt-get clean 28 | 29 | 30 | adduser pycam # password: pycam123 31 | 32 | exit 33 | 34 | # login as pycam -------------------------------------------------------------- 35 | 36 | git clone https://github.com/pycam/python-intro.git course 37 | 38 | # Add ipython at startup from lubuntu menu do to... 39 | # Preferences > Default applications for LXSession then tab Autostart and add: 40 | # /usr/local/bin/ipython notebook --no-browser --port=8888 --ip=127.0.0.1 /home/pycam/course/ 41 | 42 | # Add bookmarks into firefox: (1) pycam.github.io (2) 127.0.0.1:8888 43 | 44 | 45 | -------------------------------------------------------------------------------- /solutions/ex2_4_1.py: -------------------------------------------------------------------------------- 1 | from Bio import SeqIO 2 | from Bio.SeqUtils import GC 3 | 4 | # Read in a FASTA file named data/sample.fa 5 | seqList = list(SeqIO.parse('data/sample.fa', 'fasta')) 6 | 7 | # find the number of sequences present in the file 8 | numSeq = len(seqList) 9 | print('Total number of sequences:', numSeq) 10 | 11 | # find IDs and lengths of the longest and the shortest sequences 12 | maxLen = minLen = len(seqList[0].seq) 13 | 14 | lSeq = sSeq = seqList[0].id 15 | 16 | for i in range(1, numSeq): 17 | if len(seqList[i].seq) > maxLen: 18 | # update maxLen and lSeq 19 | maxLen = len(seqList[i].seq) 20 | lSeq = seqList[i].id 21 | elif len(seqList[i].seq) < minLen: 22 | # update minLen and sSeq 23 | minLen = len(seqList[i].seq) 24 | sSeq = seqList[i].id 25 | 26 | print('Longest sequence is', lSeq, 'with length', maxLen, 'bp') 27 | print('Shortest sequence is', sSeq, 'with length', minLen, 'bp') 28 | 29 | # Creating a new sequence list containing sequences longer than 500bp 30 | # Calculate the average length of these sequences 31 | # calculate and print the percentage of GC contents 32 | 33 | longSeqList = list() # empty list for sequences 34 | 35 | totLength = 0 36 | for sequence in seqList: 37 | if len(sequence) > 500: 38 | longSeqList.append(sequence) 39 | totLength += len(sequence) 40 | gc = GC(sequence.seq) 41 | print('Percentage of GC content in', sequence.id, 'is', gc) 42 | 43 | avgLength = totLength / len(longSeqList) 44 | 45 | print('Average length for sequences longer than 500bp is', avgLength) 46 | 47 | # Write sequences in the longSeqList in a file with 'GenBank' format 48 | SeqIO.write(longSeqList, 'sample.long.fa', 'fasta') 49 | -------------------------------------------------------------------------------- /solutions/ex2_0_1.py: -------------------------------------------------------------------------------- 1 | lyrics = """ 2 | Imagine there's no Heaven 3 | It's easy if you try 4 | No Hell below us 5 | Above us only sky 6 | 7 | Imagine all the people 8 | Living for today 9 | Aaa haa 10 | 11 | Imagine there's no countries 12 | It isn't hard to do 13 | Nothing to kill or die for 14 | And no religion too 15 | 16 | Imagine all the people 17 | Living life in peace 18 | Yoo hoo 19 | 20 | You may say I'm a dreamer 21 | But I'm not the only one 22 | I hope someday you'll join us 23 | And the world will be as one 24 | 25 | Imagine no possessions 26 | I wonder if you can 27 | No need for greed or hunger 28 | A brotherhood of man 29 | 30 | Imagine all the people 31 | Sharing all the world 32 | Yoo hoo 33 | 34 | You may say I'm a dreamer 35 | But I'm not the only one 36 | I hope someday you'll join us 37 | And the world will live as one 38 | """ 39 | 40 | # Change all character to lower ones 41 | lyrics = lyrics.lower() 42 | 43 | # Split into words 44 | words = lyrics.split() 45 | # Print the total number of words 46 | print('There are', len(words), 'words in this song.') 47 | 48 | # Print the number of unique words 49 | unique_words = set(words) 50 | print('There are', len(unique_words), 'unique ones.') 51 | 52 | # Calculate the frequency of each word and store the result into a dictionary 53 | results = {} 54 | for w in unique_words: 55 | results[w.lower()] = words.count(w) 56 | 57 | # Print each unique word along with its frequency 58 | for r in results: 59 | print(results[r], '\t', r) 60 | 61 | # Find the most frequent word in the song 62 | most_frequent = 0 63 | for r in results: 64 | if results[r] > most_frequent: 65 | most_frequent = results[r] 66 | most_frequent_word = r 67 | 68 | # Print the most frequent word with its frequency 69 | print('"', most_frequent_word, '" is the most frequent word being used', most_frequent, 'times.') 70 | -------------------------------------------------------------------------------- /solutions/ex2_2_1.py: -------------------------------------------------------------------------------- 1 | standardGeneticCode = { 2 | 'UUU': 'Phe', 'UUC': 'Phe', 'UCU': 'Ser', 'UCC': 'Ser', 3 | 'UAU': 'Tyr', 'UAC': 'Tyr', 'UGU': 'Cys', 'UGC': 'Cys', 4 | 'UUA': 'Leu', 'UCA': 'Ser', 'UAA': None, 'UGA': None, 5 | 'UUG': 'Leu', 'UCG': 'Ser', 'UAG': None, 'UGG': 'Trp', 6 | 'CUU': 'Leu', 'CUC': 'Leu', 'CCU': 'Pro', 'CCC': 'Pro', 7 | 'CAU': 'His', 'CAC': 'His', 'CGU': 'Arg', 'CGC': 'Arg', 8 | 'CUA': 'Leu', 'CUG': 'Leu', 'CCA': 'Pro', 'CCG': 'Pro', 9 | 'CAA': 'Gln', 'CAG': 'Gln', 'CGA': 'Arg', 'CGG': 'Arg', 10 | 'AUU': 'Ile', 'AUC': 'Ile', 'ACU': 'Thr', 'ACC': 'Thr', 11 | 'AAU': 'Asn', 'AAC': 'Asn', 'AGU': 'Ser', 'AGC': 'Ser', 12 | 'AUA': 'Ile', 'ACA': 'Thr', 'AAA': 'Lys', 'AGA': 'Arg', 13 | 'AUG': 'Met', 'ACG': 'Thr', 'AAG': 'Lys', 'AGG': 'Arg', 14 | 'GUU': 'Val', 'GUC': 'Val', 'GCU': 'Ala', 'GCC': 'Ala', 15 | 'GAU': 'Asp', 'GAC': 'Asp', 'GGU': 'Gly', 'GGC': 'Gly', 16 | 'GUA': 'Val', 'GUG': 'Val', 'GCA': 'Ala', 'GCG': 'Ala', 17 | 'GAA': 'Glu', 'GAG': 'Glu', 'GGA': 'Gly', 'GGG': 'Gly'} 18 | 19 | def protein_translation(sequence, geneticCode): 20 | """This function translates a nucleic acid sequence into a 21 | protein sequence, until the end or until it comes across 22 | a stop codon. 23 | """ 24 | protein_sequence = [] 25 | for i in range(0, len(sequence)-2, 3): 26 | codon = sequence[i:i + 3] 27 | codon.upper() 28 | 29 | # Convert DNA into RNA sequence 30 | if "T" in codon: 31 | # replace T by U 32 | codon = codon.replace('T', 'U') 33 | 34 | # Make sure the codon corresponds to a amino acid 35 | if codon in geneticCode: 36 | aminoAcid = geneticCode[codon] 37 | else: 38 | return codon + " codon not in dictionary of genetic code" 39 | 40 | # Break if stop codon is found 41 | if aminoAcid is None: 42 | break 43 | 44 | protein_sequence.append(aminoAcid) 45 | 46 | return protein_sequence 47 | 48 | 49 | dna_sequence = 'ATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTG' 50 | print(dna_sequence) 51 | protein_3letter_sequence = protein_translation(dna_sequence, standardGeneticCode) 52 | print("".join(protein_3letter_sequence)) 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ARCHIVED - READ ONLY - OLD 3 DAYS COURSE REPO 2 | 3 | --- 4 | 5 | # An introduction to solving biological problems with Python - course materials 6 | 7 | Materials for the course run by the Graduate School of Life Sciences, University of Cambridge. 8 | 9 | - Course website: http://pycam.github.io/ 10 | - Booking website: http://www.training.cam.ac.uk/ 11 | 12 | 13 | If you wish to run the course on your personal computer, here are the steps to follow to get up and running. 14 | 15 | ## Clone this github project 16 | 17 | ```bash 18 | git clone https://github.com/pycam/python-intro.git 19 | cd python-intro 20 | ``` 21 | 22 | ## Dependencies 23 | 24 | Install Python 3 by downloading the latest version from https://www.python.org/. 25 | 26 | Python 2.x is legacy, Python 3.x is the present and future of the language. 27 | 28 | Create first a virtual environment using the [`venv` library](https://docs.python.org/3/library/venv.html). Update pip if needed, install [jupyter](http://jupyter.org/) and [RISE](https://github.com/damianavila/RISE) to get a slideshow extension into jupyter. 29 | 30 | ***Note*** A virtual environment is a Python environment such that the Python interpreter, libraries and scripts installed into it are isolated from those installed in other virtual environments. 31 | 32 | ```bash 33 | python3 -m venv venv 34 | # activate your virtual environment 35 | source venv/bin/activate 36 | # update pip if needed 37 | pip install --upgrade pip 38 | # install jupyter 39 | pip install jupyter 40 | 41 | # slideshow extension 42 | pip install rise 43 | jupyter-nbextension install rise --py --sys-prefix 44 | jupyter nbextension enable rise --py --sys-prefix 45 | 46 | # biopython 47 | pip install biopython 48 | ``` 49 | 50 | On mac OSX you may need to run this command to accept the XCode license, before installing biopython: 51 | 52 | ```bash 53 | sudo xcodebuild -license 54 | ``` 55 | 56 | ## Usage 57 | 58 | Go to the directory where you've cloned this repository, activate your virtual environment and run jupyter. 59 | 60 | Your web browser should automatically open with this url http://localhost:8888/tree where you see the directory tree of the course with all the jupyter notebooks. 61 | 62 | ```bash 63 | cd python-intro 64 | source venv/bin/activate 65 | jupyter notebook 66 | ``` 67 | 68 | To shutdown jupyter, type ctrl-C into the terminal you've ran `jupyter notebook`, answer `y` and press `enter`. 69 | 70 | You may wish to deactivate the virtual environment, by entering into the terminal: 71 | ``` 72 | deactivate 73 | ``` 74 | . 75 | -------------------------------------------------------------------------------- /install/2to3_nb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | To run: python3 nb2to3.py notebook-or-directory 4 | """ 5 | # Authors: Thomas Kluyver, Fernando Perez 6 | # See: https://gist.github.com/takluyver/c8839593c615bb2f6e80 7 | 8 | import argparse 9 | import pathlib 10 | from nbformat import read, write 11 | 12 | import lib2to3 13 | from lib2to3.refactor import RefactoringTool, get_fixers_from_package 14 | 15 | 16 | def refactor_notebook_inplace(rt, path): 17 | 18 | def refactor_cell(src): 19 | #print('\n***SRC***\n', src) 20 | try: 21 | tree = rt.refactor_string(src+'\n', str(path) + '/cell-%d' % i) 22 | except (lib2to3.pgen2.parse.ParseError, 23 | lib2to3.pgen2.tokenize.TokenError): 24 | return src 25 | else: 26 | return str(tree)[:-1] 27 | 28 | 29 | print("Refactoring:", path) 30 | nb = read(str(path), as_version=4) 31 | 32 | # Run 2to3 on code 33 | for i, cell in enumerate(nb.cells, start=1): 34 | if cell.cell_type == 'code': 35 | if cell.execution_count in (' ', '*'): 36 | cell.execution_count = None 37 | 38 | if cell.source.startswith('%%'): 39 | # For cell magics, try to refactor the body, in case it's 40 | # valid python 41 | head, source = cell.source.split('\n', 1) 42 | cell.source = head + '\n' + refactor_cell(source) 43 | else: 44 | cell.source = refactor_cell(cell.source) 45 | 46 | 47 | # Update notebook metadata 48 | nb.metadata.kernelspec = { 49 | 'display_name': 'Python 3', 50 | 'name': 'python3', 51 | 'language': 'python', 52 | } 53 | if 'language_info' in nb.metadata: 54 | nb.metadata.language_info.codemirror_mode = { 55 | 'name': 'ipython', 56 | 'version': 3, 57 | } 58 | nb.metadata.language_info.pygments_lexer = 'ipython3' 59 | nb.metadata.language_info.pop('version', None) 60 | 61 | write(nb, str(path)) 62 | 63 | def main(argv=None): 64 | ap = argparse.ArgumentParser() 65 | ap.add_argument('path', type=pathlib.Path, 66 | help="Notebook or directory containing notebooks") 67 | 68 | options = ap.parse_args(argv) 69 | 70 | avail_fixes = set(get_fixers_from_package('lib2to3.fixes')) 71 | rt = RefactoringTool(avail_fixes) 72 | 73 | if options.path.is_dir(): 74 | for nb_path in options.path.rglob('*.ipynb'): 75 | refactor_notebook_inplace(rt, nb_path) 76 | else: 77 | refactor_notebook_inplace(rt, options.path) 78 | 79 | if __name__ == '__main__': 80 | main() 81 | -------------------------------------------------------------------------------- /feedback.md: -------------------------------------------------------------------------------- 1 | # Feedback 2 | 3 | ## Trainers 4 | - _Adrian Baez-Ortega_ 5 | - **Tomás Di Domenico** 6 | - **Mareike Herzog** 7 | - **Mukarram Hossain** 8 | - _Maire Lawlor_ 9 | - Sergio Martinez Cuesta 10 | - **Anne Pajon** 11 | - Cristian Riccio 12 | 13 | **Present at the meeting after training on 22-23 September 2016** 14 | 15 | ## Discussion 16 | 17 | - convert to python 3 18 | - split course into beginner/intermediate/advance 19 | - other python libraries to cover like pandas 20 | - course feedbacks 21 | 22 | **Convert to Python 3** — for sure 23 | - BioPython, pandas all migrated 24 | - anaconda is 3 by default 25 | 26 | **Add linux command introduction** — basic command and tree representation; it is essential to run python from a script on the command line 27 | 28 | **Better introduction** — on why we use python in a notebook, on the command line and in file explaining how we run things 29 | 30 | **Contents to drop** 31 | - file I/O should stay but remove reading XML/PDB format only keep delimited files and maybe JSON 32 | - drop system call 33 | - drop exceptions 34 | 35 | **Contents to add** 36 | - way of searching into python library documentation for string 37 | - independent learners 38 | - bonus exercise — use pandas to parse this file to keep them busy 39 | - add BioPython exercise for the reverse complement function by writing second function using BioPython doing the same thing 40 | 41 | **Contents to re-arrange** 42 | - restructure the second day 43 | - section 4 (1) file then (2) command line 44 | - move functions before section 2 45 | 46 | **How to better stimulate the learner** 47 | - better explanation on how to solve each exercises by splitting problems into smaller chunks 48 | - (1) explain how to solve an exercise by starting with a blank page and comments to divide the problem in small chunks 49 | - (2) search for solution on internet either on forum or in python library 50 | - (3) program together using functions to solve exercises 51 | - better manage expectations by being clearer on objectives 52 | - build connected exercises not disconnected ones, build upon learning — start simple and increase complexity 53 | - write code in group of 2 or 3; one function each to solve one bigger problem; explain and make them sure to write together a bigger program; then exchange code and progress to next level 54 | - bring your questions 55 | 56 | ## Actions 57 | 58 | > Meeting once a month — to get running. 59 | 60 | **Next course** — December 16 61 | - Keep exercises for sure 62 | - Minimize the lecture part 63 | - Make them search for solution 64 | - Assignment — like workshop couple of urls 65 | - Create 3 levels of exercises beginner/intermediate/advance 66 | 67 | **Checklists for December course** 68 | - [ ] Anne to re-organize course and add BioPython exercise 69 | - [ ] Tomás to convert code to Python 3 70 | - [ ] Christian to correct typos 71 | 72 | Christian sent his feedback by email — exceptions and reading PDB and XML files is too advanced for a beginner course. 73 | There are a few typos in the course that I can correct if somebody tells me how. 74 | -------------------------------------------------------------------------------- /Introduction_to_python_day_2_session_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# An introduction to solving biological problems with Python\n", 8 | "\n", 9 | "## Session 2.2: Exercises and Modules\n", 10 | "\n", 11 | "- [Excercises 2.2.1](#Excercises-2.1.1)\n", 12 | "- [Excercises 2.2.2](#Excercises-2.2.2)\n", 13 | "- [Excercises 2.2.3](#Excercises-2.2.3)\n", 14 | "- [Modules](#Modules)\n", 15 | "- [Excercises 2.2.4](#Excercises-2.2.4)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Excercises 2.2.1\n", 23 | "\n", 24 | "### Translate DNA sequence into protein sequence\n", 25 | "\n", 26 | "Write a function that translates a DNA sequence into a protein, a sequence of amino acids. The function should take 2 arguments, a DNA sequence and a dictionary that defines the standard genetic code.\n", 27 | "\n", 28 | "For mapping RNA codons to amino acids you can use the dictionary `standardGeneticCode` defined below. Notice that it only maps strings in upper case, so make sure that `codon` is in upper case before your look up. You can translate codon into an upper case with the `upper()` method on String. Notice also that it maps RNA codons and not DNA ones.\n", 29 | "\n", 30 | "First, loop over the sequence to extract every three basees until the end or until a stop codon either by using a `for` loop or a `while` one. \n", 31 | "\n", 32 | "Then convert the DNA into an RNA sequence, by replacing all T bases by U. Make sure that the codon corresponds to an amino accid. Convert the RNA codon into an amino acid using the dictionary provided and return the protein sequence as a list of amino acids." 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "standardGeneticCode = { \n", 44 | " 'UUU':'Phe', 'UUC':'Phe', 'UCU':'Ser', 'UCC':'Ser',\n", 45 | " 'UAU':'Tyr', 'UAC':'Tyr', 'UGU':'Cys', 'UGC':'Cys',\n", 46 | " 'UUA':'Leu', 'UCA':'Ser', 'UAA': None, 'UGA': None,\n", 47 | " 'UUG':'Leu', 'UCG':'Ser', 'UAG': None, 'UGG':'Trp',\n", 48 | " 'CUU':'Leu', 'CUC':'Leu', 'CCU':'Pro', 'CCC':'Pro',\n", 49 | " 'CAU':'His', 'CAC':'His', 'CGU':'Arg', 'CGC':'Arg',\n", 50 | " 'CUA':'Leu', 'CUG':'Leu', 'CCA':'Pro', 'CCG':'Pro',\n", 51 | " 'CAA':'Gln', 'CAG':'Gln', 'CGA':'Arg', 'CGG':'Arg',\n", 52 | " 'AUU':'Ile', 'AUC':'Ile', 'ACU':'Thr', 'ACC':'Thr',\n", 53 | " 'AAU':'Asn', 'AAC':'Asn', 'AGU':'Ser', 'AGC':'Ser',\n", 54 | " 'AUA':'Ile', 'ACA':'Thr', 'AAA':'Lys', 'AGA':'Arg',\n", 55 | " 'AUG':'Met', 'ACG':'Thr', 'AAG':'Lys', 'AGG':'Arg',\n", 56 | " 'GUU':'Val', 'GUC':'Val', 'GCU':'Ala', 'GCC':'Ala',\n", 57 | " 'GAU':'Asp', 'GAC':'Asp', 'GGU':'Gly', 'GGC':'Gly',\n", 58 | " 'GUA':'Val', 'GUG':'Val', 'GCA':'Ala', 'GCG':'Ala', \n", 59 | " 'GAA':'Glu', 'GAG':'Glu', 'GGA':'Gly', 'GGG':'Gly'}" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Excercises 2.2.2\n", 67 | "\n", 68 | "### Calculate the GC content of a DNA sequence\n", 69 | "\n", 70 | "Write a function that calculates the GC content of a DNA sequence by re-using the code written for the [Exercises 1.4.2](Introduction_to_python_day_1_session_4.ipynb#Exercises-1.4.2) yesterday." 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "## Excercises 2.2.3\n", 78 | "\n", 79 | "### Extract the list of all overlaping sub-sequences\n", 80 | "Write a function that extracts a list of overlapping sub-sequences for a given window size from a given sequence. Do not forget to test it on a given DNA sequence." 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## Modules" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "So far we have been writing Python code in files as executable scripts without knowning that they are also modules from which we are able to call the different functions defined in them.\n", 95 | "\n", 96 | "A module is a file containing Python definitions and statements. The file name is the module name with the suffix .py appended. Create a file called `my_first_module.py` in the current directory with the following contents:" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "def say_hello(user):\n", 108 | " print('hello', user, '!')" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "Now enter the Python interpreter from the directory you've created `my_first_module.py` file and import the `say_hello` function from this module with the following command:\n", 116 | "\n", 117 | "```bash\n", 118 | "python3\n", 119 | "Python 3.5.2 (default, Jun 30 2016, 18:10:25) \n", 120 | "[GCC 4.2.1 Compatible Apple LLVM 7.0.2 (clang-700.1.81)] on darwin\n", 121 | "Type \"help\", \"copyright\", \"credits\" or \"license\" for more information.\n", 122 | ">>> from my_first_module import say_hello\n", 123 | ">>> say_hello('Anne')\n", 124 | "hello Anne !\n", 125 | ">>> \n", 126 | "```\n", 127 | "\n", 128 | "There is one module already stored in the course directory called `my_first_module.py`, if you wish to import it into this notebook, below is what you need to do. If you wish to edit this file and change the code or add another function, you will have to restart the notebook to have these changes taken into account using the restart the kernel button in the menu bar." 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "from my_first_module import say_hello\n", 140 | "say_hello('Anne')" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "A module can contain executable statements as well as function definitions. These statements are intended to initialize the module. They are executed only the first time the module name is encountered in an import statement. \n", 148 | "They are also run if the file is executed as a script.\n", 149 | "\n", 150 | "Do comment out these executable statements if you do not wish to have them executed when importing your module.\n", 151 | "\n", 152 | "For more information about modules, https://docs.python.org/3/tutorial/modules.html." 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "## Excercises 2.2.4\n", 160 | "### Calculate GC content along the DNA sequence\n", 161 | "Combine the two methods written above to calculates the GC content of each overlapping sliding window along a DNA sequence from start to end. \n", 162 | "\n", 163 | "From the two files you wrote, import the methods written at exercices 2.2.2 and 2.2.3.\n", 164 | "The new function should take two arguments, the DNA sequence and the size of the sliding window, and re-use the previous methods written to calculate the GC content of a DNA sequence and to extract the list of all overlapping sub-sequences. It returns a list of GC% along the DNA sequence." 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "## Next session\n", 172 | "\n", 173 | "Go to our next notebook: [Introduction_to_python_day_2_session_3](Introduction_to_python_day_2_session_3.ipynb)" 174 | ] 175 | } 176 | ], 177 | "metadata": { 178 | "kernelspec": { 179 | "display_name": "Python 3", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.5.2" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 0 198 | } 199 | -------------------------------------------------------------------------------- /Introduction_to_python_day_2_session_4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# An introduction to solving biological problems with Python\n", 8 | "\n", 9 | "## Session 2.4: BioPython\n", 10 | "\n", 11 | "- [Working with sequences](#Working-with-sequences)\n", 12 | "- [Connecting with biological databases](#Connecting-with-biological-databases)\n", 13 | "- [Exercises 2.4.1](#Exercises-2.4.1)" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "## Using third party library, BioPython\n", 21 | "\n", 22 | "Biopython tutorial: http://biopython.org/DIST/docs/tutorial/Tutorial.html\n", 23 | "\n", 24 | "The goal of Biopython is to make it as easy as possible to use Python for bioinformatics by creating high-quality, reusable modules and classes. Biopython features include parsers for various Bioinformatics file formats (BLAST, Clustalw, FASTA, Genbank,...), access to online services (NCBI, Expasy,...), interfaces to common and not-so-common programs (Clustalw, DSSP, MSMS...), a standard sequence class, various clustering modules, a KD tree data structure etc. and even documentation." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Working with sequences" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "We can create a sequence by defining a `Seq` object with strings. `Bio.Seq()` takes as input a string and converts in into a Seq object. We can print the sequences, individual residues, lengths and use other functions to get summary statistics. " 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "# Creating sequence\n", 50 | "from Bio.Seq import Seq\n", 51 | "my_seq = Seq(\"AGTACACTGGT\")\n", 52 | "print(my_seq)\n", 53 | "print(my_seq[10])\n", 54 | "print(my_seq[1:5])\n", 55 | "print(len(my_seq))\n", 56 | "print(my_seq.count( \"A\" ))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "We can use functions from `Bio.SeqUtils` to get idea about a sequence " 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "# Calculate the molecular weight\n", 75 | "from Bio.SeqUtils import GC, molecular_weight\n", 76 | "print(GC( my_seq ))\n", 77 | "print(molecular_weight( my_seq ))" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "One letter code protein sequences can be converted into three letter codes using `seq3` utility " 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "from Bio.SeqUtils import seq3\n", 96 | "print(seq3( my_seq ))" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "Alphabets defines how the strings are going to be treated as sequence object. `Bio.Alphabet` module defines the available alphabets for Biopython. `Bio.Alphabet.IUPAC` provides basic definition for DNA, RNA and proteins. " 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "from Bio.Alphabet import IUPAC\n", 115 | "my_dna = Seq(\"AGTACATGACTGGTTTAG\", IUPAC.unambiguous_dna)\n", 116 | "print(my_dna)\n", 117 | "print(my_dna.alphabet)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "my_dna.complement()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "my_dna.reverse_complement()" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "my_dna.translate()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "### Parsing sequence file format: FASTA files" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "Sequence files can be parsed and read the same way we read other files. " 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "with open( \"data/glpa.fa\" ) as fileObj:\n", 176 | " print(fileObj.read())" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "Biopython provides specific functions to allow parsing/reading sequence files. " 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "# Reading FASTA files\n", 195 | "from Bio import SeqIO\n", 196 | "\n", 197 | "fileObj = open(\"data/glpa.fa\")\n", 198 | "\n", 199 | "for protein in SeqIO.parse(fileObj, 'fasta'):\n", 200 | " print(protein.id)\n", 201 | " print(protein.seq)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "Sequence objects can be written into files using file handles with the function `SeqIO.write()`. We need to provide the name of the output sequence file and the sequence file format. " 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": false 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "# Writing FASTA files\n", 220 | "from Bio.SeqRecord import SeqRecord\n", 221 | "from Bio.Seq import Seq\n", 222 | "from Bio.Alphabet import IUPAC\n", 223 | "\n", 224 | "sequence = 'MYGKIIFVLLLSEIVSISASSTTGVAMHTSTSSSVTKSYISSQTNDTHKRDTYAATPRAHEVSEISVRTVYPPEEETGERVQLAHHFSEPEITLIIFG'\n", 225 | "\n", 226 | "fileObj = open( \"mySeqFile.fa\", \"w\")\n", 227 | " \n", 228 | "seqObj = Seq(sequence, IUPAC.protein)\n", 229 | "proteinObjs = [SeqRecord(seqObj, id=\"MYID\", description='my description'),]\n", 230 | "\n", 231 | "SeqIO.write(proteinObjs, fileObj, 'fasta')\n", 232 | "\n", 233 | "fileObj.close()\n", 234 | "\n", 235 | "with open( \"biopython.fa\" ) as fileObj:\n", 236 | " print(fileObj.read())" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "## Connecting with biological databases" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "Sequences can be searched and downloaded from public databases. " 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "collapsed": false 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "# Read FASTA file from NCBI GenBank\n", 262 | "from Bio import Entrez\n", 263 | "\n", 264 | "Entrez.email = 'A.N.Other@example.com'\n", 265 | "socketObj = Entrez.efetch(db=\"protein\", rettype=\"fasta\", id=\"71066805\")\n", 266 | "dnaObj = SeqIO.read(socketObj, \"fasta\")\n", 267 | "socketObj.close()\n", 268 | "\n", 269 | "print(dnaObj.description)\n", 270 | "print(dnaObj.seq)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": { 277 | "collapsed": false 278 | }, 279 | "outputs": [], 280 | "source": [ 281 | "# Read SWISSPROT record\n", 282 | "from Bio import ExPASy\n", 283 | "\n", 284 | "socketObj = ExPASy.get_sprot_raw('HBB_HUMAN')\n", 285 | "proteinObj = SeqIO.read(socketObj, \"swiss\")\n", 286 | "socketObj.close()\n", 287 | "\n", 288 | "print(proteinObj.description)\n", 289 | "print(proteinObj.seq)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "## Exercises 2.4.1" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": { 302 | "collapsed": true 303 | }, 304 | "source": [ 305 | "- Retrieve a FASTA file named `data/sample.fa` and answer the following questions:\n", 306 | " - How many sequences are in the file?\n", 307 | " - What are the IDs and the lengths of the longest and the shortest sequences?\n", 308 | " - Create a new object that contains only sequences with length longer than 500bp. What is the average length of these sequences?\n", 309 | " - Calculate and print the percentage of GC contents in each of the sequences.\n", 310 | " - Write the newly created sequence object into a FASTA file named `sample.long.fa` " 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "## Congratulation! You reached the end of day 2! " 318 | ] 319 | } 320 | ], 321 | "metadata": { 322 | "kernelspec": { 323 | "display_name": "Python 3", 324 | "language": "python", 325 | "name": "python3" 326 | }, 327 | "language_info": { 328 | "codemirror_mode": { 329 | "name": "ipython", 330 | "version": 3 331 | }, 332 | "file_extension": ".py", 333 | "mimetype": "text/x-python", 334 | "name": "python", 335 | "nbconvert_exporter": "python", 336 | "pygments_lexer": "ipython3", 337 | "version": "3.5.2" 338 | } 339 | }, 340 | "nbformat": 4, 341 | "nbformat_minor": 0 342 | } 343 | -------------------------------------------------------------------------------- /planning.md: -------------------------------------------------------------------------------- 1 | # Planning and ideas 2 | 3 | 'An Introduction to Solving Biological Problems with Python' training can be divided into 4 sessions over two days. 4 | 5 | 1. DAY 1. MORNING. SESSION 1.: running the Python interpreter, variables and types, arithmetic, basic data structures 6 | 2. DAY 1. AFTERNOON. SESSION 2.: logic & flow control, loops, exceptions, importing libraries 7 | 3. DAY 2. MORNING. SESSION 3.: custom functions, variable scope, some biological examples 8 | 4. DAY 2. AFTERNOON. SESSION 4.: dealing with files, parsing file formats, introduction to BioPython 9 | 10 | ## DAY 1. MORNING. SESSION 1. 11 | 12 | ### Part 1. 13 | 14 | INTRO: the python programming language & python interpreter (command line) 15 | Python is free, cross-platform, widely used, well documented & well supported. 16 | Python is a simple interpreted language, with no separate compilation step. 17 | 18 | - Getting started 19 | - Printing values 20 | - Using variables: they are names for values, created by use. No declaration necessary. 21 | A variable is just a name, it does not have a type. Values are garbage collected, 22 | if nothing refers to data any longer, it can be recycled. Must assign value to variable 23 | before using it. Python does not assume default values for variables, 24 | doing so can mask many errors. 25 | - Simple data types: Values do have types. Use functions to convert between types. 26 | - booleans 27 | - integers 28 | - floating point numbers 29 | - complex numbers 30 | - strings are sequences of characters 31 | - the None object 32 | - Arithmetic: addition, subtraction, multiplication, division, exponentiation, remainder 33 | - Saving code in files 34 | - Comments 35 | 36 | #### EXERCISES 37 | 38 | ``` 39 | * create a variable, print out a message 40 | * addition operator 41 | * calculate the mean of two variables 42 | * [1.1] Print DNA sequence from amino acid one. 43 | ``` 44 | 45 | ### Part 2. 46 | 47 | As well as the basic data types we introduced, python has several ways of storing 48 | a collection of values. We are going to see four of them: tuples, lists, sets and 49 | dictionaries. 50 | 51 | - Collections: complex data types 52 | - tuples: A tuple is a sequence of immutable Python objects. Tuples are sequences, 53 | just like lists. The only difference is that tuples can't be changed i.e., 54 | tuples are immutable and tuples use parentheses and lists use square brackets. 55 | - lists: the most popular [value, value, value, ...] it is mutable, it can be 56 | changed after been created. It is heterogeneous, it can store values of many kinds. 57 | Appending values to a list lengthens it, deleting values shortens it. Most 58 | operations on lists are methods. Two that are often used incorrectly sort() and reverse() 59 | - manipulating tuples and lists 60 | 61 | Online Python doc: https://docs.python.org/2/ Library | 5.6. Sequence Types | Mutable Sequence Types (5.6.4) 62 | 63 | #### EXERCISES 64 | 65 | ``` 66 | * [1.2] Print DNA sequence from a list of DNA codons 67 | ``` 68 | 69 | - String manipulations and format: strings are indexed exactly like lists. 70 | Strings are immutable, they cannot be changed in place. Use + to concatenate strings. 71 | Concatenation always produces a new string. Use string % to format output. 72 | Use triple quotes for multi-line strings. Strings have methods: capitalize() 73 | upper() lower() count() find() replace() 74 | 75 | Online Python doc: https://docs.python.org/2/ Library | 5.6. Sequence Types | 5.6.1. String Methods 76 | 77 | Online Python doc: https://docs.python.org/2/ Library | 5.6. Sequence Types | 5.6.2. String Formatting Operations 78 | 79 | #### EXERCISES 80 | 81 | ``` 82 | * [1.3] String manipulation using your name 83 | ``` 84 | 85 | - Sets contain unique unordered elements. They are very similar to lists but 86 | because the elements are not in order they do not have an index. 87 | 88 | Online Python doc: https://docs.python.org/2/ Library | 5.7. Set Types 89 | 90 | #### EXERCISES 91 | 92 | ``` 93 | * [1.4] Find the unique amino acid codes in a protein sequence 94 | ``` 95 | 96 | - Dictionaries contain a mapping of keys to values 97 | 98 | Online Python doc: https://docs.python.org/2/ Library | 5.8. Mapping Types 99 | 100 | ``` 101 | Dictionary can be very useful when combined with string formatting e.g. 102 | format_string = "Dear %(name)s, we have sequenced %(num)d libraries. The yield is %(yield)dM reads." 103 | print format_string % {'name': 'Anne', 'num':3, 'yield': 182} 104 | ``` 105 | 106 | #### EXERCISES 107 | 108 | ``` 109 | * [1.5] Use a dictionary to map between codon sequences and amino acids they 110 | encode to print out the name of the amino acids of a DNA sequence 111 | ``` 112 | 113 | 114 | ``` 115 | >>> TAKE HOME MESSAGE 116 | >>> Variables are labels that refer to data. 117 | >>> Many variables may refer to the same piece of data. 118 | >>> Use strings to store text. 119 | >>> Use lists to store many related values in order. 120 | >>> User sets to store unique related values in order. 121 | >>> Use dictionaries to store key/value pairs. 122 | ``` 123 | 124 | ## DAY 1. AFTERNOON. SESSION 2. 125 | 126 | ### Part 1. 127 | 128 | INTRO: program control and logic - code blocks: if/loops/exceptions. 129 | Real power of programs comes from repetition and selection. Why indentation? 130 | Because it makes the code you write clearer and easier to read. 131 | Python style guide (PEP 8) recommends 4 spaces. 132 | Loops let us do things many times. Collections let us store many values together. 133 | 134 | - code blocks 135 | - conditional execution 136 | - the if statement: use if/elif/else to make choices 137 | - comparisons and truth 138 | 139 | #### EXERCISES 140 | 141 | ``` 142 | [2.0] Compare your age with other persons and print if you are younger/older/same age 143 | [2.?] Check if a DNA sequence contain a stop codon 144 | ``` 145 | 146 | - loops 147 | - the for loop: a for loop is used to access each value in turn 148 | - the while loop: a while loop is used to step through all possible indices 149 | - skipping and breaking loops 150 | - looping gotchas 151 | 152 | #### EXERCISES 153 | 154 | ``` 155 | [2.1] Loop over a list of bases using for and while loops 156 | ``` 157 | 158 | - more looping 159 | - using enumerate 160 | - using zip 161 | - filtering in loops 162 | 163 | #### EXERCISES 164 | 165 | ``` 166 | [2.2] Calculate the GC content of a DNA sequence 167 | ``` 168 | 169 | ### Part 2 (after break) 170 | 171 | Python provides two very important features to handle any unexpected error in your 172 | Python programs and to add debugging capabilities in them: exceptions and assertions. 173 | 174 | - exceptions: An exception is an event, which occurs during the execution of a program, 175 | that disrupts the normal flow of the program's instructions. In general, when a Python 176 | script encounters a situation that it can't cope with, it raises an exception. 177 | An exception is a Python object that represents an error. 178 | 179 | #### EXERCISES 180 | 181 | ``` 182 | [2.3] Raise an exception if the DNA sequence is not valid 183 | ``` 184 | 185 | - importing modules and libraries 186 | - help(math) 187 | - import sys 188 | - print sys.version & sys.platform 189 | - print sys.path which defines the list of directories Python searches in to find modules. 190 | sys.argv: The most commonly-used element of sys is probably sys.argv, which holds the command-line arguments of the currently-executing program. 191 | 192 | ``` 193 | >>> TAKE HOME MESSAGE 194 | >>> Use while to repeat something until something changes. 195 | >>> Use for to do something once for each part of a larger whole. 196 | >>> Use if and else to make choices. 197 | ``` 198 | 199 | ## DAY 2. MORNING. SESSION 3. 200 | 201 | ### Part 1. 202 | 203 | INTRO: function basics and definition 204 | A programming language should not include everything anyone might ever want 205 | Instead, it should make it easy for people to create what they need 206 | to solve specific problems by defining functions to create higher-level operations. 207 | In python it is done using the keyword 'def'. 208 | 209 | - function definition syntax 210 | 211 | #### EXERCISES 212 | 213 | ``` 214 | [3.1a] Create a function that calculate the means of two number and then from a list of number 215 | [3.1b] Create a function to calculate the molecular weight of a DNA sequence 216 | ``` 217 | 218 | - function arguments 219 | 220 | #### EXERCISES 221 | 222 | ``` 223 | [3.2] Extend the previous function to also calculate the weight of a RNA sequence 224 | ``` 225 | 226 | - return value 227 | 228 | #### EXERCISES 229 | 230 | ``` 231 | [3.3] Write a function that counts the number of each base found in a DNA sequence 232 | ``` 233 | 234 | ### Part 2. 235 | 236 | - variable scope: globals vs within blocks 237 | - advanced topics: anonymous functions (lambda); functions as values; nested functions 238 | 239 | #### EXERCISES 240 | 241 | ``` 242 | BIO examples 243 | - program ribosome that translates RNA into protein 244 | - extra points for also taking DNA (T -> U) 245 | - extra points for all reading frames. 246 | 247 | - calculate GC content of DNA not on whole sequence but with sliding window. 248 | 249 | - calculate hydrophobicity with sliding window. 250 | ``` 251 | 252 | ``` 253 | >>> TAKE HOME MESSAGE 254 | >>> Define functions to break programs down into manageable pieces. 255 | >>> Remember that a function is really just another kind of data. 256 | ``` 257 | 258 | # Day 2. AFTERNOON. SESSION 4. 259 | 260 | ### Part 1. 261 | 262 | INTRO: In this session we cover 2 widely used ways of reading data into our 263 | programs, via the command line and by reading files from disk. 264 | 265 | - reading command line arguments 266 | 267 | #### EXERCISES 268 | 269 | ``` 270 | [4.1a] Write a script that takes 2 integers from the command line using the sys.argv 271 | library, add the two numbers and printout the result 272 | [4.1b] Write a script tha takes a DNA sequence from the command line and prints out 273 | its length and GC content 274 | ``` 275 | 276 | - the argparse library 277 | 278 | #### EXERCISES 279 | 280 | ``` 281 | [4.1c] Use the argparse library to do the same exercise as above 282 | ``` 283 | 284 | ### Part 2. 285 | 286 | - file objects 287 | - mode modifiers 288 | - error checking 289 | - closing files 290 | - reading from files 291 | - the with statement 292 | - writing to files 293 | 294 | #### EXERCISES 295 | 296 | ``` 297 | [4.2a] Write a script that writes a list of number to a file, with each number 298 | on a separate line 299 | [4.2b] Open a file and for each line print out the line number and its length 300 | ``` 301 | 302 | - data formats 303 | - delimited files 304 | - reading delimited files 305 | - writing delimited files 306 | - more advanced examples 307 | - read csv file 308 | - write csv file 309 | 310 | #### EXERCISES 311 | 312 | ``` 313 | [4.3a] Read a tab separated file 314 | [4.3b] Write a csv file 315 | ``` 316 | 317 | - fixed format files (PDB) 318 | - XML files 319 | - python file libraries: os & os.path 320 | - more advanced examples 321 | - recursive file search 322 | - recursive delete 323 | 324 | - system calls 325 | 326 | #### EXERCISES 327 | 328 | ``` 329 | [4.4] Write a script that execute the command 'ls' to get the list of files 330 | then modify your script to only print python files 331 | ``` 332 | 333 | ### Part 3. 334 | 335 | - using BioPython 336 | 337 | Biopython is to make it as easy as possible to use Python for bioinformatics by 338 | creating high-quality, reusable modules and classes. Biopython features include 339 | parsers for various Bioinformatics file formats (BLAST, Clustalw, FASTA, Genbank,...), 340 | access to online services (NCBI, Expasy,...), interfaces to common and not-so-common programs 341 | (Clustalw, DSSP, MSMS...), a standard sequence class, various clustering modules, 342 | a KD tree data structure etc. and even documentation. 343 | 344 | Basically, we just like to program in Python and want to make it as easy as possible 345 | to use Python for bioinformatics by creating high-quality, reusable modules and scripts. 346 | 347 | Biopython tutorial http://biopython.org | Tutorial | 1.2 What can I find in the Biopython package 348 | 349 | #### BioPython EXAMPLES 350 | 351 | - more advanced examples 352 | - writing FASTA files 353 | - reading FASTA files 354 | 355 | ``` 356 | >>> TAKE HOME MESSAGE 357 | >>> Happy Python programming! 358 | ``` 359 | 360 | IDEAS: if you need help: http://stackoverflow.com/ 361 | 362 | IDEAS: Pylint is a tool that checks for errors in python code, tries to enforce a coding standard and looks for bad code smells: http://www.pylint.org/ 363 | 364 | IDEAS: Any code that hasn't been tested is probably wrong: Python unit testing framework unittest 365 | 366 | IDEAS: from http://software-carpentry.org/v4/python 367 | -------------------------------------------------------------------------------- /Introduction_to_python_day_1_session_3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# An introduction to solving biological problems with Python\n", 10 | "\n", 11 | "## Session 1.3: Conditional execution\n", 12 | "\n", 13 | "- [Code blocks](#Code-blocks)\n", 14 | "- [Conditional execution](#Conditional-execution)\n", 15 | "- [Exercises 1.3.1](#Exercises-1.3.1)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Program control and logic" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "A program will normally run by executing the stated commands, one after the other in sequential order. Frequently however, you will need the program to deviate from this. There are several ways of diverting from the line-by-line paradigm:\n", 30 | "\n", 31 | "- With conditional statements. Here you can check if some statement or expression is true, and if it is then you continue on with the following block of code, otherwise you might skip it or execute a different bit of code.\n", 32 | "\n", 33 | "- By performing repetitive loops through the same block of code, where each time through the loop different values may be used for the variables.\n", 34 | "\n", 35 | "- Through the use of functions (subroutines) where the program’s execution jumps from a particular line of code to an entirely different spot, even in a different file or module, to do a task before (usually) jumping back again. Functions are covered in the next session, so we will not discuss them yet.\n", 36 | "\n", 37 | "- By checking if an error or exception occurs, i.e. something illegal has happened, and executing different blocks of code accordingly" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Code blocks" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "With all of the means by which Python code execution can jump about we naturally need to be aware of the boundaries of the block of code we jump into, so that it is clear at what point the job is done, and program execution can jump back again. In essence it is required that the end of a function, loop or conditional statement be defined, so that we know the bounds of their respective code blocks." 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "Python uses indentation to show which statements are in a block of code, other languages use specific `begin` and `end` statements or curly braces `{}`. It doesn't matter how much indentation you use, but the whole block must be consistent, i.e., if the first statement is indented by four spaces, the rest of the block must be indented by the same amount. The Python style guide recommends using 4-space indentation. Use spaces, rather than tabs, since different editors display tab characters with different widths.\n", 59 | "\n", 60 | "The use of indentation to delineate code blocks is illustrated in an abstract manner in the following scheme: \n", 61 | "\n", 62 | "Statement 1:\n", 63 | "\n", 64 | " Command A – in the block of statement 1\n", 65 | " Command B – in the block of statement 1\n", 66 | " \n", 67 | " Statement 2:\n", 68 | " Command C – in the block of statement 2\n", 69 | " Command D – in the block of statement 2\n", 70 | " \n", 71 | " Command E – back in the block of statement 1\n", 72 | "\n", 73 | "Command F – outside all statement blocks\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## Conditional execution" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### The if statement" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "A conditional if statement is used to specify that some block of code should only be executed if some associated test is upheld; a conditional expression evaluates to True. This might also involve subsidiary checks using the elif statement to use an alternative block if the previous expression turns out to be False. There can even be a final else statement to do something if none of the checks are passed. \n", 95 | "\n", 96 | "The following uses statements that test whether a number is less than zero, greater than zero or otherwise equal to zero and will print out a different message in each case:" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "x = -3\n", 108 | "\n", 109 | "if x > 0:\n", 110 | " print(\"Value is positive\")\n", 111 | "\n", 112 | "elif x < 0:\n", 113 | " print(\"Value is negative\")\n", 114 | "\n", 115 | "else:\n", 116 | " print(\"Value is zero\")" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "The general form of writing out such combined conditional statements is as follows:" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "
\n",
131 |     "if conditionalExpression1:\n",
132 |     "    # codeBlock1\n",
133 |     "\n",
134 |     "elif conditionalExpression2:\n",
135 |     "    # codeBlock2\n",
136 |     "\n",
137 |     "elif conditionalExpressionN:\n",
138 |     "    # codeBlockN\n",
139 |     "    +any number of additional elif statements, then finally:\n",
140 |     "\n",
141 |     "else:\n",
142 |     "    # codeBlockE\n",
143 |     "
" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "\n", 151 | "The elif block is optional, and we can use as many as we like. The else block is also optional, so will only have the if statement, which is a fairly common situation. It is often good practice to include else where possible though, so that you always catch cases that do not pass, otherwise values might go unnoticed, which might not be the desired behaviour." 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "Placeholders are needed for “empty” code blocks:" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": { 165 | "collapsed": false 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "gene = \"BRCA2\"\n", 170 | "geneExpression = -1.2\n", 171 | "\n", 172 | "if geneExpression < 0:\n", 173 | " print(gene, \"is downregulated\")\n", 174 | " \n", 175 | "elif geneExpression > 0:\n", 176 | " print(gene, \"is upregulated\")\n", 177 | " \n", 178 | "else:\n", 179 | " pass" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "For very simple conditional checks, you can write the `if` statement on a single line as a single expression, and the result will be the expression before the `if` if the condition is true or the expression after the `else` otherwise.\n", 187 | "\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": false 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "x = 11\n", 199 | "\n", 200 | "if x < 10:\n", 201 | " s = \"Yes\"\n", 202 | "else:\n", 203 | " s = \"No\"\n", 204 | "print(s)\n", 205 | "\n", 206 | "# Could also be written onto one line\n", 207 | "s = \"Yes\" if x < 10 else \"No\"\n", 208 | "print(s)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "### Comparisons and truth" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "With conditional execution the question naturally arises as to which expressions are deemed to be true and which false. For the python boolean values True and False the answer is (hopefully) obvious. Also, the logical states of truth and falsehood that result from conditional checks like “Is x greater than 5?” or “Is y in this list?” are also clear. When comparing values Python has the standard comparison (or relational) operators, some of which we have already seen:\n", 223 | "\n", 224 | "|Operator |\tDescription |\tExample |\n", 225 | "|---------|-------------|-----------|\n", 226 | "|`==` |\t equality |\t1 == 2 # False |\n", 227 | "|`!=` |\t non equality |\t1 != 2 # True |\n", 228 | "| `<` |\t less than |\t1 < 2 # True |\n", 229 | "| `<=` |\t equal or less than |\t2 <= 2 # True |\n", 230 | "| `>` |\t greater then |\t1 > 2 # False |\n", 231 | "| `>=` |\t equal or greater than |\t1 >= 1 # True |\n", 232 | "\n", 233 | "It is notable that comparison operations can be combined, for example to check if a value is within a range." 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "x = -5\n", 245 | "\n", 246 | "if x > 0 and x < 10:\n", 247 | " print(\"In range A\")\n", 248 | " \n", 249 | "elif x < 0 or x > 10:\n", 250 | " print(\"In range B\")" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "Python has two additional comparison operators is and is not. These compare whether two objects are the same object, whereas == and != compare whether values are the same.\n", 258 | "\n", 259 | "As an example in Python:" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": { 266 | "collapsed": false 267 | }, 268 | "outputs": [], 269 | "source": [ 270 | "x = [123, 54, 92, 87, 33]\n", 271 | "y = x[:] # y is a copy of x\n", 272 | "z = x\n", 273 | "print(x)\n", 274 | "print(\"Are values of y and x the same?\", y == x)\n", 275 | "print(\"Are objects y and x the same?\", y is x)\n", 276 | "print(\"Are values of z and x the same?\", z == x)\n", 277 | "print(\"Are objects z and x the same?\", z is x)\n", 278 | "# Let's change x\n", 279 | "x[1] = 23\n", 280 | "print(x)\n", 281 | "print(\"Are values of y and x the same?\", y == x)\n", 282 | "print(\"Are objects y and x the same?\", y is x)\n", 283 | "print(\"Are values of z and x the same?\", z == x)\n", 284 | "print(\"Are objects z and x the same?\", z is x)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "In Python even expressions that do not involve an obvious boolean value can be assigned a status of \"truthfulness\"; the value of an item itself can be forced to be considered as either True or False inside an if statement. For the Python built-in types discussed in this chapter the following are deemed to be False in such a context:\n", 292 | "\n", 293 | "| False value | Description | \n", 294 | "|-------------|-------------|\n", 295 | "| `None` |\tnumeric equality |\n", 296 | "| `False` |\tFalse boolean |\n", 297 | "| `0`\t| 0 integer |\n", 298 | "| `0.0` |\t0.0 floating point |\n", 299 | "| `\"\"` |\tempty string |\n", 300 | "| `()` |\tempty tuple |\n", 301 | "| `[]` |\tempty list |\n", 302 | "| `{}` |\tempty dictonary |\n", 303 | "| `set()` |\tempty set |\n", 304 | "\n", 305 | "And everything else is deemed to be True in a conditional context." 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": { 312 | "collapsed": false 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "x = '' # An empty string\n", 317 | "y = ['a'] # A list with one item\n", 318 | "\n", 319 | "if x:\n", 320 | " print(\"x is true\")\n", 321 | "else: \n", 322 | " print(\"x is false\") \n", 323 | "\n", 324 | "if y:\n", 325 | " print(\"y is true\")\n", 326 | "else:\n", 327 | " print(\"y is false\")" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "## Exercises 1.3.1\n", 335 | "\n", 336 | "1. (a) Create a `if..elif..else` block that will compare a variable containing your age to another variable containing another person's age and print a statement which says if you are younger, older or the same age as that person.\n", 337 | "2. (b) Use an `if` statement to check if some variable containing DNA sequence contains a stop codon. (e.g. `dna = \"ATGGCGGTCGAATAG\"`), first just check for one possible stop, but then extend your code to look for any of the 3 stop codons (`TAG`, `TAA`, `TGA`). Hint: recall that the `in` operator lets you check if a string contains some substring, and returns `True` or `False` accordingly." 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": {}, 343 | "source": [ 344 | "## Next session\n", 345 | "\n", 346 | "Go to our next notebook: [Introduction_to_python_day_1_session_4](Introduction_to_python_day_1_session_4.ipynb)" 347 | ] 348 | } 349 | ], 350 | "metadata": { 351 | "kernelspec": { 352 | "display_name": "Python 3", 353 | "language": "python", 354 | "name": "python3" 355 | }, 356 | "language_info": { 357 | "codemirror_mode": { 358 | "name": "ipython", 359 | "version": 3 360 | }, 361 | "file_extension": ".py", 362 | "mimetype": "text/x-python", 363 | "name": "python", 364 | "nbconvert_exporter": "python", 365 | "pygments_lexer": "ipython3", 366 | "version": "3.5.2" 367 | } 368 | }, 369 | "nbformat": 4, 370 | "nbformat_minor": 0 371 | } 372 | -------------------------------------------------------------------------------- /Introduction_to_python_day_2_introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# An introduction to solving biological problems with Python\n", 12 | "\n", 13 | "- Our course webpage: http://pycam.github.io\n", 14 | "- Python website: https://www.python.org/ \n", 15 | "- [Python 3 Standard Library](https://docs.python.org/3/library/index.html])" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "slideshow": { 22 | "slide_type": "slide" 23 | } 24 | }, 25 | "source": [ 26 | "## Learning objectives\n", 27 | "\n", 28 | "- **Recall** what we've learned so far on variables, common data types, conditions and loops\n", 29 | "- **Propose and create** solutions using these concepts in an exercise\n", 30 | "- **Employ** functions and modules to reuse code\n", 31 | "- **Practice** reading and writing files with Python\n", 32 | "- **Use** third-party library BioPython\n", 33 | "- **Solve** more complex exercises" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "## Course schedule - day two\n", 45 | "\n", 46 | "- 09:30-10:30: [1h00] **Introduction** - What we've learned so far...\n", 47 | "- 10:30-10:45: *break*\n", 48 | "- 10:45-11:45: [1h00] **Session 2.1** - Functions\n", 49 | "- 11:45-12:00: *break*\n", 50 | "- 12:00-13:00: [1h00] **Session 2.2** - Exercices and Modules\n", 51 | "- 13:00-14:00: *lunch break*\n", 52 | "- 14:00-15:30: [1h30] **Session 2.3** - Files\n", 53 | "- 15:30-15:45: *break*\n", 54 | "- 15:45-16:45: [1h00] **Session 2.4** - BioPython" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": { 60 | "slideshow": { 61 | "slide_type": "slide" 62 | } 63 | }, 64 | "source": [ 65 | "## What we've learned so far\n", 66 | "\n", 67 | "- Simple data types, Collections\n", 68 | "- Conditional execution\n", 69 | "- Loops\n", 70 | "- Functions used so far..." 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": { 76 | "slideshow": { 77 | "slide_type": "slide" 78 | } 79 | }, 80 | "source": [ 81 | "## Collections" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 43, 87 | "metadata": { 88 | "collapsed": false, 89 | "slideshow": { 90 | "slide_type": "subslide" 91 | } 92 | }, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "A tuple: (2, 3, 4, 5)\n", 99 | "First element of tuple: 2\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "## Tuple - immutable\n", 105 | "example_tuple = (2, 3, 4, 5)\n", 106 | "print('A tuple:', example_tuple)\n", 107 | "print('First element of tuple:', example_tuple[0])" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 44, 113 | "metadata": { 114 | "collapsed": false, 115 | "slideshow": { 116 | "slide_type": "subslide" 117 | } 118 | }, 119 | "outputs": [ 120 | { 121 | "name": "stdout", 122 | "output_type": "stream", 123 | "text": [ 124 | "A list: [2, 3, 4, 5]\n", 125 | "First element of list: 2\n", 126 | "Appended list: [2, 3, 4, 5, 12]\n", 127 | "Modified list: [45, 3, 4, 5, 12]\n" 128 | ] 129 | } 130 | ], 131 | "source": [ 132 | "## List\n", 133 | "example_list = [2, 3, 4, 5]\n", 134 | "print('A list:', example_list)\n", 135 | "print('First element of list:', example_list[0])\n", 136 | "example_list.append(12)\n", 137 | "print('Appended list:', example_list)\n", 138 | "example_list[0] = 45\n", 139 | "print('Modified list:', example_list)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 45, 145 | "metadata": { 146 | "collapsed": false, 147 | "slideshow": { 148 | "slide_type": "subslide" 149 | } 150 | }, 151 | "outputs": [ 152 | { 153 | "name": "stdout", 154 | "output_type": "stream", 155 | "text": [ 156 | "Here is a string: ATGTCATTT\n", 157 | "First character: A\n", 158 | "Number of characters in text 9\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "## String - immutable, tuple of characters\n", 164 | "text = \"ATGTCATTT\"\n", 165 | "print('Here is a string:', text)\n", 166 | "print('First character:', text[0])\n", 167 | "print('Number of characters in text', len(text))" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 46, 173 | "metadata": { 174 | "collapsed": false, 175 | "slideshow": { 176 | "slide_type": "subslide" 177 | } 178 | }, 179 | "outputs": [ 180 | { 181 | "name": "stdout", 182 | "output_type": "stream", 183 | "text": [ 184 | "A set: {1, 2, 4, 5, 6}\n" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "## Set - unique unordered elements\n", 190 | "example_set = set([1,2,2,2,2,4,5,6,6,6])\n", 191 | "print('A set:', example_set)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 47, 197 | "metadata": { 198 | "collapsed": false, 199 | "slideshow": { 200 | "slide_type": "subslide" 201 | } 202 | }, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "A dictionary: {'G': 'Guanine', 'T': 'Thymine', 'A': 'Adenine', 'C': 'Cytosine'}\n", 209 | "Value associated to key C: Cytosine\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "## Dictionary\n", 215 | "example_dictionary = {\"A\": \"Adenine\", \n", 216 | " \"C\": \"Cytosine\", \n", 217 | " \"G\": \"Guanine\", \n", 218 | " \"T\": \"Thymine\"}\n", 219 | "print('A dictionary:', example_dictionary)\n", 220 | "print('Value associated to key C:', example_dictionary['C'])" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": { 226 | "slideshow": { 227 | "slide_type": "slide" 228 | } 229 | }, 230 | "source": [ 231 | "## Conditional execution" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 48, 237 | "metadata": { 238 | "collapsed": false 239 | }, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "Is 2 < 5? True\n", 246 | "Is 2 == 5? False\n", 247 | "Is 2 < 5 and 2 > 1? True\n", 248 | "x is equal to 2\n" 249 | ] 250 | } 251 | ], 252 | "source": [ 253 | "x = 2\n", 254 | "print('Is 2 < 5?', x < 5)\n", 255 | "print('Is 2 == 5?', x == 5)\n", 256 | "print('Is 2 < 5 and 2 > 1?', (x < 5) & (x > 1))\n", 257 | "\n", 258 | "if x == 2:\n", 259 | " print('x is equal to 2')" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": { 265 | "slideshow": { 266 | "slide_type": "slide" 267 | } 268 | }, 269 | "source": [ 270 | "## Loops" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 49, 276 | "metadata": { 277 | "collapsed": false, 278 | "slideshow": { 279 | "slide_type": "-" 280 | } 281 | }, 282 | "outputs": [ 283 | { 284 | "name": "stdout", 285 | "output_type": "stream", 286 | "text": [ 287 | "The element in list is: A\n", 288 | "The element in list is: C\n", 289 | "The element in list is: A\n", 290 | "The element in list is: T\n", 291 | "The element in list is: G\n" 292 | ] 293 | } 294 | ], 295 | "source": [ 296 | "example_list = ['A', 'C', 'A', 'T', 'G']\n", 297 | "\n", 298 | "## Looping through a list\n", 299 | "for element in example_list:\n", 300 | " print(\"The element in list is:\", element)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 50, 306 | "metadata": { 307 | "collapsed": false, 308 | "slideshow": { 309 | "slide_type": "subslide" 310 | } 311 | }, 312 | "outputs": [ 313 | { 314 | "name": "stdout", 315 | "output_type": "stream", 316 | "text": [ 317 | "The index in loop is 0 and its corresponding element is in the list A\n", 318 | "The index in loop is 1 and its corresponding element is in the list C\n", 319 | "The index in loop is 2 and its corresponding element is in the list A\n", 320 | "The index in loop is 3 and its corresponding element is in the list T\n", 321 | "The index in loop is 4 and its corresponding element is in the list G\n" 322 | ] 323 | } 324 | ], 325 | "source": [ 326 | "example_list = ['A', 'C', 'A', 'T', 'G']\n", 327 | "\n", 328 | "## Looping through list of indices using range() method\n", 329 | "for index in range(len(example_list)):\n", 330 | " print(\"The index in loop is\", index, 'and its corresponding element is in the list', example_list[index])" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 51, 336 | "metadata": { 337 | "collapsed": false, 338 | "slideshow": { 339 | "slide_type": "subslide" 340 | } 341 | }, 342 | "outputs": [ 343 | { 344 | "name": "stdout", 345 | "output_type": "stream", 346 | "text": [ 347 | "A found, the corresponding value is Adenine for item at position 0\n", 348 | "Other value for item at position 1\n", 349 | "A found, the corresponding value is Adenine for item at position 2\n", 350 | "T found, the corresponding value is Thymine for item at position 3\n", 351 | "Other value for item at position 4\n" 352 | ] 353 | } 354 | ], 355 | "source": [ 356 | "## Looping using enumerate()\n", 357 | "example_list = ['A', 'C', 'A', 'T', 'G']\n", 358 | "example_dictionary = {\"A\": \"Adenine\", \"C\": \"Cytosine\", \"G\": \"Guanine\", \"T\": \"Thymine\"}\n", 359 | "\n", 360 | "for index, value in enumerate(example_list):\n", 361 | " if value == 'A':\n", 362 | " print(value, \"found, the corresponding value is\", example_dictionary[value], 'for item at position', index)\n", 363 | " elif value == 'T':\n", 364 | " print(value, \"found, the corresponding value is\", example_dictionary[value], 'for item at position', index)\n", 365 | " else:\n", 366 | " print(\"Other value\", 'for item at position', index)" 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": { 372 | "slideshow": { 373 | "slide_type": "slide" 374 | } 375 | }, 376 | "source": [ 377 | "## Functions used so far..." 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 52, 383 | "metadata": { 384 | "collapsed": false 385 | }, 386 | "outputs": [ 387 | { 388 | "name": "stdout", 389 | "output_type": "stream", 390 | "text": [ 391 | "There are 5 elements in the list ['A', 'C', 'A', 'T', 'G']\n", 392 | "['ATG', 'TCA', 'CCG', 'GGC']\n" 393 | ] 394 | } 395 | ], 396 | "source": [ 397 | "example_list = ['A', 'C', 'A', 'T', 'G']\n", 398 | "print('There are', len(example_list), 'elements in the list', example_list)\n", 399 | "print(\"ATG TCA CCG GGC\".split())" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": { 405 | "slideshow": { 406 | "slide_type": "slide" 407 | } 408 | }, 409 | "source": [ 410 | "## Exercise 2.0.1\n", 411 | "\n", 412 | "- Create a string variable with the lyrics of Imagine by John Lennon, 1971. Split into words. Print the total number of words, and the number of unique words. Calculate the frequency of each word and store the result into a dictionary. Print each unique word along with its frequency. Find the most frequent word in the song, print it with its frequency.\n", 413 | "\n", 414 | "
" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 53, 420 | "metadata": { 421 | "collapsed": false, 422 | "slideshow": { 423 | "slide_type": "subslide" 424 | } 425 | }, 426 | "outputs": [], 427 | "source": [ 428 | "lyrics = \"\"\"\n", 429 | "Imagine there's no Heaven\n", 430 | "It's easy if you try\n", 431 | "No Hell below us\n", 432 | "Above us only sky\n", 433 | "\n", 434 | "Imagine all the people\n", 435 | "Living for today\n", 436 | "Aaa haa\n", 437 | "\n", 438 | "Imagine there's no countries\n", 439 | "It isn't hard to do\n", 440 | "Nothing to kill or die for\n", 441 | "And no religion too\n", 442 | "\n", 443 | "Imagine all the people\n", 444 | "Living life in peace\n", 445 | "Yoo hoo\n", 446 | "\n", 447 | "You may say I'm a dreamer\n", 448 | "But I'm not the only one\n", 449 | "I hope someday you'll join us\n", 450 | "And the world will be as one\n", 451 | "\n", 452 | "Imagine no possessions\n", 453 | "I wonder if you can\n", 454 | "No need for greed or hunger\n", 455 | "A brotherhood of man\n", 456 | "\n", 457 | "Imagine all the people\n", 458 | "Sharing all the world\n", 459 | "Yoo hoo\n", 460 | "\n", 461 | "You may say I'm a dreamer\n", 462 | "But I'm not the only one\n", 463 | "I hope someday you'll join us\n", 464 | "And the world will live as one\n", 465 | "\"\"\"" 466 | ] 467 | }, 468 | { 469 | "cell_type": "markdown", 470 | "metadata": { 471 | "slideshow": { 472 | "slide_type": "slide" 473 | } 474 | }, 475 | "source": [ 476 | "## Next session\n", 477 | "\n", 478 | "Go to our next notebook: [Introduction_to_python_day_2_session_1](Introduction_to_python_day_2_session_1.ipynb)" 479 | ] 480 | } 481 | ], 482 | "metadata": { 483 | "celltoolbar": "Slideshow", 484 | "kernelspec": { 485 | "display_name": "Python 3", 486 | "language": "python", 487 | "name": "python3" 488 | }, 489 | "language_info": { 490 | "codemirror_mode": { 491 | "name": "ipython", 492 | "version": 3 493 | }, 494 | "file_extension": ".py", 495 | "mimetype": "text/x-python", 496 | "name": "python", 497 | "nbconvert_exporter": "python", 498 | "pygments_lexer": "ipython3", 499 | "version": "3.5.2" 500 | } 501 | }, 502 | "nbformat": 4, 503 | "nbformat_minor": 0 504 | } 505 | -------------------------------------------------------------------------------- /Introduction_to_python_day_1_session_4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# An introduction to solving biological problems with Python\n", 10 | "\n", 11 | "## Session 1.4: Loops\n", 12 | "\n", 13 | "- [The for loop](#The-for-loop)\n", 14 | "- [The while loop](#The-while-loop)\n", 15 | "- [Skipping and breaking loops](#Skipping-and-breaking-loops)\n", 16 | "- [Exercises 1.4.1](#Exercises-1.4.1)\n", 17 | "- [More looping using range and enumerate](#More-looping)\n", 18 | "- [Filtering in loops](#Filtering-in-loops)\n", 19 | "- [Exercises 1.4.2](#Exercises-1.4.2)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## Loops" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "When an operation needs to be repeated multiple times, for example on all of the items in a list, we \n", 34 | "avoid having to type (or copy and paste) repetitive code by creating a loop. There are two ways of creating loops in Python, the for loop and the while loop." 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## The for loop" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "The for loop in Python iterates over each item in a sequence (such as a list or tuple) in the order that they appear in the sequence. What this means is that a variable (code in the below example) is set to each item from the sequence of values in turn, and each time this happens the indented block of code is executed again." 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "codeList = ['NA06984', 'NA06985', 'NA06986', 'NA06989', 'NA06991']\n", 60 | "\n", 61 | "for code in codeList:\n", 62 | " print(code)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "A for loop can iterate over the individual characters in a string:" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "dnaSequence = 'ATGGTGTTGCC'\n", 81 | "\n", 82 | "for base in dnaSequence:\n", 83 | " print(base)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "And also over the keys of a dictionary: " 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "rnaMassDict = {\"G\":345.21, \"C\":305.18, \"A\":329.21, \"U\":302.16}\n", 102 | "\n", 103 | "for x in rnaMassDict:\n", 104 | " print(x, rnaMassDict[x])" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "Any variables that are defined before the loop can be accessed from inside the loop. So for example to calculate the summation of the items in a list of values we could define the total initially to be zero and add each value to the total in the loop:" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "total = 0\n", 123 | "values = [1, 2, 4, 8, 16]\n", 124 | "\n", 125 | "for v in values:\n", 126 | " total = total + v\n", 127 | " # total += v\n", 128 | " print(total)\n", 129 | "\n", 130 | "print(total)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "Naturally we can combine a for loop with an if statement, noting that we need two indentation levels, one for the outer loop and another for the conditional blocks:" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": false 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "geneExpression = {\n", 149 | " 'Beta-Catenin': 2.5, \n", 150 | " 'Beta-Actin': 1.7, \n", 151 | " 'Pax6': 0, \n", 152 | " 'HoxA2': -3.2\n", 153 | "}\n", 154 | "\n", 155 | "for gene in geneExpression:\n", 156 | " if geneExpression.get(gene) < 0:\n", 157 | " print(gene, \"is downregulated\")\n", 158 | " \n", 159 | " elif geneExpression.get(gene) > 0:\n", 160 | " print(gene, \"is upregulated\")\n", 161 | " \n", 162 | " else:\n", 163 | " print(\"No change in expression of \", gene)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "## The while loop" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "In addition to the for loop that operates on a collection of items, there is a while loop that simply repeats while some statement evaluates to True and stops when it is False. Note that if the tested expression never evaluates to False then you have an “infinite loop”, which is not good.\n", 178 | "\n", 179 | "In this example we generate a series of numbers by doubling a value after each iteration, until a limit is reached: " 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": false 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "value = 0.25\n", 191 | "while value < 8:\n", 192 | " value = value * 2\n", 193 | " print(value)\n", 194 | "\n", 195 | "print(\"final value:\", value)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "Whats going on here is that the value is doubled in each iteration and once it gets to 8 the while test fails (8 is not less than 8) and that last value is preserved. Note that if the test were instead value `<= 8` then we would get one more doubling and the value would reach 16." 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "## Skipping and breaking loops" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "Python has two ways of affecting the flow of the for or while loop inside the block. The continue statement means that the rest of the code in the block is skipped for this particular item in the collection, i.e. jump to the next iteration. In this example negative numbers are left out of a summation:" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "collapsed": false 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "values = [10, -5, 3, -1, 7]\n", 228 | "\n", 229 | "total = 0\n", 230 | "for v in values:\n", 231 | " if v < 0:\n", 232 | " continue # Skip this iteration \n", 233 | " total += v\n", 234 | "\n", 235 | "print(total)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "The other way of affecting a loop is with the break statement. In contrast to the continue statement, this immediately causes all looping to finish, and execution is resumed at the next statement _after_ the loop." 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": { 249 | "collapsed": false 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "geneticCode = {'TAT': 'Tyrosine', 'TAC': 'Tyrosine',\n", 254 | " 'CAA': 'Glutamine', 'CAG': 'Glutamine',\n", 255 | " 'TAG': 'STOP'}\n", 256 | "\n", 257 | "sequence = ['CAG','TAC','CAA','TAG','TAC','CAG','CAA']\n", 258 | "\n", 259 | "for codon in sequence:\n", 260 | " if geneticCode[codon] == 'STOP':\n", 261 | " break # Quit looping at this point\n", 262 | " else:\n", 263 | " print(geneticCode[codon])" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "## Looping gotchas" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "An internal counter is used to keep track of which item is used next, and this is incremented on each iteration. When this counter has reached the length of the sequence the loop terminates. This means that if you delete the current item from the sequence, the next item will be skipped (since it gets the index of the current item which has already been treated). Likewise, if you insert an item in a sequence before the current item, the current item will be treated again the next time through the loop. This can lead to nasty bugs that can be avoided by making a temporary copy using a slice of the whole sequence." 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "
\n", 285 | "**When looping, never modify the collection!** Always create a copy of it first.\n", 286 | "
" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "## Exercises 1.4.1\n", 294 | "\n", 295 | "1. Create a sequence where each element is an individual base of DNA. Make the sequence 15 bases long.\n", 296 | "2. Print the length of the sequence.\n", 297 | "3. Create a for loop to output every base of the sequence on a new line.\n", 298 | "4. Create a while loop similar to the one above that starts at the third base in the sequence and outputs every third base until the 12th." 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "## More looping" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": {}, 311 | "source": [ 312 | "### Using range" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "If you would like to iterate over a numeric sequence then this is possible by combining the `range()` function and a for loop." 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": { 326 | "collapsed": false 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "print(list(range(10)))\n", 331 | "\n", 332 | "print(list(range(5, 10)))\n", 333 | "\n", 334 | "print(list(range(0, 10, 3)))\n", 335 | "\n", 336 | "print(list(range(7, 2, -2)))" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "Looping through ranges " 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": { 350 | "collapsed": false 351 | }, 352 | "outputs": [], 353 | "source": [ 354 | "for x in range(8):\n", 355 | " print(x*x)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": { 362 | "collapsed": false 363 | }, 364 | "outputs": [], 365 | "source": [ 366 | "squares = []\n", 367 | "for x in range(8):\n", 368 | " s = x*x\n", 369 | " squares.append(s)\n", 370 | " \n", 371 | "print(squares)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "Looping through list indices" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": { 385 | "collapsed": false 386 | }, 387 | "outputs": [], 388 | "source": [ 389 | "codes = ['NA06984', 'NA06985', 'NA06986', 'NA06989', 'NA06991']\n", 390 | "\n", 391 | "for index in range(len(codes)):\n", 392 | " print(index, codes[index])" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "Looping through indices for two lists" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "metadata": { 406 | "collapsed": false 407 | }, 408 | "outputs": [], 409 | "source": [ 410 | "codes = ['NA06984', 'NA06985', 'NA06986', 'NA06989', 'NA06991']\n", 411 | "more_codes = ['NA06993', 'NA06994', 'NA06995', 'NA06997', 'NA07000']\n", 412 | "\n", 413 | "for index in range(len(codes)):\n", 414 | " print(index, codes[index], more_codes[index])" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "### Using enumerate" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "Given a sequence, `enumerate()` allows you to iterate over the sequence generating a tuple containing each value along with a corresponding index." 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": { 435 | "collapsed": false 436 | }, 437 | "outputs": [], 438 | "source": [ 439 | "letters = ['A','C','G','T']\n", 440 | "for index, letter in enumerate(letters):\n", 441 | " print(index, letter)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "metadata": { 448 | "collapsed": false 449 | }, 450 | "outputs": [], 451 | "source": [ 452 | "numbered_letters = list(enumerate(letters))\n", 453 | "print(numbered_letters)" 454 | ] 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": {}, 459 | "source": [ 460 | "## Filtering in loops" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": { 467 | "collapsed": false 468 | }, 469 | "outputs": [], 470 | "source": [ 471 | "city_pops = {\n", 472 | " 'London': 8200000,\n", 473 | " 'Cambridge': 130000,\n", 474 | " 'Edinburgh': 420000,\n", 475 | " 'Glasgow': 1200000\n", 476 | "}\n", 477 | "\n", 478 | "big_cities = []\n", 479 | "for city in city_pops:\n", 480 | " if city_pops[city] >= 1000000:\n", 481 | " big_cities.append(city)\n", 482 | "\n", 483 | "print(big_cities)" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "metadata": { 490 | "collapsed": false 491 | }, 492 | "outputs": [], 493 | "source": [ 494 | "total = 0\n", 495 | "for city in city_pops:\n", 496 | " total += city_pops[city]\n", 497 | "print(\"total population:\", total)" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": null, 503 | "metadata": { 504 | "collapsed": false 505 | }, 506 | "outputs": [], 507 | "source": [ 508 | "pops = list(city_pops.values())\n", 509 | "print(\"total population:\", sum(pops))" 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "metadata": {}, 515 | "source": [ 516 | "## Exercises 1.4.2\n", 517 | "\n", 518 | "1. Let's calculate the GC content of a DNA sequence. Use the 15-base sequence you created for the exercises above. Create a variable, `gc`, which we will use to count the number of Gs or Cs in our sequence.\n", 519 | "2. Create a loop to iterate over the bases in your sequence. If the base is a G or the base is a C, add one to your `gc` variable.\n", 520 | "3. When the loop is done, divide the number of GC bases by the length of the sequence and multiply by 100 to get the GC percentage." 521 | ] 522 | }, 523 | { 524 | "cell_type": "markdown", 525 | "metadata": {}, 526 | "source": [ 527 | "## Congratulation! You reached the end of day 1! \n", 528 | "\n", 529 | "Go to our next notebook: [Introduction_to_python_day_2_introduction](Introduction_to_python_day_2_introduction.ipynb)" 530 | ] 531 | } 532 | ], 533 | "metadata": { 534 | "kernelspec": { 535 | "display_name": "Python 3", 536 | "language": "python", 537 | "name": "python3" 538 | }, 539 | "language_info": { 540 | "codemirror_mode": { 541 | "name": "ipython", 542 | "version": 3 543 | }, 544 | "file_extension": ".py", 545 | "mimetype": "text/x-python", 546 | "name": "python", 547 | "nbconvert_exporter": "python", 548 | "pygments_lexer": "ipython3", 549 | "version": "3.5.2" 550 | } 551 | }, 552 | "nbformat": 4, 553 | "nbformat_minor": 0 554 | } 555 | -------------------------------------------------------------------------------- /Introduction_to_python_day_1_session_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# An introduction to solving biological problems with Python\n", 12 | "\n", 13 | "## Day 1 - Session 1: \n", 14 | "\n", 15 | "- [Printing values](#Printing-values)\n", 16 | "- [Using variables](#Using-variables)\n", 17 | "- [Simple data types](#Simple-data-types): [Booleans](#Booleans), [Integers](#Integers), [Floating point numbers](#Floating-point-numbers), and [Strings](#Strings)\n", 18 | "- [Comments](#Comments)\n", 19 | "- [Exercises 1.1.1](#Exercises-1.1.1)\n", 20 | "- [Arithmetic](#Arithmetic)\n", 21 | "- [Exercises 1.1.2](#Exercises-1.1.2)\n", 22 | "- [Saving code in files](#Saving-code-in-files)\n", 23 | "- [Exercises 1.1.3](#Exercises-1.1.3)\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "slideshow": { 30 | "slide_type": "slide" 31 | } 32 | }, 33 | "source": [ 34 | "## Printing values\n", 35 | "\n", 36 | "The first bit of python syntax we're going to learn is the print statement. This command lets us print messages to the user, and also to see what Python thinks is the value of some expression (very useful when debugging your programs).\n", 37 | "\n", 38 | "We will go into details later on, but for now just note that to print some text you have to enclose it in \"quotation marks\". \n", 39 | "\n", 40 | "We will go into detail on the arithmetic operations supported in python shortly, but you can try exploring python's calculating abilities." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": false, 48 | "slideshow": { 49 | "slide_type": "fragment" 50 | } 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "print(\"Hello from python!\")" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": { 61 | "collapsed": false, 62 | "slideshow": { 63 | "slide_type": "fragment" 64 | } 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "print(34)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": false, 76 | "slideshow": { 77 | "slide_type": "fragment" 78 | } 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "print(2 + 3)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": { 88 | "slideshow": { 89 | "slide_type": "slide" 90 | } 91 | }, 92 | "source": [ 93 | "You can print multiple expressions you need to seperate them with commas. Python will insert a space between each element, and a newline at the end of the message (though you can suppress this behaviour by leaving a trailing comma at the end of the command)." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false, 101 | "slideshow": { 102 | "slide_type": "fragment" 103 | } 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "print(\"The answer:\", 42)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": { 113 | "slideshow": { 114 | "slide_type": "slide" 115 | } 116 | }, 117 | "source": [ 118 | "## Using variables" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "In the print commands above we have directly operated on values such as text strings and numbers. When programming we will typically want to deal with rather more complex expressions where it is useful to be able to assign a name to an expression, especially if we are trying to deal with multiple values at the same time.\n", 126 | "\n", 127 | "We can give a name to a value using _variables_, the name is apt because the values stored in a variable can _vary_. Unlike some other languages, the type of value assigned to a variable can also change (this is one of the reasons why python is known as a _dynamic_ language).\n", 128 | "\n", 129 | "A variable can be assigned to a simple value..." 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "x = 3\n", 141 | "print(x)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "... or the outcome of a more complex expression." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": { 155 | "collapsed": false 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "x = 2 + 2\n", 160 | "print(x)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "A variable can be called whatever you like (as long as it starts with a character, it does not contain space and is meaningful) and you assign a value to a variable with the **`=` operator**. Note that this is different to mathematical equality (which we will come to later...)\n", 168 | "\n", 169 | "You can print a variable to see what python thinks its current value is." 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": false 177 | }, 178 | "outputs": [], 179 | "source": [ 180 | "serine = \"TCA\"\n", 181 | "print(serine, \"codes for serine\")\n", 182 | "serine = \"TCG\"\n", 183 | "print(\"as does\", serine)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "In the interactive interpreter you don't have to print everything, if you type a variable name (or just a value), the interpreter will automatically print out what python thinks the value is. Note though that this is not the case if your code is in a file." 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "3 + 4" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": false 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "x = 5\n", 213 | "3 * x" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "Variables can be used on the right hand side of an assignment as well, in which case they will be evaluated before the value is assigned to the variable on the left hand side." 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "collapsed": false 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "x = 5\n", 232 | "y = x * 3\n", 233 | "print(y)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "or just `y` in the interpreter and in Jupyter notebook" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": { 247 | "collapsed": false 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "y" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "You can use the current value of a variable itself in an assignment" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": { 265 | "collapsed": false 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "y = y + 1\n", 270 | "y" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "In fact this is such a common idiom that there are special operators that will do this implicitly (more on these later)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": { 284 | "collapsed": false 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "y += 1\n", 289 | "y" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "## Simple data types" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "Python (and computers in general) treats different types of data differently. Python has 5 main basic data types. Types are useful to constrain some operations to a certain category of variables. For example it doesn't really make sense to try to divide a string.\n", 304 | "\n", 305 | "We will see some examples of these in use shortly, but for now let's see all of the basic types available in python.\n", 306 | "\n", 307 | "### Booleans\n", 308 | "\n", 309 | "Boolean values represent truth or falsehood, as used in logical operations, for example. Not surprisingly, there are only two values, and in Python they are called True and False." 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": { 316 | "collapsed": false 317 | }, 318 | "outputs": [], 319 | "source": [ 320 | "a = True\n", 321 | "b = False\n", 322 | "print(a, b)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "### Integers\n", 330 | "\n", 331 | "Integers represent whole numbers, as you would use when counting items, and can be positive or negative." 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "outputs": [], 341 | "source": [ 342 | "i = -7\n", 343 | "j = 123\n", 344 | "print(i, j)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "metadata": {}, 350 | "source": [ 351 | "### Floating point numbers\n", 352 | "\n", 353 | "Floating point numbers, often simply referred to as floats, are numbers expressed in the decimal system, i.e. 2.1, 999.998, -0.000004 etc. The value 2.0 would also be interpreted as a floating point number, but the value 2, without the decimal point will not; it will be interpreted as an integer." 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": { 360 | "collapsed": false 361 | }, 362 | "outputs": [], 363 | "source": [ 364 | "x = 3.14159\n", 365 | "y = -42.3\n", 366 | "print(x * y)" 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "Floating point numbers can also carry an e suffix that states which power of ten they operate at." 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": null, 379 | "metadata": { 380 | "collapsed": false 381 | }, 382 | "outputs": [], 383 | "source": [ 384 | "k = 1.5e3\n", 385 | "l = 3e-2\n", 386 | "print(k)\n", 387 | "print(l)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "### Strings\n", 395 | "\n", 396 | "Strings represent text, i.e. \"strings\" of characters. They can be delimited by single quotes or double quotes , but you have to use the same delimiter at both ends. Unlike some programming languages, such as Perl, there is no difference between the two types of quote, although using one type does allow the other type to appear inside the string as a regular character.\n", 397 | "\n", 398 | "Normally a python statement ends at the end of the line, but if you want to type a string over several lines you can enclose it in triple quotation marks." 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "metadata": { 405 | "collapsed": false 406 | }, 407 | "outputs": [], 408 | "source": [ 409 | "s = \"ATGTCGTCTACAACACT\"\n", 410 | "t = 'Serine'\n", 411 | "u = \"It's a string with apostrophes\"\n", 412 | "v = \"\"\"A string that extends\n", 413 | "over multiple lines\"\"\"\n", 414 | "print(v)" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "### The None object\n", 422 | "\n", 423 | "The None object is special built-in value which can be thought of as **representing nothingness or that something is undefined**. For example, it can be used to indicate that a variable exists, but has not yet been set to anything specific." 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "collapsed": false 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "z = None\n", 435 | "print(z)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "### Object type\n", 443 | "\n", 444 | "You can check what type python thinks an expression is with the type function, which you can call with the name type immediately followed by parentheses enclosing the expression you want to check (either a variable or a value), e.g. type(3). (This is the general form for calling functions, we'll see lots more examples of functions later...)" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": { 451 | "collapsed": false 452 | }, 453 | "outputs": [], 454 | "source": [ 455 | "a = True\n", 456 | "print(a, \"is of\", type(a))" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": { 463 | "collapsed": false 464 | }, 465 | "outputs": [], 466 | "source": [ 467 | "i = -7\n", 468 | "print(i, \"is of\", type(i))" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "metadata": { 475 | "collapsed": false 476 | }, 477 | "outputs": [], 478 | "source": [ 479 | "x = 12.7893\n", 480 | "print(x, \"is of\", type(x))" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": null, 486 | "metadata": { 487 | "collapsed": false 488 | }, 489 | "outputs": [], 490 | "source": [ 491 | "s = \"ATGTCGTCTACAACACT\"\n", 492 | "print(s, \"is of\", type(s))" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": null, 498 | "metadata": { 499 | "collapsed": false 500 | }, 501 | "outputs": [], 502 | "source": [ 503 | "z = None\n", 504 | "print(z, \"is of\", type(z))" 505 | ] 506 | }, 507 | { 508 | "cell_type": "markdown", 509 | "metadata": {}, 510 | "source": [ 511 | "## Comments\n", 512 | "\n", 513 | "When you are writing a program it is often convenient to annotate your code to remind you what you were (intending) it to do. In programming these annotations are known as _comments_. You can include a comment in python by prefixing some text with a # character. All text following the # will then be ignored by the interpreter. You can start a comment on its own line, or you can include it at the end of a line of code.\n", 514 | "\n", 515 | "It is also often useful to temporarily remove some code from a script without deleting it. This is known as _commenting out_ some code." 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": { 522 | "collapsed": false 523 | }, 524 | "outputs": [], 525 | "source": [ 526 | "print(\"Hi\") # this will be ignored\n", 527 | "# as will this\n", 528 | "print(\"Bye\")\n", 529 | "# print \"Never seen\"" 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "metadata": {}, 535 | "source": [ 536 | "## Exercises 1.1.1\n", 537 | "\n", 538 | "To start the Python interpreter, open a terminal window, type the command `python`, then enter Python commands after the prompt `>>>` and press `Enter` when you're done. \n", 539 | "\n", 540 | "Python will run the code you typed, and might display some output on the line below, before leaving you with another prompt which looks like `>>>`.\n", 541 | "\n", 542 | "If you want to exit the interactive interpreter you can type the command `quit()` or type `Ctrl-D`.\n", 543 | "\n", 544 | "In the interpreter:\n", 545 | "\n", 546 | "1. Create a variable and assign it the string value of your first name, assign your age to another variable (you are free to lie!), print out a message saying how old you are\n", 547 | "2. Use the addition operator to add 10 to your age and print out a message saying how old you will be in 10 years time" 548 | ] 549 | }, 550 | { 551 | "cell_type": "markdown", 552 | "metadata": {}, 553 | "source": [ 554 | "## Arithmetic" 555 | ] 556 | }, 557 | { 558 | "cell_type": "markdown", 559 | "metadata": {}, 560 | "source": [ 561 | "Python supports all the standard arithmetical operations on numerical types, and mostly uses a similar syntax to several other computer languages:" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": null, 567 | "metadata": { 568 | "collapsed": false 569 | }, 570 | "outputs": [], 571 | "source": [ 572 | "x = 4.5\n", 573 | "y = 2\n", 574 | "\n", 575 | "print('x', x, 'y', y)\n", 576 | "print('addition x + y =', x + y) \n", 577 | "print('subtraction x - y =', x - y) \n", 578 | "print('multiplication x * y =', x * y) \n", 579 | "print('division x / y =', x / y) " 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": null, 585 | "metadata": { 586 | "collapsed": false 587 | }, 588 | "outputs": [], 589 | "source": [ 590 | "x = 4.5\n", 591 | "y = 2\n", 592 | "\n", 593 | "print('x', x, 'y', y)\n", 594 | "print('division x / y =', x / y)\n", 595 | "print('floored division x // y =', x // y) \n", 596 | "print('modulus (remainder of x/y) x % y =', x % y) \n", 597 | "print('exponentiation x ** y =', x ** y)" 598 | ] 599 | }, 600 | { 601 | "cell_type": "markdown", 602 | "metadata": {}, 603 | "source": [ 604 | "As usual in maths, division and multiplication have higher precedence than addition and subtraction, but arithmetic expressions can be grouped using parentheses to override the default precedence" 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": null, 610 | "metadata": { 611 | "collapsed": false 612 | }, 613 | "outputs": [], 614 | "source": [ 615 | "x = 13\n", 616 | "y = 5\n", 617 | "\n", 618 | "print('x * (2 + y) =', x * (2 + y))\n", 619 | "print('(x * 2) + y =', (x * 2) + y)\n", 620 | "print('x * 2 + y =', x * 2 + y)" 621 | ] 622 | }, 623 | { 624 | "cell_type": "markdown", 625 | "metadata": {}, 626 | "source": [ 627 | "You can mix (some) types in arithmetic expressions and python will apply rules as to the type of the result\n" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": null, 633 | "metadata": { 634 | "collapsed": false 635 | }, 636 | "outputs": [], 637 | "source": [ 638 | "13 + 5.0" 639 | ] 640 | }, 641 | { 642 | "cell_type": "markdown", 643 | "metadata": {}, 644 | "source": [ 645 | "You can force python to use a particular type by converting an expression explicitly, using helpful named functions: float, int, str etc." 646 | ] 647 | }, 648 | { 649 | "cell_type": "code", 650 | "execution_count": null, 651 | "metadata": { 652 | "collapsed": false 653 | }, 654 | "outputs": [], 655 | "source": [ 656 | "float(3) + float(7)" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": null, 662 | "metadata": { 663 | "collapsed": false 664 | }, 665 | "outputs": [], 666 | "source": [ 667 | "int(3.14159) + 1" 668 | ] 669 | }, 670 | { 671 | "cell_type": "markdown", 672 | "metadata": {}, 673 | "source": [ 674 | "The addition operator `+` allows you also to concatenate strings together." 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": null, 680 | "metadata": { 681 | "collapsed": false 682 | }, 683 | "outputs": [], 684 | "source": [ 685 | "print('number' + str(3))" 686 | ] 687 | }, 688 | { 689 | "cell_type": "markdown", 690 | "metadata": {}, 691 | "source": [ 692 | "Division in Python 2 sometimes trips up new (and experienced!) programmers. If you divide 2 integers you will only get an integer result. If you want a floating point result you should explicitly cast at least one of the arguments to a float." 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": null, 698 | "metadata": { 699 | "collapsed": false 700 | }, 701 | "outputs": [], 702 | "source": [ 703 | "print(\"3/4 =\", 3/4)\n", 704 | "print(\"3.0/4 =\", 3.0/4)\n", 705 | "print(\"float(3)/4 =\", float(3)/4)" 706 | ] 707 | }, 708 | { 709 | "cell_type": "markdown", 710 | "metadata": {}, 711 | "source": [ 712 | "There are a few shortcut assignment statements to make modifying variables directly faster to type" 713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": null, 718 | "metadata": { 719 | "collapsed": false 720 | }, 721 | "outputs": [], 722 | "source": [ 723 | "x = 3\n", 724 | "x += 1 # equivalent to x = x + 1\n", 725 | "x" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": null, 731 | "metadata": { 732 | "collapsed": false 733 | }, 734 | "outputs": [], 735 | "source": [ 736 | "x = 2\n", 737 | "y = 10\n", 738 | "y *= x\n", 739 | "y" 740 | ] 741 | }, 742 | { 743 | "cell_type": "markdown", 744 | "metadata": {}, 745 | "source": [ 746 | "These shortcut operators are available for all arithmetic and logical operators." 747 | ] 748 | }, 749 | { 750 | "cell_type": "markdown", 751 | "metadata": {}, 752 | "source": [ 753 | "## Exercises 1.1.2\n", 754 | "\n", 755 | "In the interpreter:\n", 756 | "\n", 757 | "1. Assign numerical values to 2 variables, calculate the mean of these two variables and store the result in another variable. Print out the result to the screen." 758 | ] 759 | }, 760 | { 761 | "cell_type": "markdown", 762 | "metadata": {}, 763 | "source": [ 764 | "## Saving code in files\n", 765 | "\n", 766 | "### Excecute code in files\n", 767 | "\n", 768 | "As we mentioned earlier, you can also save python code in a file and then execute it later. We typically save python code in a file ending with the extension .py. The file, or _script_, can then be executed simply by supplying the name of the file as an argument to the python command in the terminal.\n", 769 | "\n", 770 | "The first file we will be looking at is located in the `scripts` directory and it is called `hello.py`. To execute the script, open a terminal window, navigate to the `scripts` directory and execute the code in the script `hello.py` by running `python hello.py` in your terminal:\n", 771 | "\n", 772 | "```bash\n", 773 | "ls\n", 774 | "cd scripts\n", 775 | "python hello.py\n", 776 | "```\n", 777 | "\n", 778 | "Shell commands:\n", 779 | "- `ls`: to list directory contents\n", 780 | "- `pwd`: to return working directory name\n", 781 | "- `cd to/this/directory/`: to change directory\n", 782 | "\n", 783 | "### Edit code in files\n", 784 | "\n", 785 | "You can use any text editor you know to edit your file, but the file should be saved as plain text, so programs like Microsoft Word aren't the best choice. Many text editors will highlight python syntax for you which can help avoid syntax errors.\n", 786 | "\n", 787 | "To open any Python scripts in a text editor, open [Gedit](https://wiki.gnome.org/Apps/Gedit) or [Atom](https://atom.io/) and use the File menu, navigate to the `scripts` directory and open `hello.py`. \n", 788 | "\n", 789 | "You can now modify the `print` statement, save the file and go back to the terminal window you've just opened to run the code again by using the command `python hello.py`.\n" 790 | ] 791 | }, 792 | { 793 | "cell_type": "markdown", 794 | "metadata": {}, 795 | "source": [ 796 | "## Exercises 1.1.3\n", 797 | "\n", 798 | "Create a new Python file to solve these exercises. It is good practice to create a new file each time you solve a new problem.\n", 799 | "\n", 800 | "1. Look up the genetic code. Create four string variables that store possible DNA encodings of serine (S), leucine (L), tyrosine (Y) and cysteine (C). Where multiple codings are available, just pick one for now.\n", 801 | "2. Create a variable containing a possible DNA sequence for the protein sequence SYLYC. (Note that the addition operator + allows you to concatenate strings together.) Print the DNA sequence.\n", 802 | "3. Include a comment in your file to remind you the purpose of the script" 803 | ] 804 | }, 805 | { 806 | "cell_type": "markdown", 807 | "metadata": {}, 808 | "source": [ 809 | "## Next session\n", 810 | "\n", 811 | "Go to our next notebook: [Introduction_to_python_day_1_session_2](Introduction_to_python_day_1_session_2.ipynb)" 812 | ] 813 | } 814 | ], 815 | "metadata": { 816 | "kernelspec": { 817 | "display_name": "Python 3", 818 | "language": "python", 819 | "name": "python3" 820 | }, 821 | "language_info": { 822 | "codemirror_mode": { 823 | "name": "ipython", 824 | "version": 3 825 | }, 826 | "file_extension": ".py", 827 | "mimetype": "text/x-python", 828 | "name": "python", 829 | "nbconvert_exporter": "python", 830 | "pygments_lexer": "ipython3", 831 | "version": "3.5.2" 832 | } 833 | }, 834 | "nbformat": 4, 835 | "nbformat_minor": 0 836 | } 837 | -------------------------------------------------------------------------------- /Introduction_to_python_day_1_introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "nbpresent": { 7 | "id": "dc7a1635-0bbd-4bf7-a07e-7a36f58e258b" 8 | }, 9 | "slideshow": { 10 | "slide_type": "slide" 11 | } 12 | }, 13 | "source": [ 14 | "# An introduction to solving biological problems with Python" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "nbpresent": { 21 | "id": "53eee250-b3d0-4262-ad09-e87fb2acf82e" 22 | }, 23 | "slideshow": { 24 | "slide_type": "-" 25 | } 26 | }, 27 | "source": [ 28 | "## Presenters for 5-6 December 2016\n", 29 | "- Mukarram Hossain, Cambridge\n", 30 | "- Anne Pajon, CRUK Cambridge Institute" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": { 36 | "nbpresent": { 37 | "id": "21082cb9-e1b9-4fe9-80d5-9d9e8418937b" 38 | }, 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "## Learning objectives\n", 45 | "- **Recall** how to print, create variables and save Python code in files\n", 46 | "- **List** the most common data types in Python\n", 47 | "- **Explain** how to write conditions and loops in Python\n", 48 | "- **Use and compare** these concepts in different code examples \n", 49 | "- **Propose and create** solutions using these concepts in different exercises" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": { 55 | "nbpresent": { 56 | "id": "ceb5f5a0-a5e8-435e-ae16-23c2ba8c6ab2" 57 | }, 58 | "slideshow": { 59 | "slide_type": "slide" 60 | } 61 | }, 62 | "source": [ 63 | "## Course schedule - day one\n", 64 | "\n", 65 | "- 09:30-10:00: [0h30] **Introduction**\n", 66 | "- 10:00-12:00: [2h00] **Session 1** - Print, Variables, Simple data types, Arithmetic and Saving code in files\n", 67 | "- 12:00-13:00: *lunch break*\n", 68 | "- 13:00-15:00: [2h00] **Session 2** - Collections: Lists, String and Dictionnaries\n", 69 | "- 15:00-15:15: *break*\n", 70 | "- 15:15-16:15: [1h00] **Session 3** - Conditional execution\n", 71 | "- 16:15-16:30: *break*\n", 72 | "- 16:30-17:30: [1h00] **Session 4** - Loops" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": { 78 | "nbpresent": { 79 | "id": "e6c2e441-eb7b-4a4c-9c9c-b88cc9a2527f" 80 | }, 81 | "slideshow": { 82 | "slide_type": "slide" 83 | } 84 | }, 85 | "source": [ 86 | "## Course schedule - day two\n", 87 | "\n", 88 | "- Functions, Files and BioPython" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "nbpresent": { 95 | "id": "8458de53-35b5-405e-a372-5db5d2e2c2c5" 96 | }, 97 | "slideshow": { 98 | "slide_type": "slide" 99 | } 100 | }, 101 | "source": [ 102 | "## Course materials\n", 103 | "\n", 104 | "- There is a course webpage with links to the materials, example solutions to the exercises etc.:\n", 105 | " - http://pycam.github.io\n", 106 | "- All course materiel is available on GitHub https://github.com/pycam\n", 107 | "- We’d like you to follow along with the example code as we go through the material, and attempt the exercises to practice what you’ve learned\n", 108 | "- Questions are welcome at any point!\n", 109 | "- If you have specific projects/problems you like to use Python for we are happy to (try to) help during the exercises\n" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": { 115 | "nbpresent": { 116 | "id": "96ca5c44-2cfc-471c-8da7-39870c822e20" 117 | }, 118 | "slideshow": { 119 | "slide_type": "slide" 120 | } 121 | }, 122 | "source": [ 123 | "## What is *Python*?\n", 124 | "\n", 125 | "- Python is a *dynamic, interpreted* general purpose programming language initially created by Guido van Rossum in 1991\n", 126 | "- It is a powerful language that supports several popular programming paradigms:\n", 127 | " - procedural\n", 128 | " - object-oriented\n", 129 | " - functional\n", 130 | "- Python is widely used in bioinformatics and scientific computing, as well as many other fields and in industry\n", 131 | "- Python is available on all popular operating systems\n", 132 | " - Macs\n", 133 | " - Windows\n", 134 | " - Linux" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": { 140 | "nbpresent": { 141 | "id": "9110098b-9675-4d64-adf3-c947073d4c4d" 142 | }, 143 | "slideshow": { 144 | "slide_type": "slide" 145 | } 146 | }, 147 | "source": [ 148 | "## The Python programming language\n", 149 | "\n", 150 | "- Python is considered to come with \"batteries included\" and the standard library (some of which we will see in this course) provides built-in support for lots of common tasks:\n", 151 | " - numerical & mathematical functions \n", 152 | " - interacting with files and the operating system\n", 153 | " - ...\n", 154 | "\n", 155 | "- There is also a wide range of external libraries for areas not covered in the standard library, such as [Pandas](http://pandas.pydata.org/) the Python Data Analysis Library and the [BioPython](http://biopython.org/) Library which provides tools for bioinformatics - we look at this tomorrow" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": { 161 | "nbpresent": { 162 | "id": "0d61b4b4-163f-47fe-80f1-092287218273" 163 | }, 164 | "slideshow": { 165 | "slide_type": "slide" 166 | } 167 | }, 168 | "source": [ 169 | "## Getting started\n", 170 | "\n", 171 | "- Python is an *interpreted* language, this means that your computer does not run Python code natively, but instead we run our code using the Python interpreter\n", 172 | "- There are three ways in which you can run Python code:\n", 173 | " - Directly typing **commands into the interpreter**: *Good for experimenting with the language, and for some interactive work*\n", 174 | " - Using a **Jupyter notebook**: *Great for experimenting with the language, and for sharing and learning*\n", 175 | " - Typing code **into a file** and then telling the interpreter to run the code from this file: *Good for larger programs, and when you want to run the same code repeatedly*\n" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": { 181 | "nbpresent": { 182 | "id": "b878a4f9-4345-4abb-81f4-5a731c639ab8" 183 | }, 184 | "slideshow": { 185 | "slide_type": "slide" 186 | } 187 | }, 188 | "source": [ 189 | "## How to start the Python interpreter?\n", 190 | "\n", 191 | "- How you start the interpreter will depend on which operating system you are using, but on a Mac or Linux machine you should start a terminal and then just type the command `python3`\n", 192 | "- This will print out some information about your installation of python and then leave you with a command prompt which looks like `>>>` \n", 193 | "- You can then type commands and press `Enter` when you're done. Python will run the code you typed, and might display some output on the line below, before leaving you with another prompt.\n", 194 | "- If you want to exit the interactive interpreter you can type the command `quit()` or type `Ctrl-D`" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": { 200 | "nbpresent": { 201 | "id": "8a4ac456-6c4b-4249-8662-b1cabfd7cee4" 202 | }, 203 | "slideshow": { 204 | "slide_type": "slide" 205 | } 206 | }, 207 | "source": [ 208 | "## The terminal\n", 209 | "\n", 210 | "We will see later how to save code in a file and run it.\n", 211 | "
" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": { 217 | "nbpresent": { 218 | "id": "f5bcbcb5-4352-4674-a7b6-c8e576220422" 219 | }, 220 | "slideshow": { 221 | "slide_type": "slide" 222 | } 223 | }, 224 | "source": [ 225 | "## The shell command lines you may need\n", 226 | "\n", 227 | "- `ls`: to list directory contents\n", 228 | "- `pwd`: to return working directory name\n", 229 | "- `cd to/this/directory/`: to change directory\n", 230 | "- `cat hello.py`: to print the content of a text file \n" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": { 236 | "nbpresent": { 237 | "id": "9814e8d7-60e0-43e6-aee0-3c33cc2cc809" 238 | }, 239 | "slideshow": { 240 | "slide_type": "slide" 241 | } 242 | }, 243 | "source": [ 244 | "## What is a Jupyter notebook?\n", 245 | "\n", 246 | "\n", 247 | "\n", 248 | "- The [Jupyter Notebook](http://jupyter.org/) is a web application that allows you to create and share documents that contain live code, equations, visualizations and explanatory text. \n", 249 | "\n", 250 | "- Jupyter provides a rich architecture for interactive data science and scientific computing with: \n", 251 | " - Over 40 programming languages such as Python, R, Julia and Scala.\n", 252 | " - A browser-based notebook with support for code, rich text, math expressions, plots and other rich media.\n", 253 | " - Support for interactive data visualization.\n", 254 | " - Easy to use tools for parallel computing." 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": { 260 | "nbpresent": { 261 | "id": "62fdd00c-a006-4f11-b9dc-e2ca072225d7" 262 | }, 263 | "slideshow": { 264 | "slide_type": "slide" 265 | } 266 | }, 267 | "source": [ 268 | "## How to install Jupyter on your own computer?\n", 269 | "\n", 270 | "\n", 271 | "\n", 272 | "- [See Installing Jupyter Notebook](https://jupyter.readthedocs.io/en/latest/install.html)\n", 273 | "\n", 274 | "- For new users, we recommend [installing Anaconda](https://www.continuum.io/downloads). Anaconda conveniently installs Python, the Jupyter Notebook, and other commonly used packages for scientific computing and data science.\n", 275 | "\n", 276 | "- Start the notebook server from the command line:\n", 277 | "```\n", 278 | "jupyter notebook\n", 279 | "```\n", 280 | "- You should see the notebook home page open in your web browser.\n" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": { 286 | "nbpresent": { 287 | "id": "0e25dad3-add0-466e-8f71-e771d6ec4500" 288 | }, 289 | "slideshow": { 290 | "slide_type": "slide" 291 | } 292 | }, 293 | "source": [ 294 | "## How to run python in a Jupyter notebook?\n", 295 | "\n", 296 | "\n", 297 | "\n", 298 | "- See [Jupyter Notebook Basics](http://nbviewer.jupyter.org/github/jupyter/notebook/blob/master/docs/source/examples/Notebook/Notebook%20Basics.ipynb)\n", 299 | "\n", 300 | "\n", 301 | "- Go to our notebook for the fist session: [Introduction_to_python_day_1_session_1](Introduction_to_python_day_1_session_1.ipynb)" 302 | ] 303 | } 304 | ], 305 | "metadata": { 306 | "anaconda-cloud": {}, 307 | "celltoolbar": "Slideshow", 308 | "kernelspec": { 309 | "display_name": "Python 3", 310 | "language": "python", 311 | "name": "python3" 312 | }, 313 | "language_info": { 314 | "codemirror_mode": { 315 | "name": "ipython", 316 | "version": 3 317 | }, 318 | "file_extension": ".py", 319 | "mimetype": "text/x-python", 320 | "name": "python", 321 | "nbconvert_exporter": "python", 322 | "pygments_lexer": "ipython3", 323 | "version": "3.5.2" 324 | }, 325 | "nbpresent": { 326 | "slides": { 327 | "152c5a3b-78f9-4183-bce2-379a4012baf6": { 328 | "id": "152c5a3b-78f9-4183-bce2-379a4012baf6", 329 | "layout": "grid", 330 | "prev": "5613e857-5b4e-42e4-9feb-df0440592ca2", 331 | "regions": { 332 | "20d6059c-7745-410d-a5fb-0b91cacbc2e2": { 333 | "attrs": { 334 | "height": 0.6666666666666666, 335 | "pad": 0.01, 336 | "treemap:weight": 1, 337 | "width": 0.5, 338 | "x": 0, 339 | "y": 0 340 | }, 341 | "id": "20d6059c-7745-410d-a5fb-0b91cacbc2e2" 342 | }, 343 | "300e6ccd-ecf4-425e-8574-3debe305aafb": { 344 | "attrs": { 345 | "height": 0.3333333333333333, 346 | "pad": 0.01, 347 | "treemap:weight": 1, 348 | "width": 1, 349 | "x": 0, 350 | "y": 0.6666666666666666 351 | }, 352 | "content": { 353 | "cell": "9814e8d7-60e0-43e6-aee0-3c33cc2cc809", 354 | "part": "whole" 355 | }, 356 | "id": "300e6ccd-ecf4-425e-8574-3debe305aafb" 357 | }, 358 | "df2dd6ff-570b-4b75-9cb7-1ff1dbdd4f55": { 359 | "attrs": { 360 | "height": 0.6666666666666666, 361 | "pad": 0.01, 362 | "treemap:weight": 1, 363 | "width": 0.5, 364 | "x": 0.5, 365 | "y": 0 366 | }, 367 | "id": "df2dd6ff-570b-4b75-9cb7-1ff1dbdd4f55" 368 | } 369 | } 370 | }, 371 | "2586ca7d-5091-40ea-b566-ccc5fbf833c6": { 372 | "id": "2586ca7d-5091-40ea-b566-ccc5fbf833c6", 373 | "prev": "f001d476-5814-4664-a722-f04f5d23cd52", 374 | "regions": { 375 | "d6011048-43db-4990-a82e-768683aa4fe5": { 376 | "attrs": { 377 | "height": 0.8, 378 | "width": 0.8, 379 | "x": 0.1, 380 | "y": 0.1 381 | }, 382 | "content": { 383 | "cell": "ceb5f5a0-a5e8-435e-ae16-23c2ba8c6ab2", 384 | "part": "whole" 385 | }, 386 | "id": "d6011048-43db-4990-a82e-768683aa4fe5" 387 | } 388 | } 389 | }, 390 | "27ee4130-d0bb-4287-b8fe-75a7b0ecf178": { 391 | "id": "27ee4130-d0bb-4287-b8fe-75a7b0ecf178", 392 | "prev": "2586ca7d-5091-40ea-b566-ccc5fbf833c6", 393 | "regions": { 394 | "7a689d66-0c9d-4492-928b-f35bfd2ffc4c": { 395 | "attrs": { 396 | "height": 0.8, 397 | "width": 0.8, 398 | "x": 0.1, 399 | "y": 0.1 400 | }, 401 | "content": { 402 | "cell": "e6c2e441-eb7b-4a4c-9c9c-b88cc9a2527f", 403 | "part": "whole" 404 | }, 405 | "id": "7a689d66-0c9d-4492-928b-f35bfd2ffc4c" 406 | } 407 | } 408 | }, 409 | "2de0c027-7a07-4f7e-8594-a98d36125372": { 410 | "id": "2de0c027-7a07-4f7e-8594-a98d36125372", 411 | "prev": "75e76bd9-24ae-4c42-b6bc-5f58a0550ba8", 412 | "regions": { 413 | "868fd842-e6fb-48b2-9ac5-95e8fe20927e": { 414 | "attrs": { 415 | "height": 0.8, 416 | "width": 0.8, 417 | "x": 0.1, 418 | "y": 0.1 419 | }, 420 | "content": { 421 | "cell": "0e25dad3-add0-466e-8f71-e771d6ec4500", 422 | "part": "whole" 423 | }, 424 | "id": "868fd842-e6fb-48b2-9ac5-95e8fe20927e" 425 | } 426 | } 427 | }, 428 | "5613e857-5b4e-42e4-9feb-df0440592ca2": { 429 | "id": "5613e857-5b4e-42e4-9feb-df0440592ca2", 430 | "prev": "564dae42-4185-46c1-b156-e503f475e25c", 431 | "regions": { 432 | "17e888b0-050b-406a-a5a3-0d5c1605b8df": { 433 | "attrs": { 434 | "height": 0.8, 435 | "width": 0.8, 436 | "x": 0.1, 437 | "y": 0.1 438 | }, 439 | "content": { 440 | "cell": "f5bcbcb5-4352-4674-a7b6-c8e576220422", 441 | "part": "whole" 442 | }, 443 | "id": "17e888b0-050b-406a-a5a3-0d5c1605b8df" 444 | } 445 | } 446 | }, 447 | "564dae42-4185-46c1-b156-e503f475e25c": { 448 | "id": "564dae42-4185-46c1-b156-e503f475e25c", 449 | "prev": "ba285213-f645-4314-afd5-0a656fa35631", 450 | "regions": { 451 | "328d4d72-cd9e-4e5b-aaa8-175833f5bfdb": { 452 | "attrs": { 453 | "height": 0.8, 454 | "width": 0.8, 455 | "x": 0.1, 456 | "y": 0.1 457 | }, 458 | "content": { 459 | "cell": "8a4ac456-6c4b-4249-8662-b1cabfd7cee4", 460 | "part": "whole" 461 | }, 462 | "id": "328d4d72-cd9e-4e5b-aaa8-175833f5bfdb" 463 | } 464 | } 465 | }, 466 | "6ff94ac3-8ded-442e-ae43-aa0a5c14d468": { 467 | "id": "6ff94ac3-8ded-442e-ae43-aa0a5c14d468", 468 | "prev": "27ee4130-d0bb-4287-b8fe-75a7b0ecf178", 469 | "regions": { 470 | "ad759b3a-6080-4356-a9fd-87f2b1b90bc2": { 471 | "attrs": { 472 | "height": 0.8, 473 | "width": 0.8, 474 | "x": 0.1, 475 | "y": 0.1 476 | }, 477 | "content": { 478 | "cell": "8458de53-35b5-405e-a372-5db5d2e2c2c5", 479 | "part": "whole" 480 | }, 481 | "id": "ad759b3a-6080-4356-a9fd-87f2b1b90bc2" 482 | } 483 | } 484 | }, 485 | "75e76bd9-24ae-4c42-b6bc-5f58a0550ba8": { 486 | "id": "75e76bd9-24ae-4c42-b6bc-5f58a0550ba8", 487 | "prev": "152c5a3b-78f9-4183-bce2-379a4012baf6", 488 | "regions": { 489 | "4afd3b41-071f-44eb-a8f6-9a7f780041c2": { 490 | "attrs": { 491 | "height": 0.8, 492 | "width": 0.8, 493 | "x": 0.1, 494 | "y": 0.1 495 | }, 496 | "content": { 497 | "cell": "62fdd00c-a006-4f11-b9dc-e2ca072225d7", 498 | "part": "whole" 499 | }, 500 | "id": "4afd3b41-071f-44eb-a8f6-9a7f780041c2" 501 | } 502 | } 503 | }, 504 | "8c46fa2c-d5dc-4ef7-8d99-f504e2c3a4a1": { 505 | "id": "8c46fa2c-d5dc-4ef7-8d99-f504e2c3a4a1", 506 | "prev": "e2f5626f-0d60-47cb-967f-0edababb0329", 507 | "regions": { 508 | "af33776f-ec36-45be-a627-39573a78b1d6": { 509 | "attrs": { 510 | "height": 0.8, 511 | "width": 0.8, 512 | "x": 0.1, 513 | "y": 0.1 514 | }, 515 | "content": { 516 | "cell": "0d61b4b4-163f-47fe-80f1-092287218273", 517 | "part": "whole" 518 | }, 519 | "id": "af33776f-ec36-45be-a627-39573a78b1d6" 520 | } 521 | } 522 | }, 523 | "ae3f4c01-80dc-4add-889a-05c74f7155a5": { 524 | "id": "ae3f4c01-80dc-4add-889a-05c74f7155a5", 525 | "prev": "6ff94ac3-8ded-442e-ae43-aa0a5c14d468", 526 | "regions": { 527 | "15f00a98-7b04-439d-996d-851b773b060a": { 528 | "attrs": { 529 | "height": 0.8, 530 | "width": 0.8, 531 | "x": 0.1, 532 | "y": 0.1 533 | }, 534 | "content": { 535 | "cell": "96ca5c44-2cfc-471c-8da7-39870c822e20", 536 | "part": "whole" 537 | }, 538 | "id": "15f00a98-7b04-439d-996d-851b773b060a" 539 | } 540 | } 541 | }, 542 | "ba285213-f645-4314-afd5-0a656fa35631": { 543 | "id": "ba285213-f645-4314-afd5-0a656fa35631", 544 | "prev": "8c46fa2c-d5dc-4ef7-8d99-f504e2c3a4a1", 545 | "regions": { 546 | "6cddb9f2-8e39-4010-8fab-3e70b3a8993f": { 547 | "attrs": { 548 | "height": 0.8, 549 | "width": 0.8, 550 | "x": 0.1, 551 | "y": 0.1 552 | }, 553 | "content": { 554 | "cell": "b878a4f9-4345-4abb-81f4-5a731c639ab8", 555 | "part": "whole" 556 | }, 557 | "id": "6cddb9f2-8e39-4010-8fab-3e70b3a8993f" 558 | } 559 | } 560 | }, 561 | "cd587236-8a19-444d-8b18-69d782dbf725": { 562 | "id": "cd587236-8a19-444d-8b18-69d782dbf725", 563 | "prev": null, 564 | "regions": { 565 | "ef377bfe-ff45-49db-b471-f79ecb10b580": { 566 | "attrs": { 567 | "height": 0.8, 568 | "width": 0.8, 569 | "x": 0.1, 570 | "y": 0.1 571 | }, 572 | "content": { 573 | "cell": "dc7a1635-0bbd-4bf7-a07e-7a36f58e258b", 574 | "part": "whole" 575 | }, 576 | "id": "ef377bfe-ff45-49db-b471-f79ecb10b580" 577 | } 578 | } 579 | }, 580 | "e2f5626f-0d60-47cb-967f-0edababb0329": { 581 | "id": "e2f5626f-0d60-47cb-967f-0edababb0329", 582 | "prev": "ae3f4c01-80dc-4add-889a-05c74f7155a5", 583 | "regions": { 584 | "eef49fa0-0f9b-4228-8fb8-79e079bf7682": { 585 | "attrs": { 586 | "height": 0.8, 587 | "width": 0.8, 588 | "x": 0.1, 589 | "y": 0.1 590 | }, 591 | "content": { 592 | "cell": "9110098b-9675-4d64-adf3-c947073d4c4d", 593 | "part": "whole" 594 | }, 595 | "id": "eef49fa0-0f9b-4228-8fb8-79e079bf7682" 596 | } 597 | } 598 | }, 599 | "f001d476-5814-4664-a722-f04f5d23cd52": { 600 | "id": "f001d476-5814-4664-a722-f04f5d23cd52", 601 | "prev": "cd587236-8a19-444d-8b18-69d782dbf725", 602 | "regions": { 603 | "5a176076-c5a5-4b50-ab2c-9cd0baedad45": { 604 | "attrs": { 605 | "height": 0.8, 606 | "width": 0.8, 607 | "x": 0.1, 608 | "y": 0.1 609 | }, 610 | "content": { 611 | "cell": "53eee250-b3d0-4262-ad09-e87fb2acf82e", 612 | "part": "whole" 613 | }, 614 | "id": "5a176076-c5a5-4b50-ab2c-9cd0baedad45" 615 | } 616 | } 617 | } 618 | }, 619 | "themes": { 620 | "default": "c6b5d1ad-d691-4000-9f62-de7fc0e83644", 621 | "theme": { 622 | "586a6e7a-f661-4d6c-90d0-1392715bea27": { 623 | "id": "586a6e7a-f661-4d6c-90d0-1392715bea27", 624 | "palette": { 625 | "19cc588f-0593-49c9-9f4b-e4d7cc113b1c": { 626 | "id": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c", 627 | "rgb": [ 628 | 252, 629 | 252, 630 | 252 631 | ] 632 | }, 633 | "31af15d2-7e15-44c5-ab5e-e04b16a89eff": { 634 | "id": "31af15d2-7e15-44c5-ab5e-e04b16a89eff", 635 | "rgb": [ 636 | 68, 637 | 68, 638 | 68 639 | ] 640 | }, 641 | "50f92c45-a630-455b-aec3-788680ec7410": { 642 | "id": "50f92c45-a630-455b-aec3-788680ec7410", 643 | "rgb": [ 644 | 155, 645 | 177, 646 | 192 647 | ] 648 | }, 649 | "c5cc3653-2ee1-402a-aba2-7caae1da4f6c": { 650 | "id": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c", 651 | "rgb": [ 652 | 43, 653 | 126, 654 | 184 655 | ] 656 | }, 657 | "efa7f048-9acb-414c-8b04-a26811511a21": { 658 | "id": "efa7f048-9acb-414c-8b04-a26811511a21", 659 | "rgb": [ 660 | 25.118061674008803, 661 | 73.60176211453744, 662 | 107.4819383259912 663 | ] 664 | } 665 | }, 666 | "rules": { 667 | "blockquote": { 668 | "color": "50f92c45-a630-455b-aec3-788680ec7410" 669 | }, 670 | "code": { 671 | "font-family": "Anonymous Pro" 672 | }, 673 | "h1": { 674 | "color": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c", 675 | "font-family": "Lato", 676 | "font-size": 8 677 | }, 678 | "h2": { 679 | "color": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c", 680 | "font-family": "Lato", 681 | "font-size": 6 682 | }, 683 | "h3": { 684 | "color": "50f92c45-a630-455b-aec3-788680ec7410", 685 | "font-family": "Lato", 686 | "font-size": 5.5 687 | }, 688 | "h4": { 689 | "color": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c", 690 | "font-family": "Lato", 691 | "font-size": 5 692 | }, 693 | "h5": { 694 | "font-family": "Lato" 695 | }, 696 | "h6": { 697 | "font-family": "Lato" 698 | }, 699 | "h7": { 700 | "font-family": "Lato" 701 | }, 702 | "pre": { 703 | "font-family": "Anonymous Pro", 704 | "font-size": 4 705 | } 706 | }, 707 | "text-base": { 708 | "font-family": "Merriweather", 709 | "font-size": 4 710 | } 711 | }, 712 | "c6b5d1ad-d691-4000-9f62-de7fc0e83644": { 713 | "backgrounds": { 714 | "dc7afa04-bf90-40b1-82a5-726e3cff5267": { 715 | "background-color": "31af15d2-7e15-44c5-ab5e-e04b16a89eff", 716 | "id": "dc7afa04-bf90-40b1-82a5-726e3cff5267" 717 | } 718 | }, 719 | "id": "c6b5d1ad-d691-4000-9f62-de7fc0e83644", 720 | "palette": { 721 | "19cc588f-0593-49c9-9f4b-e4d7cc113b1c": { 722 | "id": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c", 723 | "rgb": [ 724 | 252, 725 | 252, 726 | 252 727 | ] 728 | }, 729 | "31af15d2-7e15-44c5-ab5e-e04b16a89eff": { 730 | "id": "31af15d2-7e15-44c5-ab5e-e04b16a89eff", 731 | "rgb": [ 732 | 68, 733 | 68, 734 | 68 735 | ] 736 | }, 737 | "50f92c45-a630-455b-aec3-788680ec7410": { 738 | "id": "50f92c45-a630-455b-aec3-788680ec7410", 739 | "rgb": [ 740 | 197, 741 | 226, 742 | 245 743 | ] 744 | }, 745 | "c5cc3653-2ee1-402a-aba2-7caae1da4f6c": { 746 | "id": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c", 747 | "rgb": [ 748 | 43, 749 | 126, 750 | 184 751 | ] 752 | }, 753 | "efa7f048-9acb-414c-8b04-a26811511a21": { 754 | "id": "efa7f048-9acb-414c-8b04-a26811511a21", 755 | "rgb": [ 756 | 25.118061674008803, 757 | 73.60176211453744, 758 | 107.4819383259912 759 | ] 760 | } 761 | }, 762 | "rules": { 763 | "a": { 764 | "color": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c" 765 | }, 766 | "blockquote": { 767 | "color": "50f92c45-a630-455b-aec3-788680ec7410", 768 | "font-size": 3 769 | }, 770 | "code": { 771 | "font-family": "Anonymous Pro" 772 | }, 773 | "h1": { 774 | "color": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c", 775 | "font-family": "Merriweather", 776 | "font-size": 8 777 | }, 778 | "h2": { 779 | "color": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c", 780 | "font-family": "Merriweather", 781 | "font-size": 6 782 | }, 783 | "h3": { 784 | "color": "50f92c45-a630-455b-aec3-788680ec7410", 785 | "font-family": "Lato", 786 | "font-size": 5.5 787 | }, 788 | "h4": { 789 | "color": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c", 790 | "font-family": "Lato", 791 | "font-size": 5 792 | }, 793 | "h5": { 794 | "font-family": "Lato" 795 | }, 796 | "h6": { 797 | "font-family": "Lato" 798 | }, 799 | "h7": { 800 | "font-family": "Lato" 801 | }, 802 | "li": { 803 | "color": "50f92c45-a630-455b-aec3-788680ec7410", 804 | "font-size": 3.25 805 | }, 806 | "pre": { 807 | "font-family": "Anonymous Pro", 808 | "font-size": 4 809 | } 810 | }, 811 | "text-base": { 812 | "color": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c", 813 | "font-family": "Lato", 814 | "font-size": 4 815 | } 816 | } 817 | } 818 | } 819 | } 820 | }, 821 | "nbformat": 4, 822 | "nbformat_minor": 0 823 | } 824 | -------------------------------------------------------------------------------- /Introduction_to_python_day_2_session_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# An introduction to solving biological problems with Python\n", 8 | "\n", 9 | "## Session 2.1: Functions\n", 10 | "\n", 11 | "- [Function definition syntax](#Function-definition-syntax)\n", 12 | "- [Excercises 2.1.1](#Excercises-2.1.1)\n", 13 | "- [Return value](#Return-value)\n", 14 | "- [Exercises 2.1.2](#Exercises-2.1.2)\n", 15 | "- [Function arguments](#Function-arguments)\n", 16 | "- [Exercises 2.1.3](#Exercises-2.1.3)\n", 17 | "- [Variable scope](#Variable-scope)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Function basics" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "We have already seen a number of functions built into python that let us do useful things to strings, collections and numbers etc. For example `print()` or `len()` which is passed some kind of sequence object and returns the length of the sequence.\n", 32 | "\n", 33 | "This is the general form of a function; it takes some input _arguments_ and returns some output based on the supplied arguments.\n", 34 | "\n", 35 | "The arguments to a function, if any, are supplied in parentheses and the result of the function _call_ is the result of evaluating the function.\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "x = abs(-3.0)\n", 47 | "print(x)\n", 48 | "\n", 49 | "l = len(\"ACGGTGTCAA\")\n", 50 | "print(l)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "As well as using python's built in functions, you can write your own. Functions are a nice way to **encapsulate some code that you want to reuse** elsewhere in your program, rather than repeating the same bit of code multiple times. They also provide a way to name some coherent block of code and allow you to structure a complex program." 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Function definition syntax" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "Functions are defined in Python using the `def` keyword followed by the name of the function. If your function takes some arguments (input data) then you can name these in parentheses after the function name. If your function does not take any arguments you still need some empty parentheses. Here we define a simple function named `sayHello` that prints a line of text to the screen:" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": false 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "def sayHello():\n", 83 | " print('Hello world!')" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "Note that the code block for the function (just a single print line in this case) is indented relative to the `def`. The above definition just decalares the function in an abstract way and nothing will be printed when the definition is made. To actually use a function you need to invoke it (call it) by using its name and a pair of round parentheses:" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "sayHello() # Call the function to print 'Hello world'" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "If required, a function may be written so it accepts input. Here we specify a variable called `name` in the brackets of the function definition and this variable is then used by the function. Although the input variable is referred to inside the function the variable does not represent any particular value. It only takes a value if the function is actually used in context." 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "def sayHello(name):\n", 120 | " print('Hello', name)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "When we call (invoke) this function we specify a specific value for the input. Here we pass in the value `User`, so the name variable takes that value and uses it to print a message, as defined in the function. " 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": false 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "sayHello('User') # Prints 'Hello User'" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "When we call the function again with a different input value we naturally get a different message. Here we also illustrate that the input value can also be passed-in as a variable (text in this case)." 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": false 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "text = 'Mary'\n", 157 | "sayHello(text) # Prints 'Hello Mary'" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "A function may also generate output that is passed back or returned to the program at the point at which the function was called. For example here we define a function to do a simple calculation of the square of input (`x`) to create an output (`y`):" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "def square(x):\n", 176 | " y = x*x\n", 177 | " return y" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "Once the `return` statement is reached the operation of the function will end, and anything on the return line will be passed back as output. Here we call the function on an input number and catch the output value as result. Notice how the names of the variables used inside the function definition are separate from any variable names we may choose to use when calling the function.\n", 185 | " " 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "number = 7\n", 197 | "result = square(number) # Call the square() function which returns a result\n", 198 | "print(result) # Prints: 49" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "The function `square` can be used from now on anywhere in your program as many times as required on any (numeric) input values we like." 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": false 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "print(square(1.2e-3)) # Prints: 1.4399999999999998e-06" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "A function can accept multiple input values, otherwise known as arguments. These are separated by commas inside the brackets of the function definition. Here we define a function that takes two arguments and performs a calculation on both, before sending back the result.\n" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": { 230 | "collapsed": false 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "def calcFunc(x, y):\n", 235 | " z = x*x + y*y\n", 236 | " return z\n", 237 | "\n", 238 | "\n", 239 | "result = calcFunc(1.414, 2.0)\n", 240 | "print(result) # 5.999396" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "Note that this function does not check that x and y are valid forms of input. For the function to work properly we assume they are numbers. Depending on how this function is going to be used, appropriate checks could be added." 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "Functions can be arbitrarily long and can peform very complex operations. However, to make a function reusable, it is often better to assign it a single responsibility and a descriptive name.\n", 255 | "Let's define now a function to calculate the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) between two vectors:" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [], 265 | "source": [ 266 | "def calcDistance(vec1, vec2): \n", 267 | " dist = 0\n", 268 | " for i in range(len(vec1)):\n", 269 | " delta = vec1[i] - vec2[i]\n", 270 | " dist += delta*delta\n", 271 | " dist = dist**(1/2) # square-root\n", 272 | " return dist" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "For the record, the [prefered way to calcule a square-root](https://docs.python.org/3/library/math.html#math.sqrt) is by using the built-in function `sqrt()` from the `math` library:\n", 280 | "```python\n", 281 | "import math\n", 282 | "math.sqrt(x)\n", 283 | "```\n", 284 | "\n", 285 | "Let's experiment a little with our function." 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": { 292 | "collapsed": false 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "w1 = ( 23.1, 17.8, -5.6 )\n", 297 | "w2 = ( 8.4, 15.9, 7.7 )\n", 298 | "calcDistance( w1, w2 )" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "Note that the function is general and handles any two vectors (irrespective of their representation) as long as their dimensions are compatible:" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": { 312 | "collapsed": false 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "calcDistance( ( 1, 2 ), ( 3, 4 ) ) # dimension: 2" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": { 323 | "collapsed": false 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "calcDistance( [ 1, 2 ], [ 3, 4 ] ) # vectors represented as lists" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": { 334 | "collapsed": false 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "calcDistance( ( 1, 2 ), [ 3, 4 ] ) # mixed representation" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "## Excercises 2.1.1\n", 346 | "\n", 347 | "- a. Calculate the mean\n", 348 | " - Write a function that takes 2 numerical arguments and returns their mean. Test your function on some examples.\n", 349 | " - Write another function that takes a list of numbers and returns the mean of all the numbers in the list.\n", 350 | "- b. Write a function that takes a single DNA sequence as an argument and estimates the molecular weight of this sequence. Test your function using some example sequences. The following table gives the weight of each (single-stranded) nucleotide in g/mol:\n", 351 | "\n", 352 | "\n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | "
DNA ResidueWeight
A331
C307
G347
T306
\n", 359 | "\n", 360 | "\n", 361 | "- c. If the sequence passed contains base `N`, use the mean weight of the other bases as the weight of base `N`." 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "## Return value" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "There can be more than one `return` statement in a function, although typically there is only one, at the bottom. Consider the following function to get some text to say whether a number is positive or negative. It has three return statements: the first two return statements pass back text strings but the last, which would be reached if the input value were zero, has no explicit return value and thus passes back the Python `None` object. Any function code after this final return is ignored. \n", 376 | "The `return` keyword immediately exits the function, and no more of the code in that function will be run once the function has returned (as program flow will be returned to the call site)" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "collapsed": false 384 | }, 385 | "outputs": [], 386 | "source": [ 387 | "def getSign(value):\n", 388 | " \n", 389 | " if value > 0:\n", 390 | " return \"Positive\"\n", 391 | " \n", 392 | " elif value < 0:\n", 393 | " return \"Negative\"\n", 394 | " \n", 395 | " return # implicit 'None'\n", 396 | "\n", 397 | " print(\"Hello world\") # execution does not reach this line\n", 398 | " \n", 399 | "print(\"getSign( 33.6 ):\", getSign( 33.6 ))\n", 400 | "print(\"getSign( -7 ):\", getSign( -7 ))\n", 401 | "print(\"getSign( 0 ):\", getSign( 0 ))" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "All of the examples of functions so far have returned only single values, however it is possible to pass back more than one value via the `return` statement. In the following example we define a function that takes two arguments and passes back three values. The return values are really passed back inside a single tuple, which can be caught as a single collection of values. " 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": { 415 | "collapsed": false 416 | }, 417 | "outputs": [], 418 | "source": [ 419 | "def myFunction(value1, value2):\n", 420 | " \n", 421 | " total = value1 + value2\n", 422 | " difference = value1 - value2\n", 423 | " product = value1 * value2\n", 424 | " \n", 425 | " return total, difference, product\n", 426 | "\n", 427 | "values = myFunction( 3, 7 ) # Grab output as a whole tuple\n", 428 | "print(\"Results as a tuple:\", values)\n", 429 | "\n", 430 | "x, y, z = myFunction( 3, 7 ) # Unpack tuple to grab individual values\n", 431 | "print(\"x:\", x)\n", 432 | "print(\"y:\", y)\n", 433 | "print(\"z:\", z)" 434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "metadata": {}, 439 | "source": [ 440 | "## Exercises 2.1.2\n", 441 | "\n", 442 | "a. Write a function that counts the number of each base found in a DNA sequence. Return the result as a tuple of 4 numbers representing the counts of each base `A`, `C`, `G` and `T`.\n", 443 | "\n", 444 | "b. Write a function to return the reverse-complement of a nucleotide sequence." 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": {}, 450 | "source": [ 451 | "## Function arguments" 452 | ] 453 | }, 454 | { 455 | "cell_type": "markdown", 456 | "metadata": {}, 457 | "source": [ 458 | "### Mandatory arguments\n", 459 | "\n", 460 | "The arguments we have passed to functions so far have all been _mandatory_, if we do not supply them or if supply the wrong number of arguments python will throw an error also called an exception:" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": { 467 | "collapsed": true 468 | }, 469 | "outputs": [], 470 | "source": [ 471 | "def square(number):\n", 472 | " # one mandatory argument\n", 473 | " y = number*number\n", 474 | " return y" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": null, 480 | "metadata": { 481 | "collapsed": false 482 | }, 483 | "outputs": [], 484 | "source": [ 485 | "square(2)" 486 | ] 487 | }, 488 | { 489 | "cell_type": "markdown", 490 | "metadata": {}, 491 | "source": [ 492 | "**Mandatory arguments are assumed to come in the same order as the arguments in the function definition**, but you can also opt to specify the arguments using the argument names as _keywords_, supplying the values corresponding to each keyword with a `=` sign." 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": null, 498 | "metadata": { 499 | "collapsed": false 500 | }, 501 | "outputs": [], 502 | "source": [ 503 | "square(number=3)" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": null, 509 | "metadata": { 510 | "collapsed": false 511 | }, 512 | "outputs": [], 513 | "source": [ 514 | "def repeat(seq, n):\n", 515 | " # two mandatory arguments\n", 516 | " result = ''\n", 517 | " for i in range(0,n):\n", 518 | " result += seq\n", 519 | " return result\n", 520 | "\n", 521 | "print(repeat(\"CTA\", 3))\n", 522 | "print(repeat(n=4, seq=\"GTT\"))" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": {}, 528 | "source": [ 529 | "
**NOTE** Unnamed (positional) arguments must come before named arguments, even if they look to be in the right order.
" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": null, 535 | "metadata": { 536 | "collapsed": false 537 | }, 538 | "outputs": [], 539 | "source": [ 540 | "print(repeat(seq=\"CTA\", n=3))" 541 | ] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": {}, 546 | "source": [ 547 | "### Arguments with default values\n", 548 | "Sometimes it is useful to give some arguments a default value that the caller can override, but which will be used if the caller does not supply a value for this argument. We can do this by assigning some value to the named argument with the `=` operator in the function definition." 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": null, 554 | "metadata": { 555 | "collapsed": false 556 | }, 557 | "outputs": [], 558 | "source": [ 559 | "def runSimulation(nsteps=1000):\n", 560 | " print(\"Running simulation for\", nsteps, \"steps\")\n", 561 | "\n", 562 | "runSimulation(500)\n", 563 | "runSimulation()" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "
**CAVEAT**: default arguments are defined once and keep their state between calls. This can be a problem for *mutable* objects:
" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "metadata": { 577 | "collapsed": false 578 | }, 579 | "outputs": [], 580 | "source": [ 581 | "def myFunction(parameters=[]):\n", 582 | " parameters.append( 100 )\n", 583 | " print(parameters)\n", 584 | " \n", 585 | "myFunction()\n", 586 | "myFunction()\n", 587 | "myFunction()\n", 588 | "myFunction([])\n", 589 | "myFunction([])\n", 590 | "myFunction([])" 591 | ] 592 | }, 593 | { 594 | "cell_type": "markdown", 595 | "metadata": {}, 596 | "source": [ 597 | "... or avoid modifying *mutable* default arguments." 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": null, 603 | "metadata": { 604 | "collapsed": false 605 | }, 606 | "outputs": [], 607 | "source": [ 608 | "def myFunction(parameters):\n", 609 | " # one mandatory argument without default value\n", 610 | " parameters.append( 100 )\n", 611 | " print(parameters)\n", 612 | " \n", 613 | "my_list = []\n", 614 | "myFunction(my_list)\n", 615 | "myFunction(my_list)\n", 616 | "myFunction(my_list)\n", 617 | "my_new_list = []\n", 618 | "myFunction(my_new_list)" 619 | ] 620 | }, 621 | { 622 | "cell_type": "markdown", 623 | "metadata": {}, 624 | "source": [ 625 | "### Position of mandatory arguments\n", 626 | "Arrange function arguments so that *mandatory* arguments come first:" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": null, 632 | "metadata": { 633 | "collapsed": false 634 | }, 635 | "outputs": [], 636 | "source": [ 637 | "def runSimulation(initialTemperature, nsteps=1000):\n", 638 | " # one mandatory argument followed by one with default value\n", 639 | " print(\"Running simulation starting at\", initialTemperature, \"K and doing\", nsteps, \"steps\")\n", 640 | " \n", 641 | "runSimulation(300, 500)\n", 642 | "runSimulation(300)" 643 | ] 644 | }, 645 | { 646 | "cell_type": "markdown", 647 | "metadata": {}, 648 | "source": [ 649 | "As before, no positional argument can appear after a keyword argument, and all required arguments must still be provided." 650 | ] 651 | }, 652 | { 653 | "cell_type": "code", 654 | "execution_count": null, 655 | "metadata": { 656 | "collapsed": false 657 | }, 658 | "outputs": [], 659 | "source": [ 660 | "runSimulation( nsteps=100, initialTemperature=300 )" 661 | ] 662 | }, 663 | { 664 | "cell_type": "code", 665 | "execution_count": null, 666 | "metadata": { 667 | "collapsed": false 668 | }, 669 | "outputs": [], 670 | "source": [ 671 | "runSimulation( initialTemperature=300 )" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": null, 677 | "metadata": { 678 | "collapsed": false 679 | }, 680 | "outputs": [], 681 | "source": [ 682 | "runSimulation( nsteps=100 ) # Error: missing required argument 'initialTemperature'" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "execution_count": null, 688 | "metadata": { 689 | "collapsed": false 690 | }, 691 | "outputs": [], 692 | "source": [ 693 | "runSimulation( nsteps=100, 300 ) # Error: positional argument follows keyword argument" 694 | ] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": {}, 699 | "source": [ 700 | "Keyword names must naturally match to those declared:" 701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": null, 706 | "metadata": { 707 | "collapsed": false 708 | }, 709 | "outputs": [], 710 | "source": [ 711 | "runSimulation( initialTemperature=300, numSteps=100 ) # Error: unexpected keyword argument 'numSteps'" 712 | ] 713 | }, 714 | { 715 | "cell_type": "markdown", 716 | "metadata": {}, 717 | "source": [ 718 | "Function cannot be defined with mandatory arguments after default ones." 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": null, 724 | "metadata": { 725 | "collapsed": false 726 | }, 727 | "outputs": [], 728 | "source": [ 729 | "def badFunction(nsteps=1000, initialTemperature):\n", 730 | " pass" 731 | ] 732 | }, 733 | { 734 | "cell_type": "markdown", 735 | "metadata": {}, 736 | "source": [ 737 | "## Exercises 2.1.3\n", 738 | "\n", 739 | "Extend your solution to the previous exercise estimating the weight of a DNA sequence so that it can also calculate the weight of an RNA sequence, use an optional argument to specify the molecule type, but default to DNA. The weights of RNA residues are:\n", 740 | "\n", 741 | "\n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | "
RNA ResidueWeight
A347
C323
G363
U324
\n" 748 | ] 749 | }, 750 | { 751 | "cell_type": "markdown", 752 | "metadata": {}, 753 | "source": [ 754 | "## Variable scope" 755 | ] 756 | }, 757 | { 758 | "cell_type": "markdown", 759 | "metadata": {}, 760 | "source": [ 761 | "Every variable in python has a _scope_ in which it is defined. Variables defined at the outermost level are known as _globals_ (although typically only for the current module). In contrast, variables defined within a function are local, and cannot be accessed from the outside." 762 | ] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "execution_count": null, 767 | "metadata": { 768 | "collapsed": false 769 | }, 770 | "outputs": [], 771 | "source": [ 772 | "def mathFunction(x, y):\n", 773 | " math_func_result = ( x + y ) * ( x - y )\n", 774 | " return math_func_result" 775 | ] 776 | }, 777 | { 778 | "cell_type": "code", 779 | "execution_count": null, 780 | "metadata": { 781 | "collapsed": false 782 | }, 783 | "outputs": [], 784 | "source": [ 785 | "answer = mathFunction( 4, 7 )\n", 786 | "print(answer)" 787 | ] 788 | }, 789 | { 790 | "cell_type": "code", 791 | "execution_count": null, 792 | "metadata": { 793 | "collapsed": false 794 | }, 795 | "outputs": [], 796 | "source": [ 797 | "answer = mathFunction( 4, 7 )\n", 798 | "print(math_func_result)" 799 | ] 800 | }, 801 | { 802 | "cell_type": "markdown", 803 | "metadata": {}, 804 | "source": [ 805 | "Generally, variables defined in an outer scope are also visible in functions, but you should be careful manipulating them as this can lead to confusing code and python will actually raise an error if you try to change the value of a global variable inside a function. Instead it is a good idea to avoid using global variables and, for example, to pass any necessary variables as parameters to your functions." 806 | ] 807 | }, 808 | { 809 | "cell_type": "code", 810 | "execution_count": null, 811 | "metadata": { 812 | "collapsed": false 813 | }, 814 | "outputs": [], 815 | "source": [ 816 | "counter = 1\n", 817 | "def increment(): \n", 818 | " print(counter)\n", 819 | " counter += 1\n", 820 | "\n", 821 | "increment()\n", 822 | "print(counter)" 823 | ] 824 | }, 825 | { 826 | "cell_type": "markdown", 827 | "metadata": {}, 828 | "source": [ 829 | "If you really want to do this, there is a way round this using the `global` statement. Any variable which is changed or created inside of a function is local, if it hasn't been declared as a global variable. To tell Python that we want to use the global variable, we have to explicitly state this by using the keyword `global`." 830 | ] 831 | }, 832 | { 833 | "cell_type": "code", 834 | "execution_count": null, 835 | "metadata": { 836 | "collapsed": false 837 | }, 838 | "outputs": [], 839 | "source": [ 840 | "counter = 1\n", 841 | "def increment(): \n", 842 | " global counter\n", 843 | " print(counter)\n", 844 | " counter += 1\n", 845 | "\n", 846 | "increment()\n", 847 | "print(counter)" 848 | ] 849 | }, 850 | { 851 | "cell_type": "markdown", 852 | "metadata": {}, 853 | "source": [ 854 | "
**NOTE** It is normally better to avoid global variables and passing through arguments to functions instead.
" 855 | ] 856 | }, 857 | { 858 | "cell_type": "code", 859 | "execution_count": null, 860 | "metadata": { 861 | "collapsed": false 862 | }, 863 | "outputs": [], 864 | "source": [ 865 | "def increment(counter): \n", 866 | " return counter + 1\n", 867 | "\n", 868 | "counter = 0\n", 869 | "counter = increment( counter ) \n", 870 | "print(counter)" 871 | ] 872 | }, 873 | { 874 | "cell_type": "markdown", 875 | "metadata": {}, 876 | "source": [ 877 | "## Next session\n", 878 | "\n", 879 | "Go to our next notebook: [Introduction_to_python_day_2_session_2](Introduction_to_python_day_2_session_2.ipynb)" 880 | ] 881 | } 882 | ], 883 | "metadata": { 884 | "kernelspec": { 885 | "display_name": "Python 3", 886 | "language": "python", 887 | "name": "python3" 888 | }, 889 | "language_info": { 890 | "codemirror_mode": { 891 | "name": "ipython", 892 | "version": 3 893 | }, 894 | "file_extension": ".py", 895 | "mimetype": "text/x-python", 896 | "name": "python", 897 | "nbconvert_exporter": "python", 898 | "pygments_lexer": "ipython3", 899 | "version": "3.5.2" 900 | } 901 | }, 902 | "nbformat": 4, 903 | "nbformat_minor": 0 904 | } 905 | --------------------------------------------------------------------------------