├── data
    ├── myfile.txt
    ├── searchSRS.txt
    ├── datafile.txt
    ├── dna.txt
    ├── mydata.txt
    ├── genes.txt
    ├── sample_accessions.txt
    ├── glpa.fa
    └── mySeqFile.fa
├── scripts
    └── hello.py
├── my_first_module.py
├── img
    └── python_shell.png
├── solutions
    ├── ex2_3_1_a.py
    ├── ex1_1_3.py
    ├── ex1_1_2.py
    ├── ex1_2_3.py
    ├── ex2_2_2.py
    ├── ex1_3_1_b.py
    ├── ex2_3_1_b.py
    ├── ex1_3_1_a.py
    ├── ex1_1_1.py
    ├── ex2_1_1_b.py
    ├── ex2_1_2_a.py
    ├── ex1_4_1.py
    ├── ex2_1_2_b.py
    ├── ex1_4_2.py
    ├── ex2_1_1_a.py
    ├── ex2_2_4.py
    ├── ex1_2_2.py
    ├── ex2_2_3.py
    ├── ex2_1_1_c.py
    ├── ex1_2_4_extra.py
    ├── ex1_2_4.py
    ├── ex1_2_1.py
    ├── ex2_3_2_a.py
    ├── ex2_3_2_b.py
    ├── ex2_1_3.py
    ├── ex2_4_1.py
    ├── ex2_0_1.py
    └── ex2_2_1.py
├── .gitignore
├── install
    ├── Dockerfile
    ├── vbox_installer.sh
    └── 2to3_nb.py
├── README.md
├── feedback.md
├── Introduction_to_python_day_2_session_2.ipynb
├── Introduction_to_python_day_2_session_4.ipynb
├── planning.md
├── Introduction_to_python_day_1_session_3.ipynb
├── Introduction_to_python_day_2_introduction.ipynb
├── Introduction_to_python_day_1_session_4.ipynb
├── Introduction_to_python_day_1_session_1.ipynb
├── Introduction_to_python_day_1_introduction.ipynb
└── Introduction_to_python_day_2_session_1.ipynb


/data/myfile.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scripts/hello.py:
--------------------------------------------------------------------------------
1 | print("Hello world!")
2 | 


--------------------------------------------------------------------------------
/data/searchSRS.txt:
--------------------------------------------------------------------------------
1 | SRS006837
2 | SRS003875
3 | SRS009999


--------------------------------------------------------------------------------
/data/datafile.txt:
--------------------------------------------------------------------------------
1 | Header
2 | First line
3 | Second line
4 | 


--------------------------------------------------------------------------------
/data/dna.txt:
--------------------------------------------------------------------------------
1 | CGGCTAGATCCAGAT
2 | CGTGTAA
3 | GTACACCCA
4 | GTCAACACTTA
5 | 


--------------------------------------------------------------------------------
/my_first_module.py:
--------------------------------------------------------------------------------
1 | def say_hello(user):
2 |     print('Hello', user, '!')
3 | 


--------------------------------------------------------------------------------
/img/python_shell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pycam/python-intro/master/img/python_shell.png


--------------------------------------------------------------------------------
/data/mydata.txt:
--------------------------------------------------------------------------------
1 | Index Organism Score
2 | 1 Human 1.076
3 | 2 Mouse 1.202
4 | 3 Frog 2.2362
5 | 4 Fly 0.9853
6 | 


--------------------------------------------------------------------------------
/data/genes.txt:
--------------------------------------------------------------------------------
1 | gene	chrom	start	end
2 | BRCA2	13	32889611	32973805
3 | TNFAIP3	6	138188351	138204449
4 | TCF7	5	133450402	133487556
5 | 


--------------------------------------------------------------------------------
/data/sample_accessions.txt:
--------------------------------------------------------------------------------
 1 | SRS006837
 2 | SRS006838
 3 | SRS006839
 4 | SRS106839
 5 | SRS006840
 6 | SRS006841
 7 | SRS506841
 8 | SRS006842
 9 | SRS006843
10 | SRS206853
11 | SRS006844
12 | SRS006845
13 | SRS006846
14 | 


--------------------------------------------------------------------------------
/data/glpa.fa:
--------------------------------------------------------------------------------
1 | >swissprot|P02724|GLPA_HUMAN Glycophorin-A;
2 | MYGKIIFVLLLSEIVSISASSTTGVAMHTSTSSSVTKSYISSQTNDTHKRDTYAATPRAH
3 | EVSEISVRTVYPPEEETGERVQLAHHFSEPEITLIIFGVMAGVIGTILLISYGIRRLIKK
4 | SPSDVKPLPSPDTDVPLSSVEIENPETSDQ
5 | 
6 | 


--------------------------------------------------------------------------------
/solutions/ex2_3_1_a.py:
--------------------------------------------------------------------------------
1 | # Script that writes the values of a list of numbers to a file,
2 | # with each number on a separate line.
3 | 
4 | data = [2, 4, 6, 8, 10]
5 | 
6 | with open("numbers.txt", "w") as f:
7 |     for d in data:
8 |         f.write(str(d) + "\n")
9 | 


--------------------------------------------------------------------------------
/solutions/ex1_1_3.py:
--------------------------------------------------------------------------------
 1 | # Experimenting with python variables in a file
 2 | 
 3 | S = "TCT"
 4 | L = "CTT"
 5 | Y = "TAT"
 6 | C = "TGT"
 7 | 
 8 | # possible DNA sequence for the protein sequence SYLYC
 9 | dna = S + Y + L + Y + C
10 | 
11 | # print the DNA sequence
12 | print("DNA sequence of SYLYC:", dna)
13 | 


--------------------------------------------------------------------------------
/solutions/ex1_1_2.py:
--------------------------------------------------------------------------------
 1 | # This exercise should be done in the interpreter
 2 | 
 3 | # Assign numerical values to 2 variables, calculate the mean of these two variables
 4 | # and store the result in another variable. Print out the result to the screen
 5 | 
 6 | v1 = 5.0
 7 | v2 = 10.0
 8 | mean = (v1 + v2) / 2
 9 | print(mean)
10 | 


--------------------------------------------------------------------------------
/solutions/ex1_2_3.py:
--------------------------------------------------------------------------------
 1 | # Protein sequence given
 2 | seq = "MPISEPTFFEIF"
 3 | 
 4 | # Split the sequence into its component amino acids
 5 | seq_list = list(seq)
 6 | 
 7 | # Use a set to establish the unique amino acids
 8 | unique_amino_acids = set(seq_list)
 9 | 
10 | # Print out the unique amino acids
11 | print(unique_amino_acids)
12 | 


--------------------------------------------------------------------------------
/solutions/ex2_2_2.py:
--------------------------------------------------------------------------------
 1 | def gc_content(sequence):
 2 |     """Calculate the GC content of a DNA sequence
 3 |     """
 4 |     gc = 0
 5 |     for base in sequence:
 6 |         if (base == 'G') or (base == 'C'):
 7 |             gc += 1
 8 |     return 100 * (gc / len(sequence))
 9 | 
10 | 
11 | #print('GC%', gc_content('ATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTG'))
12 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | .idea
 7 | .ipynb_checkpoints
 8 | 
 9 | .DS_Store
10 | 
11 | venv
12 | 
13 | biopython.fa
14 | 
15 | csvdata.tsv
16 | 
17 | csvdictdata.tsv
18 | 
19 | data/mydata.csv
20 | 
21 | gene_lengths_csv.tsv
22 | 
23 | gene_lengths.tsv
24 | 
25 | out.txt
26 | 
27 | sample.long.fa
28 | 
29 | mySeqFile.fa
30 | 


--------------------------------------------------------------------------------
/install/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu
 2 | MAINTAINER Mark Dunning<mark.dunning@cruk.cam.ac.uk>
 3 | 
 4 | RUN sudo apt-get update
 5 | RUN apt-get install -y ipython ipython-notebook git
 6 | RUN git clone https://github.com/pycam/python-intro.git
 7 | 
 8 | EXPOSE 8888
 9 | ENV USE_HTTP 0
10 | 
11 | WORKDIR python-intro/
12 | RUN ipython notebook --no-browser --port 8888 --ip=* Introduction_to_python_session_1.ipynb
13 | 


--------------------------------------------------------------------------------
/solutions/ex1_3_1_b.py:
--------------------------------------------------------------------------------
 1 | # Check if a DNA sequence contains a stop codon
 2 | 
 3 | # DNA sequence given
 4 | dna = "GTT GCA CCA CAA CCG TAG TAA TGA"
 5 | 
 6 | # Check if the sequence contains one possible stop codon
 7 | if "TAG" in dna:
 8 |     print("TAG found")
 9 | 
10 | # Check if the sequence contains any of the 3 stop codons
11 | if ("TAG" in dna) or ("TAA" in dna) or ("TGA" in dna):
12 |     print("Stop codon found")
13 | 


--------------------------------------------------------------------------------
/solutions/ex2_3_1_b.py:
--------------------------------------------------------------------------------
 1 | # Write a script that reads a file containing many lines of nucleotide sequence
 2 | # For each line in the file, print out the line number,
 3 | # the length of the sequence and the sequence
 4 | 
 5 | import sys
 6 | 
 7 | with open('data/dna.txt', "r") as f:
 8 |     line_num = 0
 9 |     for line in f:
10 |         line = line.rstrip()
11 |         line_num += 1
12 |         print(line_num, ":", len(line), "\t", line)
13 | 


--------------------------------------------------------------------------------
/solutions/ex1_3_1_a.py:
--------------------------------------------------------------------------------
 1 | # Compare the ages of two people
 2 | 
 3 | # Variables containing your name and your age
 4 | name_1 = "James"
 5 | age_1 = 31
 6 | 
 7 | # Variables containing another person's name and age
 8 | name_2 = "Mark"
 9 | age_2 = 29
10 | 
11 | # Print a statement which says if you are younger, older or the same age
12 | if age_1 > age_2:
13 |     print(name_1, 'is older than', name_2)
14 | elif age_1 < age_2:
15 |     print(name_1, 'is younger than', name_2)
16 | else:
17 |     print(name_1, 'and', name_2, 'have the same age')
18 | 


--------------------------------------------------------------------------------
/solutions/ex1_1_1.py:
--------------------------------------------------------------------------------
 1 | # This exercise should be done in the interpreter
 2 | 
 3 | # Create a variable and assign it the string value of your first name,
 4 | # assign your age to another variable (you are free to lie!), print out a message saying how old you are
 5 | 
 6 | name = "John"
 7 | age = 21
 8 | print("my name is", name, "and I am", age, "years old.")
 9 | 
10 | 
11 | # Use the addition operator to add 10 to your age and print out a message saying how old you will be in 10 years time
12 | age += 10
13 | print(name, "will be", age, "in 10 years.")
14 | 


--------------------------------------------------------------------------------
/solutions/ex2_1_1_b.py:
--------------------------------------------------------------------------------
 1 | def molecular_weight(sequence):
 2 |     """Function that takes a single DNA sequence as an argument and estimates
 3 |     the molecular weight of this sequence.
 4 |     """
 5 |     sequence = sequence.upper()
 6 |     base_weights = {'A': 331, 'C': 307, 'G': 347, 'T': 306}
 7 |     total_weight = 0
 8 |     for base in sequence:
 9 |         total_weight += base_weights[base]
10 |     return total_weight
11 | 
12 | # Test your function using some example sequences.
13 | weight = molecular_weight("ACTTGGGCAGATAGTCGCG")
14 | print("Molecular weight:", weight, "g/mol")
15 | 


--------------------------------------------------------------------------------
/solutions/ex2_1_2_a.py:
--------------------------------------------------------------------------------
 1 | def base_composition(sequence):
 2 |     """Write a function that counts the number of each base found
 3 |     in a DNA sequence.
 4 |     """
 5 |     sequence = sequence.upper()
 6 |     num_As = sequence.count('A')
 7 |     num_Cs = sequence.count('C')
 8 |     num_Gs = sequence.count('G')
 9 |     num_Ts = sequence.count('T')
10 |     # Return the result as a tuple of 4 numbers representing the counts of each base A, C, G and T.
11 |     return (num_As, num_Cs, num_Gs, num_Ts)
12 | 
13 | dna = "ACAGTGTCGTACAGATCAGTCAGATACA"
14 | print('base composition', base_composition(dna))
15 | 


--------------------------------------------------------------------------------
/solutions/ex1_4_1.py:
--------------------------------------------------------------------------------
 1 | # Create a list where each element is an individual base of DNA.
 2 | # Make the list 15 bases long.
 3 | bases = ['A', 'T', 'T', 'C', 'G', 'G', 'T', 'C', 'A', 'T', 'G', 'C', 'T', 'A', 'A']
 4 | 
 5 | # Print the length of the list
 6 | print("DNA sequence length:", len(bases))
 7 | 
 8 | # Create a for loop to output every base of the sequence on a new line.
 9 | print("All bases:")
10 | for base in bases:
11 |     print(base)
12 | 
13 | # Create a while loop that starts at the third base in the sequence
14 | # and outputs every third base until the 12th.
15 | print("Every 3rd base:")
16 | pos = 2
17 | while pos <= 12:
18 |     print(pos, bases[pos])
19 |     pos += 3
20 | 


--------------------------------------------------------------------------------
/solutions/ex2_1_2_b.py:
--------------------------------------------------------------------------------
 1 | def reverse_complement(sequence):
 2 |     """Write a function to return the reverse-complement of a nucleotide
 3 |     sequence.
 4 |     """
 5 |     reverse_base = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
 6 |     sequence = sequence.upper()
 7 |     sequence = reversed(sequence)
 8 |     result = []
 9 |     for base in sequence:
10 |         # check that the base is a known DNA base
11 |         if base not in 'ATCG':
12 |             return base + " is NOT a known DNA base"
13 |         result.append(reverse_base[base])
14 |     return "".join(result)
15 | 
16 | print(reverse_complement('ATCGTAGCatgcAATTGGC'))
17 | print(reverse_complement('ATCGTAGCatgcxAATTGGC'))
18 | 


--------------------------------------------------------------------------------
/solutions/ex1_4_2.py:
--------------------------------------------------------------------------------
 1 | # Calculate GC content of a DNA sequence
 2 | 
 3 | # 15-base array you created for the previous exercise
 4 | bases = ['A', 'T', 'T', 'C', 'G', 'G', 'T', 'C', 'A', 'T', 'G', 'C', 'T', 'A', 'A']
 5 | 
 6 | # Create a variable, gc, which we will use to count the number of Gs or Cs in our sequence
 7 | gc = 0
 8 | 
 9 | # Loop over the bases in your sequence.
10 | # If the base is a G or a C, add one to your gc variable.
11 | for base in bases:
12 |     if (base == 'G') or (base == 'C'):
13 |         gc += 1
14 | print("Frequency of GC in the sequence:", gc)
15 | 
16 | # Calculate the GC percentage and print it
17 | gc_percent = 100 * (gc / len(bases))
18 | print("%GC:", gc_percent)
19 | 


--------------------------------------------------------------------------------
/solutions/ex2_1_1_a.py:
--------------------------------------------------------------------------------
 1 | def simple_mean(x, y):
 2 |     """Function that takes 2 numerical arguments and returns their mean.
 3 |     """
 4 |     mean = (x + y) / 2
 5 |     return mean
 6 | 
 7 | 
 8 | def advanced_mean(values):
 9 |     """Function that takes a list of numbers and returns the mean of all
10 |     the numbers in the list.
11 |     """
12 |     total = 0
13 |     for v in values:
14 |         total += v
15 |     mean = total / len(values)
16 |     return mean
17 | 
18 | print("Mean of 2 & 3:", simple_mean(2, 3))
19 | print("Mean of 8 & 10:", simple_mean(8, 10))
20 | print("Mean of [2, 4, 6]", advanced_mean([2, 4, 6]))
21 | print("Mean of values even numbers under 20:", advanced_mean(list(range(0, 20, 2))))
22 | 


--------------------------------------------------------------------------------
/solutions/ex2_2_4.py:
--------------------------------------------------------------------------------
 1 | from ex2_2_2 import gc_content
 2 | from ex2_2_3 import extract_sub_sequences
 3 | 
 4 | 
 5 | def gc_content_along_the_chain(dna_sequence, window_size):
 6 |     """Returns a list of GC along the DNA sequence
 7 |     given a DNA sequence and the size of the sliding window
 8 |     """
 9 |     sub_sequences = extract_sub_sequences(dna_sequence, window_size)
10 |     gc_results = []
11 |     for sub_sequence in sub_sequences:
12 |         gc_results.append(gc_content(sub_sequence))
13 |     return gc_results
14 | 
15 | dna = 'ATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTG'
16 | print(gc_content(dna))
17 | print(extract_sub_sequences(dna, 5))
18 | print(gc_content_along_the_chain(dna, 5))
19 | 


--------------------------------------------------------------------------------
/solutions/ex1_2_2.py:
--------------------------------------------------------------------------------
 1 | # Create a string variable with your full name
 2 | name = "Boris Johnson"
 3 | 
 4 | # Split the string into a list
 5 | names = name.split(" ")
 6 | 
 7 | # Print out your surname
 8 | surname = names[-1]
 9 | print("Surname:", surname)
10 | 
11 | # Check if your surname contains the letter 'e'
12 | pos = surname.find("e")
13 | print("Position of 'e':", pos)
14 | 
15 | # or contains the letter 'o'
16 | pos = surname.find("o")
17 | print("Position of 'o':", pos)
18 | 
19 | ### Optional --------------------------------------------------------
20 | 
21 | # Use a format string to print out your first name and the length of your first name
22 | print("{:s} is {:d} characters long".format(names[0], len(names[0])))
23 | 


--------------------------------------------------------------------------------
/solutions/ex2_2_3.py:
--------------------------------------------------------------------------------
 1 | def extract_sub_sequences(sequence, window_size):
 2 |     """Extract a list of overlapping sub-sequences for a given window size
 3 |     from a given sequence.
 4 |     """
 5 |     if window_size <= 0:
 6 |         return "Window size must be a positive integer"
 7 |     if window_size > len(sequence):
 8 |         return "Window size is larger than sequence length"
 9 |     result = []
10 |     nr_windows = len(sequence) - window_size + 1
11 |     for i in range(nr_windows):
12 |         sub_sequence = sequence[i:i + window_size]
13 |         result.append(sub_sequence)
14 |     return result
15 | 
16 | 
17 | #dna = 'ATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTG'
18 | #print(extract_sub_sequences(dna, 5))
19 | 


--------------------------------------------------------------------------------
/solutions/ex2_1_1_c.py:
--------------------------------------------------------------------------------
 1 | def molecular_weight(sequence):
 2 |     """Function that takes a single DNA sequence as an argument and estimates
 3 |     the molecular weight of this sequence.
 4 |     If the sequence passed in above contains N bases,
 5 |     use the mean weight of the other bases as the weight.
 6 |     """
 7 |     sequence = sequence.upper()
 8 |     base_weights = {'A': 331, 'C': 307, 'G': 347, 'T': 306}
 9 |     base_weights['N'] = sum(base_weights.values()) / len(base_weights)
10 |     total_weight = 0
11 |     for base in sequence:
12 |         total_weight += base_weights[base]
13 |     return total_weight
14 | 
15 | weight = molecular_weight("AAGGACTGTCNCGTNNCGTAGGATNATAGNN")
16 | print("Molecular weight:", weight, "g/mol")
17 | 


--------------------------------------------------------------------------------
/solutions/ex1_2_4_extra.py:
--------------------------------------------------------------------------------
 1 | # 1-letter code lysozyme protein sequence given
 2 | seq = "MKALIVLGLVLLSVTVQGKVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKTPGAVNACHLSCSALLQDNIADAVACAKRVVRDPQGIRAWVAWRNRCQNRDVRQYVQGCGV"
 3 | 
 4 | # Count the abundance of different residue types and store the result in a dictionary
 5 | aa_counts = {}
 6 | aa_counts['A'] = seq.count('A')
 7 | aa_counts['C'] = seq.count('C')
 8 | aa_counts['D'] = seq.count('D')
 9 | aa_counts['E'] = seq.count('E')
10 | # etc...
11 | 
12 | # Print the results in alphabetical order
13 | print('A has', aa_counts['A'], 'occurrence(s)')
14 | print('C has', aa_counts['C'], 'occurrence(s)')
15 | print('D has', aa_counts['D'], 'occurrence(s)')
16 | print('E has', aa_counts['E'], 'occurrence(s)')
17 | 


--------------------------------------------------------------------------------
/solutions/ex1_2_4.py:
--------------------------------------------------------------------------------
 1 | # DNA sequence given
 2 | codon_string = "GTT GCA CCA CAA CCG"
 3 | 
 4 | # Split this string into the individual codons
 5 | codon_list = codon_string.split()
 6 | 
 7 | # Dictionary mapping codon sequences to the amino acids they encode
 8 | genetic_code = {
 9 |     "GTT": "Val",
10 |     "GCA": "Ala",
11 |     "CCA": "Pro",
12 |     "CAA": "Gln",
13 |     "CCG": "Pro"
14 | }
15 | 
16 | # Print each codon and its corresponding amino acid
17 | print(codon_list[0], "codes for", genetic_code[codon_list[0]])
18 | print(codon_list[1], "codes for", genetic_code[codon_list[1]])
19 | print(codon_list[2], "codes for", genetic_code[codon_list[2]])
20 | print(codon_list[3], "codes for", genetic_code[codon_list[3]])
21 | print(codon_list[4], "codes for", genetic_code[codon_list[4]])
22 | 


--------------------------------------------------------------------------------
/solutions/ex1_2_1.py:
--------------------------------------------------------------------------------
 1 | # Use the codon variables you defined previously
 2 | S = "TCT"
 3 | L = "CTT"
 4 | Y = "TAT"
 5 | C = "TGT"
 6 | 
 7 | # Create a list for the protein sequence CLYSY
 8 | codons = [C, L, Y, S, Y]
 9 | 
10 | # Print the DNA sequence of the protein
11 | print("DNA sequence:", codons)
12 | 
13 | # Print the DNA sequence of the last amino acid
14 | print("Last codon:", codons[-1])
15 | 
16 | # Create two more variables containing the DNA sequence for a stop codon and a start codon
17 | start = "ATG"
18 | stop = "TGA"
19 | 
20 | # Replace the first element of the list with the start codon
21 | codons[0] = start
22 | 
23 | # Append the stop codon to the end of the list
24 | codons.append(stop)
25 | 
26 | # Print the resulting DNA sequence
27 | print("DNA sequence after alteration:", "".join(codons))
28 | 


--------------------------------------------------------------------------------
/data/mySeqFile.fa:
--------------------------------------------------------------------------------
 1 | >sp|P61626|LYSC_HUMAN Lysozyme C OS=Homo sapiens GN=LYZ PE=1 SV=1
 2 | MKALIVLGLVLLSVTVQGKVFERCELARTLKRLGMDGYRGISLANWMCLAKWESGYNTRA
 3 | TNYNAGDRSTDYGIFQINSRYWCNDGKTPGAVNACHLSCSALLQDNIADAVACAKRVVRD
 4 | PQGIRAWVAWRNRCQNRDVRQYVQGCGV
 5 | >sp|P04421|LYSC_BOVIN Lysozyme C OS=Bos taurus GN=LYZ1 PE=1 SV=2
 6 | MKALVILGFLFLSVAVQGKVFERCELARTLKKLGLDGYKGVSLANWLCLTKWESSYNTKA
 7 | TNYNPSSESTDYGIFQINSKWWCNDGKTPNAVDGCHVSCRELMENDIAKAVACAKHIVSE
 8 | QGITAWVAWKSHCRDHDVSSYVEGCTL
 9 | >sp|P17897|LYZ1_MOUSE Lysozyme C-1 OS=Mus musculus GN=Lyz1 PE=1 SV=1
10 | MKALLTLGLLLLSVTAQAKVYNRCELARILKRNGMDGYRGVKLADWVCLAQHESNYNTRA
11 | TNYNRGDRSTDYGIFQINSRYWCNDGKTPRSKNACGINCSALLQDDITAAIQCAKRVVRD
12 | PQGIRAWVAWRTQCQNRDLSQYIRNCGV
13 | >sp|P00697|LYSC1_RAT Lysozyme C-1 OS=Rattus norvegicus GN=Lyz1 PE=1 SV=2
14 | MKALLVLGFLLLSASVQAKIYERCQFARTLKRNGMSGYYGVSLADWVCLAQHESNYNTQA
15 | RNYNPGDQSTDYGIFQINSRYWCNDGKTPRAKNACGIPCSALLQDDITQAIQCAKRVVRD
16 | PQGIRAWVAWQRHCKNRDLSGYIRNCGV
17 | >sp|Q8T1G5|LYSC_DICDI Lysozyme C OS=Dictyostelium discoideum GN=alyC PE=3 SV=1
18 | MRIAFFLLILSIIVGLAYGYSCPKPCYGNMCCSTSPDHKYYLTDFCGSTSACGPKPSCSG
19 | KLYFTADSQRFGCGKHLNLCRGKKCVKAKVYDAGPAEWVEKDAGKMIIDASPTICHELTG
20 | GSSCGWSDKFEITATVTSLTDSRPLGPFNVTEEEMDQLFIDHEIAMAQCEAEKTCNGFDL
21 | E
22 | 
23 | 


--------------------------------------------------------------------------------
/solutions/ex2_3_2_a.py:
--------------------------------------------------------------------------------
 1 | import os.path
 2 | 
 3 | # Read a tab delimited file which has 4 columns: gene, chromosome, start and end coordinates.
 4 | # Check if the file exists, then compute the length of each gene and store
 5 | # its name and corresponding length into a dictionary.
 6 | # Write the results into a new tab separated file.
 7 | 
 8 | gene_file = os.path.join('data', 'genes.txt')
 9 | output_file = "gene_lengths.tsv"
10 | 
11 | if os.path.exists(gene_file):
12 |     results = []
13 |     with open(gene_file) as f:
14 |         header = f.readline()
15 |         for line in f:
16 |             gene, chrom, start, end = line.strip().split("\t")
17 |             row = {'gene': gene, 'length': int(end) - int(start) + 1}
18 |             results.append(row)
19 |     print(results)
20 |     with open(output_file, "w") as out:
21 |         out.write('gene' + "\t" + 'length' + "\n")  # write header
22 |         for record in results:
23 |             out.write(record['gene'] + "\t" + str(record['length']) + "\n")
24 | else:
25 |     print(gene_file, 'does not exist!')
26 | 
27 | if os.path.exists(output_file):
28 |     # print contents of output file
29 |     with open(output_file) as f:
30 |         print(f.read())
31 | else:
32 |     print(output_file, 'does not exist!')
33 | 


--------------------------------------------------------------------------------
/solutions/ex2_3_2_b.py:
--------------------------------------------------------------------------------
 1 | import os.path
 2 | import csv
 3 | 
 4 | # Read a tab delimited file which has 4 columns: gene, chromosome, start and end coordinates.
 5 | # Check if the file exists, then compute the length of each gene and store
 6 | # its name and corresponding length into a dictionary.
 7 | # Write the results into a new tab separated file and make use of the csv module.
 8 | 
 9 | gene_file = os.path.join('data', 'genes.txt')
10 | output_file = "gene_lengths_csv.tsv"
11 | 
12 | if os.path.exists(gene_file):
13 |     results = []
14 |     with open(gene_file) as f:
15 |         reader = csv.DictReader(f, delimiter='\t')
16 |         for row in reader:
17 |             record = {'gene': row['gene'], 'length': int(row['end']) - int(row['start']) + 1}
18 |             results.append(record)
19 |     print(results)
20 |     with open(output_file, "w") as out:
21 |         writer = csv.DictWriter(out, results[0].keys(), delimiter='\t')
22 |         writer.writeheader()  # write header
23 |         for record in results:
24 |             writer.writerow(record)
25 | else:
26 |     print(gene_file, 'does not exist!')
27 | 
28 | if os.path.exists(output_file):
29 |     # print contents of output file
30 |     with open(output_file) as f:
31 |         print(f.read())
32 | else:
33 |     print(output_file, 'does not exist!')
34 | 


--------------------------------------------------------------------------------
/solutions/ex2_1_3.py:
--------------------------------------------------------------------------------
 1 | def molecular_weight(sequence, molecule_type='DNA'):
 2 |     """Function that takes a single DNA or RNA sequence as an argument
 3 |     and estimates the molecular weight of this sequence.
 4 |     If the sequence passed in above contains N bases,
 5 |     use the mean weight of the other bases as the weight.
 6 |     Use an optional argument to specify the molecule type, but default to DNA.
 7 |     """
 8 |     sequence = sequence.upper()
 9 |     molecule_type = molecule_type.upper()
10 | 
11 |     dna_weights = {'A': 331, 'C': 307, 'G': 347, 'T': 306}
12 |     rna_weights = {'A': 347, 'C': 323, 'G': 363, 'U': 324}
13 | 
14 |     if molecule_type == 'DNA':
15 |         base_weights = dna_weights
16 |     elif molecule_type == 'RNA':
17 |         base_weights = rna_weights
18 |     else:
19 |         return "Unrecognised molecule_type " + molecule_type
20 | 
21 |     total_weight = 0
22 |     for base in sequence:
23 |         # check that the base is known for the selected molecule type
24 |         if base not in base_weights:
25 |             return base + " is NOT a known DNA base"
26 |         total_weight += base_weights[base]
27 |     return total_weight
28 | 
29 | 
30 | print("RNA weight:", molecular_weight("AACGUCGAAUCCUAGCGC", molecule_type="RNA"), "g/mol")
31 | print("DNA weight:", molecular_weight("AACGTCGAATCCTAGCGC"), "g/mol")
32 | print("Other sequence weight:", molecular_weight("AACGTCGAATXXXCCTAGCGC"), "g/mol")
33 | 


--------------------------------------------------------------------------------
/install/vbox_installer.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # lubuntu LTS 14.04 VirtualBox installer based on lubuntu-14.04.2-desktop-i386
 3 | # computer name: crukci-training-vm; user: training; password: admin123
 4 | 
 5 | sudo su -
 6 | apt-get install gedit
 7 | apt-get install vim
 8 | apt-get install git
 9 | apt-get install python-pip
10 | apt-get install python-zmq
11 | apt-get install python-matplotlib
12 | apt-get install python-biopython
13 | apt-get install ncbi-blast+
14 | 
15 | # Install VirtualBox Additions
16 | # From the VirtualBox menu of lubuntu go to Devices > Insert Guest Additions CD image... and do
17 | cd /media/training/VBOXADDITIONS_4.3.26_98988
18 | sudo ./VBoxLinuxAdditions.run
19 | 
20 | # To increase screen resolution
21 | # Start > Preferences > Additional Drivers: Using x86 virtualization solution... and click Apply Changes
22 | # Then Start > Preferences > Monitor Settings and select 1440x1050 and click Save and Apply
23 | 
24 | pip install ipython[notebook]
25 | 
26 | apt-get autoremove
27 | apt-get clean
28 | 
29 | 
30 | adduser pycam # password: pycam123
31 | 
32 | exit
33 | 
34 | # login as pycam --------------------------------------------------------------
35 | 
36 | git clone https://github.com/pycam/python-intro.git course
37 | 
38 | # Add ipython at startup: from the lubuntu menu go to...
39 | # Preferences > Default applications for LXSession then tab Autostart and add:
40 | # /usr/local/bin/ipython notebook --no-browser --port=8888 --ip=127.0.0.1 /home/pycam/course/
41 | 
42 | # Add bookmarks into firefox: (1) pycam.github.io (2) 127.0.0.1:8888
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/solutions/ex2_4_1.py:
--------------------------------------------------------------------------------
 1 | from Bio import SeqIO
 2 | from Bio.SeqUtils import GC
 3 | 
 4 | # Read in a FASTA file named data/sample.fa
 5 | seqList = list(SeqIO.parse('data/sample.fa', 'fasta'))
 6 | 
 7 | # find the number of sequences present in the file
 8 | numSeq = len(seqList)
 9 | print('Total number of sequences:', numSeq)
10 | 
11 | # find IDs and lengths of the longest and the shortest sequences
12 | maxLen = minLen = len(seqList[0].seq)
13 | 
14 | lSeq = sSeq = seqList[0].id
15 | 
16 | for i in range(1, numSeq):
17 |     if len(seqList[i].seq) > maxLen:
18 |         # update maxLen and lSeq
19 |         maxLen = len(seqList[i].seq)
20 |         lSeq = seqList[i].id
21 |     elif len(seqList[i].seq) < minLen:
22 |         # update minLen and sSeq
23 |         minLen = len(seqList[i].seq)
24 |         sSeq = seqList[i].id
25 | 
26 | print('Longest sequence is', lSeq, 'with length', maxLen, 'bp')
27 | print('Shortest sequence is', sSeq, 'with length', minLen, 'bp')
28 | 
29 | # Creating a new sequence list containing sequences longer than 500bp
30 | # Calculate the average length of these sequences
31 | # calculate and print the percentage of GC contents
32 | 
33 | longSeqList = list()  # empty list for sequences
34 | 
35 | totLength = 0
36 | for sequence in seqList:
37 |     if len(sequence) > 500:
38 |         longSeqList.append(sequence)
39 |         totLength += len(sequence)
40 |         gc = GC(sequence.seq)
41 |         print('Percentage of GC content in', sequence.id, 'is', gc)
42 | 
43 | avgLength = totLength / len(longSeqList)
44 | 
45 | print('Average length for sequences longer than 500bp is', avgLength)
46 | 
47 | # Write sequences in the longSeqList in a file with 'GenBank' format
48 | SeqIO.write(longSeqList, 'sample.long.fa', 'fasta')
49 | 


--------------------------------------------------------------------------------
/solutions/ex2_0_1.py:
--------------------------------------------------------------------------------
 1 | lyrics = """
 2 | Imagine there's no Heaven
 3 | It's easy if you try
 4 | No Hell below us
 5 | Above us only sky
 6 | 
 7 | Imagine all the people
 8 | Living for today
 9 | Aaa haa
10 | 
11 | Imagine there's no countries
12 | It isn't hard to do
13 | Nothing to kill or die for
14 | And no religion too
15 | 
16 | Imagine all the people
17 | Living life in peace
18 | Yoo hoo
19 | 
20 | You may say I'm a dreamer
21 | But I'm not the only one
22 | I hope someday you'll join us
23 | And the world will be as one
24 | 
25 | Imagine no possessions
26 | I wonder if you can
27 | No need for greed or hunger
28 | A brotherhood of man
29 | 
30 | Imagine all the people
31 | Sharing all the world
32 | Yoo hoo
33 | 
34 | You may say I'm a dreamer
35 | But I'm not the only one
36 | I hope someday you'll join us
37 | And the world will live as one
38 | """
39 | 
40 | # Change all character to lower ones
41 | lyrics = lyrics.lower()
42 | 
43 | # Split into words
44 | words = lyrics.split()
45 | # Print the total number of words
46 | print('There are', len(words), 'words in this song.')
47 | 
48 | # Print the number of unique words
49 | unique_words = set(words)
50 | print('There are', len(unique_words), 'unique ones.')
51 | 
52 | # Calculate the frequency of each word and store the result into a dictionary
53 | results = {}
54 | for w in unique_words:
55 |     results[w.lower()] = words.count(w)
56 | 
57 | # Print each unique word along with its frequency
58 | for r in results:
59 |     print(results[r], '\t', r)
60 | 
61 | # Find the most frequent word in the song
62 | most_frequent = 0
63 | for r in results:
64 |     if results[r] > most_frequent:
65 |         most_frequent = results[r]
66 |         most_frequent_word = r
67 | 
68 | # Print the most frequent word with its frequency
69 | print('"', most_frequent_word, '" is the most frequent word being used', most_frequent, 'times.')
70 | 


--------------------------------------------------------------------------------
/solutions/ex2_2_1.py:
--------------------------------------------------------------------------------
# Genetic code table: each key is an RNA codon written in upper case and each
# value is the three-letter name of the amino acid it codes for.
# The codons UAA, UAG and UGA map to None; protein_translation() treats a None
# value as a stop codon.
standardGeneticCode = {
          'UUU': 'Phe', 'UUC': 'Phe', 'UCU': 'Ser', 'UCC': 'Ser',
          'UAU': 'Tyr', 'UAC': 'Tyr', 'UGU': 'Cys', 'UGC': 'Cys',
          'UUA': 'Leu', 'UCA': 'Ser', 'UAA': None, 'UGA': None,
          'UUG': 'Leu', 'UCG': 'Ser', 'UAG': None, 'UGG': 'Trp',
          'CUU': 'Leu', 'CUC': 'Leu', 'CCU': 'Pro', 'CCC': 'Pro',
          'CAU': 'His', 'CAC': 'His', 'CGU': 'Arg', 'CGC': 'Arg',
          'CUA': 'Leu', 'CUG': 'Leu', 'CCA': 'Pro', 'CCG': 'Pro',
          'CAA': 'Gln', 'CAG': 'Gln', 'CGA': 'Arg', 'CGG': 'Arg',
          'AUU': 'Ile', 'AUC': 'Ile', 'ACU': 'Thr', 'ACC': 'Thr',
          'AAU': 'Asn', 'AAC': 'Asn', 'AGU': 'Ser', 'AGC': 'Ser',
          'AUA': 'Ile', 'ACA': 'Thr', 'AAA': 'Lys', 'AGA': 'Arg',
          'AUG': 'Met', 'ACG': 'Thr', 'AAG': 'Lys', 'AGG': 'Arg',
          'GUU': 'Val', 'GUC': 'Val', 'GCU': 'Ala', 'GCC': 'Ala',
          'GAU': 'Asp', 'GAC': 'Asp', 'GGU': 'Gly', 'GGC': 'Gly',
          'GUA': 'Val', 'GUG': 'Val', 'GCA': 'Ala', 'GCG': 'Ala',
          'GAA': 'Glu', 'GAG': 'Glu', 'GGA': 'Gly', 'GGG': 'Gly'}
18 | 
def protein_translation(sequence, geneticCode):
    """Translate a nucleic acid sequence into a protein sequence.

    The sequence is read three bases at a time starting from the first base;
    trailing bases that do not form a complete codon are ignored. Translation
    goes on until the end of the sequence or until a stop codon is met.

    Args:
        sequence: DNA or RNA sequence as a string, in upper or lower case.
        geneticCode: dictionary mapping upper-case RNA codons to amino acids,
            with stop codons mapped to None.

    Returns:
        The list of amino acids found, in order. If a codon is not a key of
        geneticCode, a string naming that codon is returned instead of the
        list (kept as is so that existing callers see the same message).
    """
    protein_sequence = []
    for i in range(0, len(sequence) - 2, 3):
        # Strings are immutable: upper() returns a new string that has to be
        # kept, otherwise lower-case input is never found in the dictionary.
        codon = sequence[i:i + 3].upper()

        # Convert DNA into RNA sequence by replacing T by U
        codon = codon.replace('T', 'U')

        # Make sure the codon corresponds to an amino acid
        if codon not in geneticCode:
            return codon + " codon not in dictionary of genetic code"
        aminoAcid = geneticCode[codon]

        # Break if stop codon is found
        if aminoAcid is None:
            break

        protein_sequence.append(aminoAcid)

    return protein_sequence
47 | 
48 | 
# Example run: show a DNA sequence, then its translation
dna_sequence = 'ATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTG'
print(dna_sequence)

protein_3letter_sequence = protein_translation(
    dna_sequence,
    standardGeneticCode,
)
# Glue the three-letter codes together into a single string for display
print("".join(protein_3letter_sequence))
53 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ARCHIVED - READ ONLY - OLD 3 DAYS COURSE REPO
 2 | 
 3 | --- 
 4 | 
 5 | # An introduction to solving biological problems with Python - course materials
 6 | 
 7 | Materials for the course run by the Graduate School of Life Sciences, University of Cambridge.
 8 | 
 9 | - Course website: http://pycam.github.io/
10 | - Booking website: http://www.training.cam.ac.uk/
11 | 
12 | 
13 | If you wish to run the course on your personal computer, here are the steps to follow to get up and running.
14 | 
15 | ## Clone this github project
16 | 
17 | ```bash
18 | git clone https://github.com/pycam/python-intro.git
19 | cd python-intro
20 | ```
21 | 
22 | ## Dependencies
23 | 
24 | Install Python 3 by downloading the latest version from https://www.python.org/.
25 | 
26 | Python 2.x is legacy, Python 3.x is the present and future of the language.
27 | 
28 | First create a virtual environment using the [`venv` library](https://docs.python.org/3/library/venv.html). Then update pip if needed, and install [jupyter](http://jupyter.org/) and [RISE](https://github.com/damianavila/RISE) to get a slideshow extension in jupyter.
29 | 
30 | ***Note*** A virtual environment is a Python environment such that the Python interpreter, libraries and scripts installed into it are isolated from those installed in other virtual environments.
31 | 
32 | ```bash
33 | python3 -m venv venv
34 | # activate your virtual environment
35 | source venv/bin/activate
36 | # update pip if needed
37 | pip install --upgrade pip
38 | # install jupyter
39 | pip install jupyter
40 | 
41 | # slideshow extension
42 | pip install rise
43 | jupyter-nbextension install rise --py --sys-prefix
44 | jupyter nbextension enable rise --py --sys-prefix
45 | 
46 | # biopython
47 | pip install biopython
48 | ```
49 | 
50 | On macOS you may need to run this command to accept the Xcode license before installing biopython:
51 | 
52 | ```bash
53 | sudo xcodebuild -license
54 | ```
55 | 
56 | ## Usage
57 | 
58 | Go to the directory where you've cloned this repository, activate your virtual environment and run jupyter.
59 | 
60 | Your web browser should automatically open with this url http://localhost:8888/tree where you see the directory tree of the course with all the jupyter notebooks.
61 | 
62 | ```bash
63 | cd python-intro
64 | source venv/bin/activate
65 | jupyter notebook
66 | ```
67 | 
68 | To shut down jupyter, press ctrl-C in the terminal where you ran `jupyter notebook`, answer `y` and press `enter`.
69 | 
70 | You may wish to deactivate the virtual environment by entering into the terminal:
71 | ```
72 | deactivate
73 | ```
74 | 
75 | 


--------------------------------------------------------------------------------
/install/2to3_nb.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | To run: python3 2to3_nb.py notebook-or-directory
 4 | """
 5 | # Authors: Thomas Kluyver, Fernando Perez
 6 | # See: https://gist.github.com/takluyver/c8839593c615bb2f6e80
 7 | 
 8 | import argparse
 9 | import pathlib
10 | from nbformat import read, write
11 | 
12 | import lib2to3
13 | from lib2to3.refactor import RefactoringTool, get_fixers_from_package
14 | 
15 | 
def refactor_notebook_inplace(rt, path):
    """Run 2to3 on every code cell of a notebook and rewrite the file in place.

    Args:
        rt: refactoring tool; its refactor_string() method is applied to the
            source of each code cell.
        path: location of the notebook. It is read as nbformat version 4 and
            overwritten with the refactored notebook.
    """

    def refactor_cell(src, cell_number):
        """Return src refactored by rt, or src unchanged if it cannot be parsed.

        cell_number is only used to build the name handed to refactor_string.
        """
        try:
            # A newline is appended for the refactoring; the last character
            # of the result is dropped again below.
            tree = rt.refactor_string(src + '\n',
                                      str(path) + '/cell-%d' % cell_number)
        except (lib2to3.pgen2.parse.ParseError,
                lib2to3.pgen2.tokenize.TokenError):
            return src
        else:
            return str(tree)[:-1]

    print("Refactoring:", path)
    nb = read(str(path), as_version=4)

    # Run 2to3 on code
    for i, cell in enumerate(nb.cells, start=1):
        if cell.cell_type != 'code':
            continue

        if cell.execution_count in ('&nbsp;', '*'):
            cell.execution_count = None

        if cell.source.startswith('%%'):
            # For cell magics, try to refactor the body, in case it's
            # valid python. partition() copes with a magic line that has no
            # body after it; such a cell is left untouched instead of
            # failing on unpacking.
            head, newline, body = cell.source.partition('\n')
            if newline:
                cell.source = head + newline + refactor_cell(body, i)
        else:
            cell.source = refactor_cell(cell.source, i)

    # Update notebook metadata
    nb.metadata.kernelspec = {
        'display_name': 'Python 3',
        'name': 'python3',
        'language': 'python',
    }
    if 'language_info' in nb.metadata:
        nb.metadata.language_info.codemirror_mode = {
            'name': 'ipython',
            'version': 3,
        }
        nb.metadata.language_info.pygments_lexer = 'ipython3'
        nb.metadata.language_info.pop('version', None)

    write(nb, str(path))
62 | 
def main(argv=None):
    """Parse the command line and refactor the notebook(s) it points to.

    argv is handed to the argument parser as is. A directory is searched
    recursively for *.ipynb files; any other path is treated as one notebook.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'path',
        type=pathlib.Path,
        help="Notebook or directory containing notebooks",
    )
    target = parser.parse_args(argv).path

    # One refactoring tool, loaded with every fixer, serves all notebooks
    fixer_names = set(get_fixers_from_package('lib2to3.fixes'))
    tool = RefactoringTool(fixer_names)

    notebooks = target.rglob('*.ipynb') if target.is_dir() else [target]
    for notebook in notebooks:
        refactor_notebook_inplace(tool, notebook)


if __name__ == '__main__':
    main()
81 | 


--------------------------------------------------------------------------------
/feedback.md:
--------------------------------------------------------------------------------
 1 | # Feedback
 2 | 
 3 | ## Trainers
 4 | - _Adrian Baez-Ortega_
 5 | - **Tomás Di Domenico**
 6 | - **Mareike Herzog**
 7 | - **Mukarram Hossain**
 8 | - _Maire Lawlor_
 9 | - Sergio Martinez Cuesta
10 | - **Anne Pajon**
11 | - Cristian Riccio
12 | 
13 | **Present at the meeting after training on 22-23 September 2016**
14 | 
15 | ## Discussion
16 | 
17 | - convert to python 3
18 | - split course into beginner/intermediate/advanced
19 | - other python libraries to cover like pandas
20 | - course feedbacks
21 | 
22 | **Convert to Python 3** — for sure
23 | - BioPython, pandas all migrated
24 | - anaconda is 3 by default
25 | 
26 | **Add linux command introduction** — basic command and tree representation; it is essential to run python from a script on the command line
27 | 
28 | **Better introduction** — on why we use python in a notebook, on the command line and in file explaining how we run things
29 | 
30 | **Contents to drop**
31 | - file I/O should stay but remove reading XML/PDB format only keep delimited files and maybe JSON
32 | - drop system call
33 | - drop exceptions
34 | 
35 | **Contents to add**
36 | - way of searching into python library documentation for string
37 | - independent learners
38 | - bonus exercise — use pandas to parse this file to keep them busy
39 | - add BioPython exercise for the reverse complement function by writing second function using BioPython doing the same thing
40 | 
41 | **Contents to re-arrange**
42 | - restructure the second day
43 | - section 4 (1) file then (2) command line
44 | - move functions before section 2
45 | 
46 | **How to better stimulate the learner**
47 | - better explanation on how to solve each exercise by splitting problems into smaller chunks
48 |   - (1) explain how to solve an exercise by starting with a blank page and comments to divide the problem in small chunks
49 |   - (2) search for solution on internet either on forum or in python library
50 |   - (3) program together using functions to solve exercises
51 | - better manage expectations by being clearer on objectives
52 | - build connected exercises not disconnected ones, build upon learning — start simple and increase complexity
53 | - write code in group of 2 or 3; one function each to solve one bigger problem; explain and make them sure to write together a bigger program; then exchange code and progress to next level
54 | - bring your questions
55 | 
56 | ## Actions
57 | 
58 | > Meeting once a month — to get running.
59 | 
60 | **Next course** — December 16
61 | - Keep exercises for sure
62 | - Minimize the lecture part
63 | - Make them search for solution
64 | - Assignment — like workshop couple of urls
65 | - Create 3 levels of exercises beginner/intermediate/advanced
66 | 
67 | **Checklists for December course**
68 | - [ ] Anne to re-organize course and add BioPython exercise
69 | - [ ] Tomás to convert code to Python 3
70 | - [ ] Cristian to correct typos
71 | 
72 | Cristian sent his feedback by email — exceptions and reading PDB and XML files are too advanced for a beginner course.
73 | There are a few typos in the course that I can correct if somebody tells me how.
74 | 


--------------------------------------------------------------------------------
/Introduction_to_python_day_2_session_2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# An introduction to solving biological problems with Python\n",
  8 |     "\n",
  9 |     "## Session 2.2: Exercises and Modules\n",
 10 |     "\n",
 11 |     "- [Excercises 2.2.1](#Excercises-2.2.1)\n",
 12 |     "- [Excercises 2.2.2](#Excercises-2.2.2)\n",
 13 |     "- [Excercises 2.2.3](#Excercises-2.2.3)\n",
 14 |     "- [Modules](#Modules)\n",
 15 |     "- [Excercises 2.2.4](#Excercises-2.2.4)"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "markdown",
 20 |    "metadata": {},
 21 |    "source": [
 22 |     "## Excercises 2.2.1\n",
 23 |     "\n",
 24 |     "### Translate DNA sequence into protein sequence\n",
 25 |     "\n",
 26 |     "Write a function that translates a DNA sequence into a protein, a sequence of amino acids. The function should take 2 arguments, a DNA sequence and a dictionary that defines the standard genetic code.\n",
 27 |     "\n",
 28 |     "For mapping RNA codons to amino acids you can use the dictionary `standardGeneticCode` defined below. Notice that it only maps strings in upper case, so make sure that `codon` is in upper case before you look it up. You can convert a codon to upper case with the string method `upper()`. Notice also that it maps RNA codons and not DNA ones.\n",
 29 |     "\n",
 30 |     "First, loop over the sequence to extract every three bases until the end or until a stop codon either by using a `for` loop or a `while` one. \n",
 31 |     "\n",
 32 |     "Then convert the DNA into an RNA sequence, by replacing all T bases by U. Make sure that the codon corresponds to an amino acid. Convert the RNA codon into an amino acid using the dictionary provided and return the protein sequence as a list of amino acids."
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": null,
 38 |    "metadata": {
 39 |     "collapsed": true
 40 |    },
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "standardGeneticCode = { \n",
 44 |     "          'UUU':'Phe', 'UUC':'Phe', 'UCU':'Ser', 'UCC':'Ser',\n",
 45 |     "          'UAU':'Tyr', 'UAC':'Tyr', 'UGU':'Cys', 'UGC':'Cys',\n",
 46 |     "          'UUA':'Leu', 'UCA':'Ser', 'UAA': None, 'UGA': None,\n",
 47 |     "          'UUG':'Leu', 'UCG':'Ser', 'UAG': None, 'UGG':'Trp',\n",
 48 |     "          'CUU':'Leu', 'CUC':'Leu', 'CCU':'Pro', 'CCC':'Pro',\n",
 49 |     "          'CAU':'His', 'CAC':'His', 'CGU':'Arg', 'CGC':'Arg',\n",
 50 |     "          'CUA':'Leu', 'CUG':'Leu', 'CCA':'Pro', 'CCG':'Pro',\n",
 51 |     "          'CAA':'Gln', 'CAG':'Gln', 'CGA':'Arg', 'CGG':'Arg',\n",
 52 |     "          'AUU':'Ile', 'AUC':'Ile', 'ACU':'Thr', 'ACC':'Thr',\n",
 53 |     "          'AAU':'Asn', 'AAC':'Asn', 'AGU':'Ser', 'AGC':'Ser',\n",
 54 |     "          'AUA':'Ile', 'ACA':'Thr', 'AAA':'Lys', 'AGA':'Arg',\n",
 55 |     "          'AUG':'Met', 'ACG':'Thr', 'AAG':'Lys', 'AGG':'Arg',\n",
 56 |     "          'GUU':'Val', 'GUC':'Val', 'GCU':'Ala', 'GCC':'Ala',\n",
 57 |     "          'GAU':'Asp', 'GAC':'Asp', 'GGU':'Gly', 'GGC':'Gly',\n",
 58 |     "          'GUA':'Val', 'GUG':'Val', 'GCA':'Ala', 'GCG':'Ala', \n",
 59 |     "          'GAA':'Glu', 'GAG':'Glu', 'GGA':'Gly', 'GGG':'Gly'}"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "## Excercises 2.2.2\n",
 67 |     "\n",
 68 |     "### Calculate the GC content of a DNA sequence\n",
 69 |     "\n",
 70 |     "Write a function that calculates the GC content of a DNA sequence by re-using the code written for the [Exercises 1.4.2](Introduction_to_python_day_1_session_4.ipynb#Exercises-1.4.2) yesterday."
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {},
 76 |    "source": [
 77 |     "## Excercises 2.2.3\n",
 78 |     "\n",
 79 |     "### Extract the list of all overlapping sub-sequences\n",
 80 |     "Write a function that extracts a list of overlapping sub-sequences for a given window size from a given sequence. Do not forget to test it on a given DNA sequence."
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "## Modules"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "metadata": {},
 93 |    "source": [
 94 |     "So far we have been writing Python code in files as executable scripts without knowing that they are also modules from which we are able to call the different functions defined in them.\n",
 95 |     "\n",
 96 |     "A module is a file containing Python definitions and statements. The file name is the module name with the suffix .py appended. Create a file called `my_first_module.py` in the current directory with the following contents:"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": null,
102 |    "metadata": {
103 |     "collapsed": true
104 |    },
105 |    "outputs": [],
106 |    "source": [
107 |     "def say_hello(user):\n",
108 |     "    print('hello', user, '!')"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {},
114 |    "source": [
115 |     "Now enter the Python interpreter from the directory you've created `my_first_module.py` file and import the `say_hello` function from this module with the following command:\n",
116 |     "\n",
117 |     "```bash\n",
118 |     "python3\n",
119 |     "Python 3.5.2 (default, Jun 30 2016, 18:10:25) \n",
120 |     "[GCC 4.2.1 Compatible Apple LLVM 7.0.2 (clang-700.1.81)] on darwin\n",
121 |     "Type \"help\", \"copyright\", \"credits\" or \"license\" for more information.\n",
122 |     ">>> from my_first_module import say_hello\n",
123 |     ">>> say_hello('Anne')\n",
124 |     "hello Anne !\n",
125 |     ">>> \n",
126 |     "```\n",
127 |     "\n",
128 |     "There is one module already stored in the course directory called `my_first_module.py`, if you wish to import it into this notebook, below is what you need to do. If you wish to edit this file and change the code or add another function, you will have to restart the notebook to have these changes taken into account using the restart the kernel button in the menu bar."
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": null,
134 |    "metadata": {
135 |     "collapsed": false
136 |    },
137 |    "outputs": [],
138 |    "source": [
139 |     "from my_first_module import say_hello\n",
140 |     "say_hello('Anne')"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "markdown",
145 |    "metadata": {},
146 |    "source": [
147 |     "A module can contain executable statements as well as function definitions. These statements are intended to initialize the module. They are executed only the first time the module name is encountered in an import statement. \n",
148 |     "They are also run if the file is executed as a script.\n",
149 |     "\n",
150 |     "Do comment out these executable statements if you do not wish to have them executed when importing your module.\n",
151 |     "\n",
152 |     "For more information about modules, https://docs.python.org/3/tutorial/modules.html."
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "markdown",
157 |    "metadata": {},
158 |    "source": [
159 |     "## Excercises 2.2.4\n",
160 |     "### Calculate GC content along the DNA sequence\n",
161 |     "Combine the two methods written above to calculate the GC content of each overlapping sliding window along a DNA sequence from start to end. \n",
162 |     "\n",
163 |     "From the two files you wrote, import the methods written at exercises 2.2.2 and 2.2.3.\n",
164 |     "The new function should take two arguments, the DNA sequence and the size of the sliding window, and re-use the previous methods written to calculate the GC content of a DNA sequence and to extract the list of all overlapping sub-sequences. It returns a list of GC% along the DNA sequence."
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "markdown",
169 |    "metadata": {},
170 |    "source": [
171 |     "## Next session\n",
172 |     "\n",
173 |     "Go to our next notebook: [Introduction_to_python_day_2_session_3](Introduction_to_python_day_2_session_3.ipynb)"
174 |    ]
175 |   }
176 |  ],
177 |  "metadata": {
178 |   "kernelspec": {
179 |    "display_name": "Python 3",
180 |    "language": "python",
181 |    "name": "python3"
182 |   },
183 |   "language_info": {
184 |    "codemirror_mode": {
185 |     "name": "ipython",
186 |     "version": 3
187 |    },
188 |    "file_extension": ".py",
189 |    "mimetype": "text/x-python",
190 |    "name": "python",
191 |    "nbconvert_exporter": "python",
192 |    "pygments_lexer": "ipython3",
193 |    "version": "3.5.2"
194 |   }
195 |  },
196 |  "nbformat": 4,
197 |  "nbformat_minor": 0
198 | }
199 | 


--------------------------------------------------------------------------------
/Introduction_to_python_day_2_session_4.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# An introduction to solving biological problems with Python\n",
  8 |     "\n",
  9 |     "## Session 2.4: BioPython\n",
 10 |     "\n",
 11 |     "- [Working with sequences](#Working-with-sequences)\n",
 12 |     "- [Connecting with biological databases](#Connecting-with-biological-databases)\n",
 13 |     "- [Exercises 2.4.1](#Exercises-2.4.1)"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "## Using third party library, BioPython\n",
 21 |     "\n",
 22 |     "Biopython tutorial: http://biopython.org/DIST/docs/tutorial/Tutorial.html\n",
 23 |     "\n",
 24 |     "The goal of Biopython is to make it as easy as possible to use Python for bioinformatics by creating high-quality, reusable modules and classes. Biopython features include parsers for various Bioinformatics file formats (BLAST, Clustalw, FASTA, Genbank,...), access to online services (NCBI, Expasy,...), interfaces to common and not-so-common programs (Clustalw, DSSP, MSMS...), a standard sequence class, various clustering modules, a KD tree data structure etc. and even documentation."
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "markdown",
 29 |    "metadata": {},
 30 |    "source": [
 31 |     "## Working with sequences"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "markdown",
 36 |    "metadata": {},
 37 |    "source": [
 38 |     "We can create a sequence by defining a `Seq` object with strings. `Bio.Seq()` takes as input a string and converts it into a Seq object. We can print the sequences, individual residues, lengths and use other functions to get summary statistics.  "
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": null,
 44 |    "metadata": {
 45 |     "collapsed": false
 46 |    },
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "# Creating sequence\n",
 50 |     "from Bio.Seq import Seq\n",
 51 |     "my_seq = Seq(\"AGTACACTGGT\")\n",
 52 |     "print(my_seq)\n",
 53 |     "print(my_seq[10])\n",
 54 |     "print(my_seq[1:5])\n",
 55 |     "print(len(my_seq))\n",
 56 |     "print(my_seq.count( \"A\" ))"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {},
 62 |    "source": [
 63 |     "We can use functions from `Bio.SeqUtils` to get idea about a sequence "
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {
 70 |     "collapsed": false
 71 |    },
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "# Calculate the molecular weight\n",
 75 |     "from Bio.SeqUtils import GC, molecular_weight\n",
 76 |     "print(GC( my_seq ))\n",
 77 |     "print(molecular_weight( my_seq ))"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "One letter code protein sequences can be converted into three letter codes using `seq3` utility "
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": null,
 90 |    "metadata": {
 91 |     "collapsed": false
 92 |    },
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "from Bio.SeqUtils import seq3\n",
 96 |     "print(seq3( my_seq ))"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
103 |     "Alphabets define how the strings are going to be treated as sequence objects. The `Bio.Alphabet` module defines the available alphabets for Biopython. `Bio.Alphabet.IUPAC` provides basic definitions for DNA, RNA and proteins. "
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": null,
109 |    "metadata": {
110 |     "collapsed": false
111 |    },
112 |    "outputs": [],
113 |    "source": [
114 |     "from Bio.Alphabet import IUPAC\n",
115 |     "my_dna = Seq(\"AGTACATGACTGGTTTAG\", IUPAC.unambiguous_dna)\n",
116 |     "print(my_dna)\n",
117 |     "print(my_dna.alphabet)"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "metadata": {
124 |     "collapsed": false
125 |    },
126 |    "outputs": [],
127 |    "source": [
128 |     "my_dna.complement()"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": null,
134 |    "metadata": {
135 |     "collapsed": false
136 |    },
137 |    "outputs": [],
138 |    "source": [
139 |     "my_dna.reverse_complement()"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": null,
145 |    "metadata": {
146 |     "collapsed": false
147 |    },
148 |    "outputs": [],
149 |    "source": [
150 |     "my_dna.translate()"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "markdown",
155 |    "metadata": {},
156 |    "source": [
157 |     "### Parsing sequence file format: FASTA files"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "markdown",
162 |    "metadata": {},
163 |    "source": [
164 |     "Sequence files can be parsed and read the same way we read other files. "
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": null,
170 |    "metadata": {
171 |     "collapsed": false
172 |    },
173 |    "outputs": [],
174 |    "source": [
175 |     "with open( \"data/glpa.fa\" ) as fileObj:\n",
176 |     "    print(fileObj.read())"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "markdown",
181 |    "metadata": {},
182 |    "source": [
183 |     "Biopython provides specific functions to allow parsing/reading sequence files. "
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": null,
189 |    "metadata": {
190 |     "collapsed": false
191 |    },
192 |    "outputs": [],
193 |    "source": [
194 |     "# Reading FASTA files\n",
195 |     "from Bio import SeqIO\n",
196 |     "\n",
197 |     "fileObj = open(\"data/glpa.fa\")\n",
198 |     "\n",
199 |     "for protein in SeqIO.parse(fileObj, 'fasta'):\n",
200 |     "    print(protein.id)\n",
201 |     "    print(protein.seq)"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "markdown",
206 |    "metadata": {},
207 |    "source": [
208 |     "Sequence objects can be written into files using file handles with the function `SeqIO.write()`. We need to provide the name of the output sequence file and the sequence file format. "
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": null,
214 |    "metadata": {
215 |     "collapsed": false
216 |    },
217 |    "outputs": [],
218 |    "source": [
219 |     "# Writing FASTA files\n",
220 |     "from Bio.SeqRecord import SeqRecord\n",
221 |     "from Bio.Seq import Seq\n",
222 |     "from Bio.Alphabet import IUPAC\n",
223 |     "\n",
224 |     "sequence = 'MYGKIIFVLLLSEIVSISASSTTGVAMHTSTSSSVTKSYISSQTNDTHKRDTYAATPRAHEVSEISVRTVYPPEEETGERVQLAHHFSEPEITLIIFG'\n",
225 |     "\n",
226 |     "fileObj = open( \"mySeqFile.fa\", \"w\")\n",
227 |     "  \n",
228 |     "seqObj = Seq(sequence, IUPAC.protein)\n",
229 |     "proteinObjs = [SeqRecord(seqObj, id=\"MYID\", description='my description'),]\n",
230 |     "\n",
231 |     "SeqIO.write(proteinObjs, fileObj,  'fasta')\n",
232 |     "\n",
233 |     "fileObj.close()\n",
234 |     "\n",
235 |     "with open( \"biopython.fa\" ) as fileObj:\n",
236 |     "    print(fileObj.read())"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "markdown",
241 |    "metadata": {},
242 |    "source": [
243 |     "## Connecting with biological databases"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "markdown",
248 |    "metadata": {},
249 |    "source": [
250 |     "Sequences can be searched and downloaded from public databases. "
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": null,
256 |    "metadata": {
257 |     "collapsed": false
258 |    },
259 |    "outputs": [],
260 |    "source": [
261 |     "# Read FASTA file from NCBI GenBank\n",
262 |     "from Bio import Entrez\n",
263 |     "\n",
264 |     "Entrez.email = 'A.N.Other@example.com'\n",
265 |     "socketObj = Entrez.efetch(db=\"protein\", rettype=\"fasta\", id=\"71066805\")\n",
266 |     "dnaObj = SeqIO.read(socketObj, \"fasta\")\n",
267 |     "socketObj.close()\n",
268 |     "\n",
269 |     "print(dnaObj.description)\n",
270 |     "print(dnaObj.seq)"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": null,
276 |    "metadata": {
277 |     "collapsed": false
278 |    },
279 |    "outputs": [],
280 |    "source": [
281 |     "# Read SWISSPROT record\n",
282 |     "from Bio import ExPASy\n",
283 |     "\n",
284 |     "socketObj = ExPASy.get_sprot_raw('HBB_HUMAN')\n",
285 |     "proteinObj = SeqIO.read(socketObj, \"swiss\")\n",
286 |     "socketObj.close()\n",
287 |     "\n",
288 |     "print(proteinObj.description)\n",
289 |     "print(proteinObj.seq)"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "markdown",
294 |    "metadata": {},
295 |    "source": [
296 |     "## Exercises 2.4.1"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "markdown",
301 |    "metadata": {
302 |     "collapsed": true
303 |    },
304 |    "source": [
305 |     "- Retrieve a FASTA file named `data/sample.fa` and answer the following questions:\n",
306 |     "  - How many sequences are in the file?\n",
307 |     "  - What are the IDs and the lengths of the longest and the shortest sequences?\n",
308 |     "  - Create a new object that contains only sequences with length longer than 500bp. What is the average length of these sequences?\n",
309 |     "  - Calculate and print the GC content (as a percentage) of each of the sequences.\n",
310 |     "  - Write the newly created sequence object into a FASTA file named `sample.long.fa` "
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "markdown",
315 |    "metadata": {},
316 |    "source": [
317 |     "## Congratulations! You reached the end of day 2! "
318 |    ]
319 |   }
320 |  ],
321 |  "metadata": {
322 |   "kernelspec": {
323 |    "display_name": "Python 3",
324 |    "language": "python",
325 |    "name": "python3"
326 |   },
327 |   "language_info": {
328 |    "codemirror_mode": {
329 |     "name": "ipython",
330 |     "version": 3
331 |    },
332 |    "file_extension": ".py",
333 |    "mimetype": "text/x-python",
334 |    "name": "python",
335 |    "nbconvert_exporter": "python",
336 |    "pygments_lexer": "ipython3",
337 |    "version": "3.5.2"
338 |   }
339 |  },
340 |  "nbformat": 4,
341 |  "nbformat_minor": 0
342 | }
343 | 


--------------------------------------------------------------------------------
/planning.md:
--------------------------------------------------------------------------------
  1 | # Planning and ideas
  2 | 
  3 | 'An Introduction to Solving Biological Problems with Python' training can be divided into 4 sessions over two days.
  4 | 
  5 | 1. DAY 1. MORNING. SESSION 1.: running the Python interpreter, variables and types, arithmetic, basic data structures
  6 | 2. DAY 1. AFTERNOON. SESSION 2.: logic & flow control, loops, exceptions, importing libraries
  7 | 3. DAY 2. MORNING. SESSION 3.: custom functions, variable scope, some biological examples
  8 | 4. DAY 2. AFTERNOON. SESSION 4.: dealing with files, parsing file formats, introduction to BioPython
  9 | 
 10 | ## DAY 1. MORNING. SESSION 1.
 11 | 
 12 | ### Part 1.
 13 | 
 14 | INTRO: the python programming language & python interpreter (command line)
 15 | Python is free, cross-platform, widely used, well documented & well supported.
 16 | Python is a simple interpreted language, with no separate compilation step.
 17 | 
 18 | - Getting started
 19 | - Printing values
 20 | - Using variables: they are names for values, created by use. No declaration necessary.
 21 | A variable is just a name, it does not have a type. Values are garbage collected,
 22 | if nothing refers to data any longer, it can be recycled. Must assign value to variable
 23 | before using it. Python does not assume default values for variables,
 24 | doing so can mask many errors.
 25 | - Simple data types: Values do have types. Use functions to convert between types.
 26 |     - booleans
 27 |     - integers
 28 |     - floating point numbers
 29 |     - complex numbers
 30 |     - strings are sequences of characters
 31 |     - the None object
 32 | - Arithmetic: addition, subtraction, multiplication, division, exponentiation, remainder
 33 | - Saving code in files
 34 |     - Comments
 35 | 
 36 | #### EXERCISES
 37 | 
 38 | ```
 39 | * create a variable, print out a message
 40 | * addition operator
 41 | * calculate the mean of two variables
 42 | * [1.1] Print DNA sequence from amino acid one.
 43 | ```
 44 | 
 45 | ### Part 2.
 46 | 
 47 | As well as the basic data types we introduced, python has several ways of storing
 48 | a collection of values. We are going to see four of them: tuples, lists, sets and
 49 | dictionaries.
 50 | 
 51 | - Collections: complex data types
 52 |     - tuples: A tuple is a sequence of immutable Python objects. Tuples are sequences,
 53 |     just like lists. The only difference is that tuples can't be changed i.e.,
 54 |     tuples are immutable and tuples use parentheses and lists use square brackets.
 55 |     - lists: the most popular collection, [value, value, value, ...]. It is mutable: it can be
 56 |     changed after being created. It is heterogeneous: it can store values of many kinds.
 57 |     Appending values to a list lengthens it, deleting values shortens it. Most
 58 |     operations on lists are methods. Two that are often used incorrectly are sort() and reverse(): both modify the list in place and return None.
 59 |     - manipulating tuples and lists
 60 | 
 61 | Online Python doc: https://docs.python.org/2/  Library | 5.6. Sequence Types | Mutable Sequence Types (5.6.4)
 62 | 
 63 | #### EXERCISES
 64 | 
 65 | ```
 66 | * [1.2] Print DNA sequence from a list of DNA codons
 67 | ```
 68 | 
 69 | - String manipulations and format: strings are indexed exactly like lists.
 70 | Strings are immutable, they cannot be changed in place. Use + to concatenate strings.
 71 | Concatenation always produces a new string. Use string % to format output.
 72 | Use triple quotes for multi-line strings. Strings have methods: capitalize()
 73 | upper() lower() count() find() replace()
 74 | 
 75 | Online Python doc: https://docs.python.org/2/ Library | 5.6. Sequence Types | 5.6.1. String Methods
 76 | 
 77 | Online Python doc: https://docs.python.org/2/ Library | 5.6. Sequence Types | 5.6.2. String Formatting Operations
 78 | 
 79 | #### EXERCISES
 80 | 
 81 | ```
 82 | * [1.3] String manipulation using your name
 83 | ```
 84 | 
 85 | - Sets contain unique unordered elements. They are very similar to lists but
 86 | because the elements are not in order they do not have an index.
 87 | 
 88 | Online Python doc: https://docs.python.org/2/ Library | 5.7. Set Types
 89 | 
 90 | #### EXERCISES
 91 | 
 92 | ```
 93 | * [1.4] Find the unique amino acid codes in a protein sequence
 94 | ```
 95 | 
 96 | - Dictionaries contain a mapping of keys to values
 97 | 
 98 | Online Python doc: https://docs.python.org/2/ Library | 5.8. Mapping Types
 99 | 
100 | ```
101 | Dictionary can be very useful when combined with string formatting e.g.
102 | format_string = "Dear %(name)s, we have sequenced %(num)d libraries. The yield is %(yield)dM reads."
103 | print(format_string % {'name': 'Anne', 'num':3, 'yield': 182})
104 | ```
105 | 
106 | #### EXERCISES
107 | 
108 | ```
109 | * [1.5] Use a dictionary to map between codon sequences and amino acids they
110 | encode to print out the name of the amino acids of a DNA sequence
111 | ```
112 | 
113 | 
114 | ```
115 | >>> TAKE HOME MESSAGE
116 | >>> Variables are labels that refer to data.
117 | >>> Many variables may refer to the same piece of data.
118 | >>> Use strings to store text.
119 | >>> Use lists to store many related values in order.
120 | >>> Use sets to store unique values, without any order.
121 | >>> Use dictionaries to store key/value pairs.
122 | ```
123 | 
124 | ## DAY 1. AFTERNOON. SESSION 2.
125 | 
126 | ### Part 1.
127 | 
128 | INTRO: program control and logic - code blocks: if/loops/exceptions.
129 | Real power of programs comes from repetition and selection. Why indentation?
130 | Because it makes the code you write clearer and easier to read.
131 | Python style guide (PEP 8) recommends 4 spaces.
132 | Loops let us do things many times. Collections let us store many values together.
133 | 
134 | - code blocks
135 | - conditional execution
136 |     - the if statement: use if/elif/else to make choices
137 |     - comparisons and truth
138 | 
139 | #### EXERCISES
140 | 
141 | ```
142 | [2.0] Compare your age with other persons and print if you are younger/older/same age
143 | [2.?] Check if a DNA sequence contains a stop codon
144 | ```
145 | 
146 | - loops
147 |     - the for loop: a for loop is used to access each value in turn
148 |     - the while loop: a while loop is used to step through all possible indices
149 |     - skipping and breaking loops
150 |     - looping gotchas
151 | 
152 | #### EXERCISES
153 | 
154 | ```
155 | [2.1] Loop over a list of bases using for and while loops
156 | ```
157 | 
158 | - more looping
159 |     - using enumerate
160 |     - using zip
161 |     - filtering in loops
162 | 
163 | #### EXERCISES
164 | 
165 | ```
166 | [2.2] Calculate the GC content of a DNA sequence
167 | ```
168 | 
169 | ### Part 2 (after break)
170 | 
171 | Python provides two very important features to handle any unexpected error in your
172 | Python programs and to add debugging capabilities in them: exceptions and assertions.
173 | 
174 | - exceptions: An exception is an event, which occurs during the execution of a program,
175 | that disrupts the normal flow of the program's instructions. In general, when a Python
176 | script encounters a situation that it can't cope with, it raises an exception.
177 | An exception is a Python object that represents an error.
178 | 
179 | #### EXERCISES
180 | 
181 | ```
182 | [2.3] Raise an exception if the DNA sequence is not valid
183 | ```
184 | 
185 | - importing modules and libraries
186 |     - help(math)
187 |     - import sys
188 |     - print sys.version & sys.platform
189 |     - print sys.path which defines the list of directories Python searches in to find modules.
190 |     sys.argv: The most commonly-used element of sys is probably sys.argv, which holds the command-line arguments of the currently-executing program.
191 | 
192 | ```
193 | >>> TAKE HOME MESSAGE
194 | >>> Use while to repeat something until something changes.
195 | >>> Use for to do something once for each part of a larger whole.
196 | >>> Use if and else to make choices.
197 | ```
198 | 
199 | ## DAY 2. MORNING. SESSION 3.
200 | 
201 | ### Part 1.
202 | 
203 | INTRO: function basics and definition
204 | A programming language should not include everything anyone might ever want
205 | Instead, it should make it easy for people to create what they need
206 | to solve specific problems by defining functions to create higher-level operations.
207 | In python it is done using the keyword 'def'.
208 | 
209 | - function definition syntax
210 | 
211 | #### EXERCISES
212 | 
213 | ```
214 | [3.1a] Create a function that calculates the mean of two numbers, and then of a list of numbers
215 | [3.1b] Create a function to calculate the molecular weight of a DNA sequence
216 | ```
217 | 
218 | - function arguments
219 | 
220 | #### EXERCISES
221 | 
222 | ```
223 | [3.2] Extend the previous function to also calculate the weight of an RNA sequence
224 | ```
225 | 
226 | - return value
227 | 
228 | #### EXERCISES
229 | 
230 | ```
231 | [3.3] Write a function that counts the number of each base found in a DNA sequence
232 | ```
233 | 
234 | ### Part 2.
235 | 
236 | - variable scope: globals vs within blocks
237 | - advanced topics: anonymous functions (lambda); functions as values; nested functions
238 | 
239 | #### EXERCISES
240 | 
241 | ```
242 | BIO examples
243 | - program ribosome that translates RNA into protein
244 |   - extra points for also taking DNA (T -> U)
245 |   - extra points for all reading frames.
246 | 
247 | - calculate GC content of DNA not on whole sequence but with sliding window.
248 | 
249 | - calculate hydrophobicity with sliding window.
250 | ```
251 | 
252 | ```
253 | >>> TAKE HOME MESSAGE
254 | >>> Define functions to break programs down into manageable pieces.
255 | >>> Remember that a function is really just another kind of data.
256 | ```
257 | 
258 | ## DAY 2. AFTERNOON. SESSION 4.
259 | 
260 | ### Part 1.
261 | 
262 | INTRO: In this session we cover 2 widely used ways of reading data into our
263 | programs, via the command line and by reading files from disk.
264 | 
265 | - reading command line arguments
266 | 
267 | #### EXERCISES
268 | 
269 | ```
270 | [4.1a] Write a script that takes 2 integers from the command line using the sys.argv
271 | list, adds the two numbers and prints out the result
272 | [4.1b] Write a script that takes a DNA sequence from the command line and prints out
273 | its length and GC content
274 | ```
275 | 
276 |   - the argparse library
277 | 
278 | #### EXERCISES
279 | 
280 | ```
281 | [4.1c] Use the argparse library to do the same exercise as above
282 | ```
283 | 
284 | ### Part 2.
285 | 
286 | - file objects
287 |     - mode modifiers
288 |     - error checking
289 | - closing files
290 | - reading from files
291 |     - the with statement
292 | - writing to files
293 | 
294 | #### EXERCISES
295 | 
296 | ```
297 | [4.2a] Write a script that writes a list of numbers to a file, with each number
298 | on a separate line
299 | [4.2b] Open a file and for each line print out the line number and its length
300 | ```
301 | 
302 | - data formats
303 | - delimited files
304 |     - reading delimited files
305 |     - writing delimited files
306 | - more advanced examples
307 |     - read csv file
308 |     - write csv file
309 | 
310 | #### EXERCISES
311 | 
312 | ```
313 | [4.3a] Read a tab separated file
314 | [4.3b] Write a csv file
315 | ```
316 | 
317 | - fixed format files (PDB)
318 | - XML files
319 | - python file libraries: os & os.path
320 | - more advanced examples
321 |     - recursive file search
322 |     - recursive delete
323 | 
324 | - system calls
325 | 
326 | #### EXERCISES
327 | 
328 | ```
329 | [4.4] Write a script that executes the command 'ls' to get the list of files
330 | then modify your script to only print python files
331 | ```
332 | 
333 | ### Part 3.
334 | 
335 | - using BioPython
336 | 
337 | Biopython aims to make it as easy as possible to use Python for bioinformatics by
338 | creating high-quality, reusable modules and classes. Biopython features include
339 | parsers for various Bioinformatics file formats (BLAST, Clustalw, FASTA, Genbank,...),
340 | access to online services (NCBI, Expasy,...), interfaces to common and not-so-common programs
341 | (Clustalw, DSSP, MSMS...), a standard sequence class, various clustering modules,
342 | a KD tree data structure etc. and even documentation.
343 | 
344 | Basically, we just like to program in Python and want to make it as easy as possible
345 | to use Python for bioinformatics by creating high-quality, reusable modules and scripts.
346 | 
347 | Biopython tutorial http://biopython.org | Tutorial | 1.2  What can I find in the Biopython package
348 | 
349 | #### BioPython EXAMPLES
350 | 
351 | - more advanced examples
352 |     - writing FASTA files
353 |     - reading FASTA files
354 | 
355 | ```
356 | >>> TAKE HOME MESSAGE
357 | >>> Happy Python programming!
358 | ```
359 | 
360 | IDEAS: if you need help: http://stackoverflow.com/
361 | 
362 | IDEAS: Pylint is a tool that checks for errors in python code, tries to enforce a coding standard and looks for bad code smells: http://www.pylint.org/
363 | 
364 | IDEAS: Any code that hasn't been tested is probably wrong: Python unit testing framework unittest
365 | 
366 | IDEAS: from http://software-carpentry.org/v4/python
367 | 


--------------------------------------------------------------------------------
/Introduction_to_python_day_1_session_3.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "collapsed": true
  7 |    },
  8 |    "source": [
  9 |     "# An introduction to solving biological problems with Python\n",
 10 |     "\n",
 11 |     "## Session 1.3: Conditional execution\n",
 12 |     "\n",
 13 |     "- [Code blocks](#Code-blocks)\n",
 14 |     "- [Conditional execution](#Conditional-execution)\n",
 15 |     "- [Exercises 1.3.1](#Exercises-1.3.1)"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "markdown",
 20 |    "metadata": {},
 21 |    "source": [
 22 |     "## Program control and logic"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "A program will normally run by executing the stated commands, one after the other in sequential order. Frequently however, you will need the program to deviate from this. There are several ways of diverting from the line-by-line paradigm:\n",
 30 |     "\n",
 31 |     "- With conditional statements. Here you can check if some statement or expression is true, and if it is then you continue on with the following block of code, otherwise you might skip it or execute a different bit of code.\n",
 32 |     "\n",
 33 |     "- By performing repetitive loops through the same block of code, where each time through the loop different values may be used for the variables.\n",
 34 |     "\n",
 35 |     "- Through the use of functions (subroutines) where the program’s execution jumps from a particular line of code to an entirely different spot, even in a different file or module, to do a task before (usually) jumping back again. Functions are covered in the next session, so we will not discuss them yet.\n",
 36 |     "\n",
 37 |     "- By checking if an error or exception occurs, i.e. something illegal has happened, and executing different blocks of code accordingly"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "## Code blocks"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "With all of the means by which Python code execution can jump about we naturally need to be aware of the boundaries of the block of code we jump into, so that it is clear at what point the job is done, and program execution can jump back again. In essence it is required that the end of a function, loop or conditional statement be defined, so that we know the bounds of their respective code blocks."
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {},
 57 |    "source": [
 58 |     "Python uses indentation to show which statements are in a block of code, other languages use specific `begin` and `end` statements or curly braces `{}`. It doesn't matter how much indentation you use, but the whole block must be consistent, i.e., if the first statement is indented by four spaces, the rest of the block must be indented by the same amount. The Python style guide recommends using 4-space indentation. Use spaces, rather than tabs, since different editors display tab characters with different widths.\n",
 59 |     "\n",
 60 |     "The use of indentation to delineate code blocks is illustrated in an abstract manner in the following scheme: \n",
 61 |     "\n",
 62 |     "Statement 1:\n",
 63 |     "\n",
 64 |     "    Command A – in the block of statement 1\n",
 65 |     "    Command B – in the block of statement 1\n",
 66 |     "  \n",
 67 |     "    Statement 2:\n",
 68 |     "        Command C – in the block of statement 2\n",
 69 |     "        Command D – in the block of statement 2\n",
 70 |     "  \n",
 71 |     "    Command E – back in the block of statement 1\n",
 72 |     "\n",
 73 |     "Command F – outside all statement blocks\n"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "markdown",
 78 |    "metadata": {},
 79 |    "source": [
 80 |     "## Conditional execution"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "### The <tt>if</tt> statement"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "markdown",
 92 |    "metadata": {},
 93 |    "source": [
 94 |     "A conditional <tt>if</tt> statement is used to specify that some block of code should only be executed if some associated test is upheld, i.e. when a conditional expression evaluates to <tt>True</tt>. This might also involve subsidiary checks using the <tt>elif</tt> statement to use an alternative block if the previous expression turns out to be False. There can even be a final <tt>else</tt> statement to do something if none of the checks are passed. \n",
 95 |     "\n",
 96 |     "The following uses statements that test whether a number is less than zero, greater than zero or otherwise equal to zero and will print out a different message in each case:"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": null,
102 |    "metadata": {
103 |     "collapsed": false
104 |    },
105 |    "outputs": [],
106 |    "source": [
107 |     "x = -3\n",
108 |     "\n",
109 |     "if x > 0:\n",
110 |     "  print(\"Value is positive\")\n",
111 |     "\n",
112 |     "elif x < 0:\n",
113 |     "  print(\"Value is negative\")\n",
114 |     "\n",
115 |     "else:\n",
116 |     "  print(\"Value is zero\")"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "markdown",
121 |    "metadata": {},
122 |    "source": [
123 |     "The general form of writing out such combined conditional statements is as follows:"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "markdown",
128 |    "metadata": {},
129 |    "source": [
130 |     "<pre>\n",
131 |     "if conditionalExpression1:\n",
132 |     "    # codeBlock1\n",
133 |     "\n",
134 |     "elif conditionalExpression2:\n",
135 |     "    # codeBlock2\n",
136 |     "\n",
137 |     "elif conditionalExpressionN:\n",
138 |     "    # codeBlockN\n",
139 |     "    +any number of additional elif statements, then finally:\n",
140 |     "\n",
141 |     "else:\n",
142 |     "    # codeBlockE\n",
143 |     "</pre>"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "markdown",
148 |    "metadata": {},
149 |    "source": [
150 |     "\n",
151 |     "The <tt>elif</tt> block is optional, and we can use as many as we like. The <tt>else</tt> block is also optional, so we may have only the <tt>if</tt> statement, which is a fairly common situation. It is often good practice to include <tt>else</tt> where possible though, so that you always catch cases that do not pass, otherwise values might go unnoticed, which might not be the desired behaviour."
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "markdown",
156 |    "metadata": {},
157 |    "source": [
158 |     "Placeholders are needed for “empty” code blocks:"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": null,
164 |    "metadata": {
165 |     "collapsed": false
166 |    },
167 |    "outputs": [],
168 |    "source": [
169 |     "gene = \"BRCA2\"\n",
170 |     "geneExpression = -1.2\n",
171 |     "\n",
172 |     "if geneExpression < 0:\n",
173 |     "    print(gene, \"is downregulated\")\n",
174 |     "        \n",
175 |     "elif geneExpression > 0:\n",
176 |     "    print(gene, \"is upregulated\")\n",
177 |     "        \n",
178 |     "else:\n",
179 |     "    pass"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "metadata": {},
185 |    "source": [
186 |     "For very simple conditional checks, you can write the `if` statement on a single line as a single expression, and the result will be the expression before the `if` if the condition is true or the expression after the `else` otherwise.\n",
187 |     "\n"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": null,
193 |    "metadata": {
194 |     "collapsed": false
195 |    },
196 |    "outputs": [],
197 |    "source": [
198 |     "x = 11\n",
199 |     "\n",
200 |     "if x < 10:\n",
201 |     "    s = \"Yes\"\n",
202 |     "else:\n",
203 |     "    s = \"No\"\n",
204 |     "print(s)\n",
205 |     "\n",
206 |     "# Could also be written onto one line\n",
207 |     "s = \"Yes\" if x < 10 else \"No\"\n",
208 |     "print(s)"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "markdown",
213 |    "metadata": {},
214 |    "source": [
215 |     "### Comparisons and truth"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "markdown",
220 |    "metadata": {},
221 |    "source": [
222 |     "With conditional execution the question naturally arises as to which expressions are deemed to be true and which false. For the python boolean values <tt>True</tt> and <tt>False</tt> the answer is (hopefully) obvious. Also, the logical states of truth and falsehood that result from conditional checks like “Is x greater than 5?” or “Is y in this list?” are also clear. When comparing values Python has the standard comparison (or relational) operators, some of which we have already seen:\n",
223 |     "\n",
224 |     "|Operator |\tDescription |\tExample |\n",
225 |     "|---------|-------------|-----------|\n",
226 |     "|`==`  |\t    equality |\t1 == 2 # False |\n",
227 |     "|`!=`  |\t    non equality |\t1 != 2 # True |\n",
228 |     "| `<`  |\t    less than |\t1 < 2 # True |\n",
229 |     "| `<=` |\t    equal or less than |\t2 <= 2 # True |\n",
230 |     "| `>`  |\t    greater than |\t1 > 2 # False |\n",
231 |     "| `>=` |\t    equal or greater than |\t1 >= 1 # True |\n",
232 |     "\n",
233 |     "It is notable that comparison operations can be combined, for example to check if a value is within a range."
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": null,
239 |    "metadata": {
240 |     "collapsed": false
241 |    },
242 |    "outputs": [],
243 |    "source": [
244 |     "x = -5\n",
245 |     "\n",
246 |     "if x > 0 and x < 10:\n",
247 |     "    print(\"In range A\")\n",
248 |     "    \n",
249 |     "elif x < 0 or x > 10:\n",
250 |     "    print(\"In range B\")"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "markdown",
255 |    "metadata": {},
256 |    "source": [
257 |     "Python has two additional comparison operators <tt>is</tt> and <tt>is not</tt>. These compare whether two objects are the same object, whereas <tt>==</tt> and <tt>!=</tt> compare whether values are the same.\n",
258 |     "\n",
259 |     "As an example in Python:"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "code",
264 |    "execution_count": null,
265 |    "metadata": {
266 |     "collapsed": false
267 |    },
268 |    "outputs": [],
269 |    "source": [
270 |     "x = [123, 54, 92, 87, 33]\n",
271 |     "y = x[:] # y is a copy of x\n",
272 |     "z = x\n",
273 |     "print(x)\n",
274 |     "print(\"Are values of y and x the same?\", y == x)\n",
275 |     "print(\"Are objects y and x the same?\", y is x)\n",
276 |     "print(\"Are values of z and x the same?\", z == x)\n",
277 |     "print(\"Are objects z and x the same?\", z is x)\n",
278 |     "# Let's change x\n",
279 |     "x[1] = 23\n",
280 |     "print(x)\n",
281 |     "print(\"Are values of y and x the same?\", y == x)\n",
282 |     "print(\"Are objects y and x the same?\", y is x)\n",
283 |     "print(\"Are values of z and x the same?\", z == x)\n",
284 |     "print(\"Are objects z and x the same?\", z is x)"
285 |    ]
286 |   },
287 |   {
288 |    "cell_type": "markdown",
289 |    "metadata": {},
290 |    "source": [
291 |     "In Python even expressions that do not involve an obvious boolean value can be assigned a status of \"truthfulness\";  the value of an item itself can be forced to be considered as either True or False inside an if statement. For the Python built-in types discussed in this chapter the following are deemed to be False in such a context:\n",
292 |     "\n",
293 |     "| False value | Description | \n",
294 |     "|-------------|-------------|\n",
295 |     "| `None` |\tthe None object (no value) |\n",
296 |     "| `False` |\tFalse boolean |\n",
297 |     "| `0`\t| 0 integer |\n",
298 |     "| `0.0` |\t0.0 floating point |\n",
299 |     "| `\"\"` |\tempty string |\n",
300 |     "| `()` |\tempty tuple |\n",
301 |     "| `[]` |\tempty list |\n",
302 |     "| `{}` |\tempty dictionary |\n",
303 |     "| `set()` |\tempty set |\n",
304 |     "\n",
305 |     "And everything else is deemed to be True in a conditional context."
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "code",
310 |    "execution_count": null,
311 |    "metadata": {
312 |     "collapsed": false
313 |    },
314 |    "outputs": [],
315 |    "source": [
316 |     "x = ''      # An empty string\n",
317 |     "y = ['a']   # A list with one item\n",
318 |     "\n",
319 |     "if x:\n",
320 |     "    print(\"x is true\")\n",
321 |     "else: \n",
322 |     "    print(\"x is false\")     \n",
323 |     "\n",
324 |     "if y:\n",
325 |     "    print(\"y is true\")\n",
326 |     "else:\n",
327 |     "    print(\"y is false\")"
328 |    ]
329 |   },
330 |   {
331 |    "cell_type": "markdown",
332 |    "metadata": {},
333 |    "source": [
334 |     "## Exercises 1.3.1\n",
335 |     "\n",
336 |     "1. (a) Create an `if..elif..else` block that will compare a variable containing your age to another variable containing another person's age and print a statement which says if you are younger, older or the same age as that person.\n",
337 |     "2. (b) Use an `if` statement to check if some variable containing DNA sequence contains a stop codon. (e.g. `dna = \"ATGGCGGTCGAATAG\"`), first just check for one possible stop, but then extend your code to look for any of the 3 stop codons (`TAG`, `TAA`, `TGA`). Hint: recall that the `in` operator lets you check if a string contains some substring, and returns `True` or `False` accordingly."
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "markdown",
342 |    "metadata": {},
343 |    "source": [
344 |     "## Next session\n",
345 |     "\n",
346 |     "Go to our next notebook: [Introduction_to_python_day_1_session_4](Introduction_to_python_day_1_session_4.ipynb)"
347 |    ]
348 |   }
349 |  ],
350 |  "metadata": {
351 |   "kernelspec": {
352 |    "display_name": "Python 3",
353 |    "language": "python",
354 |    "name": "python3"
355 |   },
356 |   "language_info": {
357 |    "codemirror_mode": {
358 |     "name": "ipython",
359 |     "version": 3
360 |    },
361 |    "file_extension": ".py",
362 |    "mimetype": "text/x-python",
363 |    "name": "python",
364 |    "nbconvert_exporter": "python",
365 |    "pygments_lexer": "ipython3",
366 |    "version": "3.5.2"
367 |   }
368 |  },
369 |  "nbformat": 4,
370 |  "nbformat_minor": 0
371 | }
372 | 


--------------------------------------------------------------------------------
/Introduction_to_python_day_2_introduction.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "slideshow": {
  7 |      "slide_type": "slide"
  8 |     }
  9 |    },
 10 |    "source": [
 11 |     "# An introduction to solving biological problems with Python\n",
 12 |     "\n",
 13 |     "- Our course webpage: http://pycam.github.io\n",
 14 |     "- Python website: https://www.python.org/ \n",
 15 |     "- [Python 3 Standard Library](https://docs.python.org/3/library/index.html)"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "markdown",
 20 |    "metadata": {
 21 |     "slideshow": {
 22 |      "slide_type": "slide"
 23 |     }
 24 |    },
 25 |    "source": [
 26 |     "## Learning objectives\n",
 27 |     "\n",
 28 |     "- **Recall** what we've learned so far on variables, common data types, conditions and loops\n",
 29 |     "- **Propose and create** solutions using these concepts in an exercise\n",
 30 |     "- **Employ** functions and modules to reuse code\n",
 31 |     "- **Practice** reading and writing files with Python\n",
 32 |     "- **Use** third-party library BioPython\n",
 33 |     "- **Solve** more complex exercises"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {
 39 |     "slideshow": {
 40 |      "slide_type": "slide"
 41 |     }
 42 |    },
 43 |    "source": [
 44 |     "## Course schedule - day two\n",
 45 |     "\n",
 46 |     "- 09:30-10:30: [1h00] **Introduction** - What we've learned so far...\n",
 47 |     "- 10:30-10:45: *break*\n",
 48 |     "- 10:45-11:45: [1h00] **Session 2.1** - Functions\n",
 49 |     "- 11:45-12:00: *break*\n",
 50 |     "- 12:00-13:00: [1h00] **Session 2.2** - Exercises and Modules\n",
 51 |     "- 13:00-14:00: *lunch break*\n",
 52 |     "- 14:00-15:30: [1h30] **Session 2.3** - Files\n",
 53 |     "- 15:30-15:45: *break*\n",
 54 |     "- 15:45-16:45: [1h00] **Session 2.4** - BioPython"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {
 60 |     "slideshow": {
 61 |      "slide_type": "slide"
 62 |     }
 63 |    },
 64 |    "source": [
 65 |     "## What we've learned so far\n",
 66 |     "\n",
 67 |     "- Simple data types, Collections\n",
 68 |     "- Conditional execution\n",
 69 |     "- Loops\n",
 70 |     "- Functions used so far..."
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {
 76 |     "slideshow": {
 77 |      "slide_type": "slide"
 78 |     }
 79 |    },
 80 |    "source": [
 81 |     "## Collections"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 43,
 87 |    "metadata": {
 88 |     "collapsed": false,
 89 |     "slideshow": {
 90 |      "slide_type": "subslide"
 91 |     }
 92 |    },
 93 |    "outputs": [
 94 |     {
 95 |      "name": "stdout",
 96 |      "output_type": "stream",
 97 |      "text": [
 98 |       "A tuple: (2, 3, 4, 5)\n",
 99 |       "First element of tuple: 2\n"
100 |      ]
101 |     }
102 |    ],
103 |    "source": [
104 |     "## Tuple - immutable\n",
105 |     "example_tuple = (2, 3, 4, 5)\n",
106 |     "print('A tuple:', example_tuple)\n",
107 |     "print('First element of tuple:', example_tuple[0])"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 44,
113 |    "metadata": {
114 |     "collapsed": false,
115 |     "slideshow": {
116 |      "slide_type": "subslide"
117 |     }
118 |    },
119 |    "outputs": [
120 |     {
121 |      "name": "stdout",
122 |      "output_type": "stream",
123 |      "text": [
124 |       "A list: [2, 3, 4, 5]\n",
125 |       "First element of list: 2\n",
126 |       "Appended list: [2, 3, 4, 5, 12]\n",
127 |       "Modified list: [45, 3, 4, 5, 12]\n"
128 |      ]
129 |     }
130 |    ],
131 |    "source": [
132 |     "## List\n",
133 |     "example_list = [2, 3, 4, 5]\n",
134 |     "print('A list:', example_list)\n",
135 |     "print('First element of list:', example_list[0])\n",
136 |     "example_list.append(12)\n",
137 |     "print('Appended list:', example_list)\n",
138 |     "example_list[0] = 45\n",
139 |     "print('Modified list:', example_list)"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 45,
145 |    "metadata": {
146 |     "collapsed": false,
147 |     "slideshow": {
148 |      "slide_type": "subslide"
149 |     }
150 |    },
151 |    "outputs": [
152 |     {
153 |      "name": "stdout",
154 |      "output_type": "stream",
155 |      "text": [
156 |       "Here is a string: ATGTCATTT\n",
157 |       "First character: A\n",
158 |       "Number of characters in text 9\n"
159 |      ]
160 |     }
161 |    ],
162 |    "source": [
163 |     "## String - immutable, tuple of characters\n",
164 |     "text = \"ATGTCATTT\"\n",
165 |     "print('Here is a string:', text)\n",
166 |     "print('First character:', text[0])\n",
167 |     "print('Number of characters in text', len(text))"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 46,
173 |    "metadata": {
174 |     "collapsed": false,
175 |     "slideshow": {
176 |      "slide_type": "subslide"
177 |     }
178 |    },
179 |    "outputs": [
180 |     {
181 |      "name": "stdout",
182 |      "output_type": "stream",
183 |      "text": [
184 |       "A set: {1, 2, 4, 5, 6}\n"
185 |      ]
186 |     }
187 |    ],
188 |    "source": [
189 |     "## Set - unique unordered elements\n",
190 |     "example_set = set([1,2,2,2,2,4,5,6,6,6])\n",
191 |     "print('A set:', example_set)"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 47,
197 |    "metadata": {
198 |     "collapsed": false,
199 |     "slideshow": {
200 |      "slide_type": "subslide"
201 |     }
202 |    },
203 |    "outputs": [
204 |     {
205 |      "name": "stdout",
206 |      "output_type": "stream",
207 |      "text": [
208 |       "A dictionary: {'G': 'Guanine', 'T': 'Thymine', 'A': 'Adenine', 'C': 'Cytosine'}\n",
209 |       "Value associated to key C: Cytosine\n"
210 |      ]
211 |     }
212 |    ],
213 |    "source": [
214 |     "## Dictionary\n",
215 |     "example_dictionary = {\"A\": \"Adenine\", \n",
216 |     "                      \"C\": \"Cytosine\", \n",
217 |     "                      \"G\": \"Guanine\", \n",
218 |     "                      \"T\": \"Thymine\"}\n",
219 |     "print('A dictionary:', example_dictionary)\n",
220 |     "print('Value associated to key C:', example_dictionary['C'])"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "markdown",
225 |    "metadata": {
226 |     "slideshow": {
227 |      "slide_type": "slide"
228 |     }
229 |    },
230 |    "source": [
231 |     "## Conditional execution"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": 48,
237 |    "metadata": {
238 |     "collapsed": false
239 |    },
240 |    "outputs": [
241 |     {
242 |      "name": "stdout",
243 |      "output_type": "stream",
244 |      "text": [
245 |       "Is 2 < 5? True\n",
246 |       "Is 2 == 5? False\n",
247 |       "Is 2 < 5 and 2 > 1? True\n",
248 |       "x is equal to 2\n"
249 |      ]
250 |     }
251 |    ],
252 |    "source": [
253 |     "x = 2\n",
254 |     "print('Is 2 < 5?', x < 5)\n",
255 |     "print('Is 2 == 5?', x == 5)\n",
256 |     "print('Is 2 < 5 and 2 > 1?', (x < 5) & (x > 1))\n",
257 |     "\n",
258 |     "if x == 2:\n",
259 |     "    print('x is equal to 2')"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "markdown",
264 |    "metadata": {
265 |     "slideshow": {
266 |      "slide_type": "slide"
267 |     }
268 |    },
269 |    "source": [
270 |     "## Loops"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": 49,
276 |    "metadata": {
277 |     "collapsed": false,
278 |     "slideshow": {
279 |      "slide_type": "-"
280 |     }
281 |    },
282 |    "outputs": [
283 |     {
284 |      "name": "stdout",
285 |      "output_type": "stream",
286 |      "text": [
287 |       "The element in list is: A\n",
288 |       "The element in list is: C\n",
289 |       "The element in list is: A\n",
290 |       "The element in list is: T\n",
291 |       "The element in list is: G\n"
292 |      ]
293 |     }
294 |    ],
295 |    "source": [
296 |     "example_list = ['A', 'C', 'A', 'T', 'G']\n",
297 |     "\n",
298 |     "## Looping through a list\n",
299 |     "for element in example_list:\n",
300 |     "    print(\"The element in list is:\", element)"
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "code",
305 |    "execution_count": 50,
306 |    "metadata": {
307 |     "collapsed": false,
308 |     "slideshow": {
309 |      "slide_type": "subslide"
310 |     }
311 |    },
312 |    "outputs": [
313 |     {
314 |      "name": "stdout",
315 |      "output_type": "stream",
316 |      "text": [
317 |       "The index in loop is 0 and its corresponding element is in the list A\n",
318 |       "The index in loop is 1 and its corresponding element is in the list C\n",
319 |       "The index in loop is 2 and its corresponding element is in the list A\n",
320 |       "The index in loop is 3 and its corresponding element is in the list T\n",
321 |       "The index in loop is 4 and its corresponding element is in the list G\n"
322 |      ]
323 |     }
324 |    ],
325 |    "source": [
326 |     "example_list = ['A', 'C', 'A', 'T', 'G']\n",
327 |     "\n",
328 |     "## Looping through list of indices using the range() function\n",
329 |     "for index in range(len(example_list)):\n",
330 |     "    print(\"The index in loop is\", index, 'and its corresponding element is in the list', example_list[index])"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "code",
335 |    "execution_count": 51,
336 |    "metadata": {
337 |     "collapsed": false,
338 |     "slideshow": {
339 |      "slide_type": "subslide"
340 |     }
341 |    },
342 |    "outputs": [
343 |     {
344 |      "name": "stdout",
345 |      "output_type": "stream",
346 |      "text": [
347 |       "A found, the corresponding value is Adenine for item at position 0\n",
348 |       "Other value for item at position 1\n",
349 |       "A found, the corresponding value is Adenine for item at position 2\n",
350 |       "T found, the corresponding value is Thymine for item at position 3\n",
351 |       "Other value for item at position 4\n"
352 |      ]
353 |     }
354 |    ],
355 |    "source": [
356 |     "## Looping using enumerate()\n",
357 |     "example_list = ['A', 'C', 'A', 'T', 'G']\n",
358 |     "example_dictionary = {\"A\": \"Adenine\", \"C\": \"Cytosine\", \"G\": \"Guanine\", \"T\": \"Thymine\"}\n",
359 |     "\n",
360 |     "for index, value in enumerate(example_list):\n",
361 |     "    if value == 'A':\n",
362 |     "        print(value, \"found, the corresponding value is\", example_dictionary[value], 'for item at position', index)\n",
363 |     "    elif value == 'T':\n",
364 |     "        print(value, \"found, the corresponding value is\", example_dictionary[value], 'for item at position', index)\n",
365 |     "    else:\n",
366 |     "        print(\"Other value\", 'for item at position', index)"
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "markdown",
371 |    "metadata": {
372 |     "slideshow": {
373 |      "slide_type": "slide"
374 |     }
375 |    },
376 |    "source": [
377 |     "## Functions used so far..."
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": 52,
383 |    "metadata": {
384 |     "collapsed": false
385 |    },
386 |    "outputs": [
387 |     {
388 |      "name": "stdout",
389 |      "output_type": "stream",
390 |      "text": [
391 |       "There are 5 elements in the list ['A', 'C', 'A', 'T', 'G']\n",
392 |       "['ATG', 'TCA', 'CCG', 'GGC']\n"
393 |      ]
394 |     }
395 |    ],
396 |    "source": [
397 |     "example_list = ['A', 'C', 'A', 'T', 'G']\n",
398 |     "print('There are', len(example_list), 'elements in the list', example_list)\n",
399 |     "print(\"ATG TCA CCG GGC\".split())"
400 |    ]
401 |   },
402 |   {
403 |    "cell_type": "markdown",
404 |    "metadata": {
405 |     "slideshow": {
406 |      "slide_type": "slide"
407 |     }
408 |    },
409 |    "source": [
410 |     "## Exercise 2.0.1\n",
411 |     "\n",
412 |     "- Create a string variable with the lyrics of Imagine by John Lennon, 1971. Split into words. Print the total number of words, and the number of unique words. Calculate the frequency of each word and store the result into a dictionary. Print each unique word along with its frequency. Find the most frequent word in the song, print it with its frequency.\n",
413 |     "\n",
414 |     "<center><img src=\"https://upload.wikimedia.org/wikipedia/en/1/1d/John_Lennon_-_Imagine_John_Lennon.jpg\"/></center>"
415 |    ]
416 |   },
417 |   {
418 |    "cell_type": "code",
419 |    "execution_count": 53,
420 |    "metadata": {
421 |     "collapsed": false,
422 |     "slideshow": {
423 |      "slide_type": "subslide"
424 |     }
425 |    },
426 |    "outputs": [],
427 |    "source": [
428 |     "lyrics = \"\"\"\n",
429 |     "Imagine there's no Heaven\n",
430 |     "It's easy if you try\n",
431 |     "No Hell below us\n",
432 |     "Above us only sky\n",
433 |     "\n",
434 |     "Imagine all the people\n",
435 |     "Living for today\n",
436 |     "Aaa haa\n",
437 |     "\n",
438 |     "Imagine there's no countries\n",
439 |     "It isn't hard to do\n",
440 |     "Nothing to kill or die for\n",
441 |     "And no religion too\n",
442 |     "\n",
443 |     "Imagine all the people\n",
444 |     "Living life in peace\n",
445 |     "Yoo hoo\n",
446 |     "\n",
447 |     "You may say I'm a dreamer\n",
448 |     "But I'm not the only one\n",
449 |     "I hope someday you'll join us\n",
450 |     "And the world will be as one\n",
451 |     "\n",
452 |     "Imagine no possessions\n",
453 |     "I wonder if you can\n",
454 |     "No need for greed or hunger\n",
455 |     "A brotherhood of man\n",
456 |     "\n",
457 |     "Imagine all the people\n",
458 |     "Sharing all the world\n",
459 |     "Yoo hoo\n",
460 |     "\n",
461 |     "You may say I'm a dreamer\n",
462 |     "But I'm not the only one\n",
463 |     "I hope someday you'll join us\n",
464 |     "And the world will live as one\n",
465 |     "\"\"\""
466 |    ]
467 |   },
468 |   {
469 |    "cell_type": "markdown",
470 |    "metadata": {
471 |     "slideshow": {
472 |      "slide_type": "slide"
473 |     }
474 |    },
475 |    "source": [
476 |     "## Next session\n",
477 |     "\n",
478 |     "Go to our next notebook: [Introduction_to_python_day_2_session_1](Introduction_to_python_day_2_session_1.ipynb)"
479 |    ]
480 |   }
481 |  ],
482 |  "metadata": {
483 |   "celltoolbar": "Slideshow",
484 |   "kernelspec": {
485 |    "display_name": "Python 3",
486 |    "language": "python",
487 |    "name": "python3"
488 |   },
489 |   "language_info": {
490 |    "codemirror_mode": {
491 |     "name": "ipython",
492 |     "version": 3
493 |    },
494 |    "file_extension": ".py",
495 |    "mimetype": "text/x-python",
496 |    "name": "python",
497 |    "nbconvert_exporter": "python",
498 |    "pygments_lexer": "ipython3",
499 |    "version": "3.5.2"
500 |   }
501 |  },
502 |  "nbformat": 4,
503 |  "nbformat_minor": 0
504 | }
505 | 


--------------------------------------------------------------------------------
/Introduction_to_python_day_1_session_4.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "collapsed": true
  7 |    },
  8 |    "source": [
  9 |     "# An introduction to solving biological problems with Python\n",
 10 |     "\n",
 11 |     "## Session 1.4: Loops\n",
 12 |     "\n",
 13 |     "- [The <tt>for</tt> loop](#The-for-loop)\n",
 14 |     "- [The <tt>while</tt> loop](#The-while-loop)\n",
 15 |     "- [Skipping and breaking loops](#Skipping-and-breaking-loops)\n",
 16 |     "- [Exercises 1.4.1](#Exercises-1.4.1)\n",
 17 |     "- [More looping using range and enumerate](#More-looping)\n",
 18 |     "- [Filtering in loops](#Filtering-in-loops)\n",
 19 |     "- [Exercises 1.4.2](#Exercises-1.4.2)"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {},
 25 |    "source": [
 26 |     "## Loops"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "markdown",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "When an operation needs to be repeated multiple times, for example on all of the items in a list, we \n",
 34 |     "avoid having to type (or copy and paste) repetitive code by creating a loop. There are two ways of creating loops in Python, the <tt>for</tt> loop and the <tt>while</tt> loop."
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "## The <tt>for</tt> loop"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "The for loop in Python iterates over each item in a sequence (such as a list or tuple) in the order that they appear in the sequence. What this means is that a variable (<tt>code</tt> in the below example) is set to each item from the sequence of values in turn, and each time this happens the indented block of code is executed again."
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": null,
 54 |    "metadata": {
 55 |     "collapsed": false
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "codeList = ['NA06984', 'NA06985', 'NA06986', 'NA06989', 'NA06991']\n",
 60 |     "\n",
 61 |     "for code in codeList:\n",
 62 |     "    print(code)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "metadata": {},
 68 |    "source": [
 69 |     "A <tt>for</tt> loop can iterate over the individual characters in a string:"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": null,
 75 |    "metadata": {
 76 |     "collapsed": false
 77 |    },
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "dnaSequence = 'ATGGTGTTGCC'\n",
 81 |     "\n",
 82 |     "for base in dnaSequence:\n",
 83 |     "    print(base)"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "And also over the keys of a dictionary: "
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": null,
 96 |    "metadata": {
 97 |     "collapsed": false
 98 |    },
 99 |    "outputs": [],
100 |    "source": [
101 |     "rnaMassDict = {\"G\":345.21, \"C\":305.18, \"A\":329.21, \"U\":302.16}\n",
102 |     "\n",
103 |     "for x in rnaMassDict:\n",
104 |     "    print(x, rnaMassDict[x])"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "markdown",
109 |    "metadata": {},
110 |    "source": [
111 |     "Any variables that are defined before the loop can be accessed from inside the loop. So for example to calculate the summation of the items in a list of values we could define the total initially to be zero and add each value to the total in the loop:"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": null,
117 |    "metadata": {
118 |     "collapsed": false
119 |    },
120 |    "outputs": [],
121 |    "source": [
122 |     "total = 0\n",
123 |     "values = [1, 2, 4, 8, 16]\n",
124 |     "\n",
125 |     "for v in values:\n",
126 |     "    total = total + v\n",
127 |     "    # total += v\n",
128 |     "    print(total)\n",
129 |     "\n",
130 |     "print(total)"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "markdown",
135 |    "metadata": {},
136 |    "source": [
137 |     "Naturally we can combine a <tt>for</tt> loop with an <tt>if</tt> statement, noting that we need two indentation levels, one for the outer loop and another for the conditional blocks:"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": null,
143 |    "metadata": {
144 |     "collapsed": false
145 |    },
146 |    "outputs": [],
147 |    "source": [
148 |     "geneExpression = {\n",
149 |     "    'Beta-Catenin': 2.5, \n",
150 |     "    'Beta-Actin': 1.7, \n",
151 |     "    'Pax6': 0, \n",
152 |     "    'HoxA2': -3.2\n",
153 |     "}\n",
154 |     "\n",
155 |     "for gene in geneExpression:\n",
156 |     "    if geneExpression.get(gene) < 0:\n",
157 |     "        print(gene, \"is downregulated\")\n",
158 |     "        \n",
159 |     "    elif geneExpression.get(gene) > 0:\n",
160 |     "        print(gene, \"is upregulated\")\n",
161 |     "        \n",
162 |     "    else:\n",
163 |     "        print(\"No change in expression of \", gene)"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "markdown",
168 |    "metadata": {},
169 |    "source": [
170 |     "## The <tt>while</tt> loop"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "markdown",
175 |    "metadata": {},
176 |    "source": [
177 |     "In addition to the <tt>for</tt> loop that operates on a collection of items, there is a <tt>while</tt> loop that simply repeats while some statement evaluates to True and stops when it is False. Note that if the tested expression never evaluates to False then you have an “infinite loop”, which is not good.\n",
178 |     "\n",
179 |     "In this example we generate a series of numbers by doubling a value after each iteration, until a limit is reached: "
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "code",
184 |    "execution_count": null,
185 |    "metadata": {
186 |     "collapsed": false
187 |    },
188 |    "outputs": [],
189 |    "source": [
190 |     "value = 0.25\n",
191 |     "while value < 8:\n",
192 |     "    value = value * 2\n",
193 |     "    print(value)\n",
194 |     "\n",
195 |     "print(\"final value:\", value)"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "markdown",
200 |    "metadata": {},
201 |    "source": [
202 |     "What's going on here is that the value is doubled in each iteration and once it gets to 8 the while test fails (8 is not less than 8) and that last value is preserved. Note that if the test were instead `value <= 8` then we would get one more doubling and the value would reach 16."
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "markdown",
207 |    "metadata": {},
208 |    "source": [
209 |     "## Skipping and breaking loops"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "markdown",
214 |    "metadata": {},
215 |    "source": [
216 |     "Python has two ways of affecting the flow of the <tt>for</tt> or <tt>while</tt> loop inside the block. The <tt>continue</tt> statement means that the rest of the code in the block is skipped for this particular item in the collection, i.e. jump to the next iteration. In this example negative numbers are left out of a summation:"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": null,
222 |    "metadata": {
223 |     "collapsed": false
224 |    },
225 |    "outputs": [],
226 |    "source": [
227 |     "values = [10, -5, 3, -1, 7]\n",
228 |     "\n",
229 |     "total = 0\n",
230 |     "for v in values:\n",
231 |     "    if v < 0:\n",
232 |     "        continue # Skip this iteration   \n",
233 |     "    total += v\n",
234 |     "\n",
235 |     "print(total)"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "markdown",
240 |    "metadata": {},
241 |    "source": [
242 |     "The other way of affecting a loop is with the <tt>break</tt> statement. In contrast to the <tt>continue</tt> statement, this immediately causes all looping to finish, and execution is resumed at the next statement _after_ the loop."
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": null,
248 |    "metadata": {
249 |     "collapsed": false
250 |    },
251 |    "outputs": [],
252 |    "source": [
253 |     "geneticCode = {'TAT': 'Tyrosine',  'TAC': 'Tyrosine',\n",
254 |     "               'CAA': 'Glutamine', 'CAG': 'Glutamine',\n",
255 |     "               'TAG': 'STOP'}\n",
256 |     "\n",
257 |     "sequence = ['CAG','TAC','CAA','TAG','TAC','CAG','CAA']\n",
258 |     "\n",
259 |     "for codon in sequence:\n",
260 |     "    if geneticCode[codon] == 'STOP':\n",
261 |     "        break            # Quit looping at this point\n",
262 |     "    else:\n",
263 |     "        print(geneticCode[codon])"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "markdown",
268 |    "metadata": {},
269 |    "source": [
270 |     "## Looping gotchas"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "markdown",
275 |    "metadata": {},
276 |    "source": [
277 |     "An internal counter is used to keep track of which item is used next, and this is incremented on each iteration. When this counter has reached the length of the sequence the loop terminates. This means that if you delete the current item from the sequence, the next item will be skipped (since it gets the index of the current item which has already been treated). Likewise, if you insert an item in a sequence before the current item, the current item will be treated again the next time through the loop. This can lead to nasty bugs that can be avoided by making a temporary copy using a slice of the whole sequence."
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "markdown",
282 |    "metadata": {},
283 |    "source": [
284 |     "<div class=\"alert-warning\">\n",
285 |     "**When looping, never modify the collection!** Always create a copy of it first.\n",
286 |     "</div>"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "markdown",
291 |    "metadata": {},
292 |    "source": [
293 |     "## Exercises 1.4.1\n",
294 |     "\n",
295 |     "1. Create a sequence where each element is an individual base of DNA. Make the sequence 15 bases long.\n",
296 |     "2. Print the length of the sequence.\n",
297 |     "3. Create a for loop to output every base of the sequence on a new line.\n",
298 |     "4. Create a <tt>while</tt> loop similar to the one above that starts at the third base in the sequence and outputs every third base until the 12th."
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "markdown",
303 |    "metadata": {},
304 |    "source": [
305 |     "## More looping"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "markdown",
310 |    "metadata": {},
311 |    "source": [
312 |     "### Using range"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "markdown",
317 |    "metadata": {},
318 |    "source": [
319 |     "If you would like to iterate over a numeric sequence then this is possible by combining the `range()` function and a for loop."
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "code",
324 |    "execution_count": null,
325 |    "metadata": {
326 |     "collapsed": false
327 |    },
328 |    "outputs": [],
329 |    "source": [
330 |     "print(list(range(10)))\n",
331 |     "\n",
332 |     "print(list(range(5, 10)))\n",
333 |     "\n",
334 |     "print(list(range(0, 10, 3)))\n",
335 |     "\n",
336 |     "print(list(range(7, 2, -2)))"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "markdown",
341 |    "metadata": {},
342 |    "source": [
343 |     "Looping through ranges "
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "code",
348 |    "execution_count": null,
349 |    "metadata": {
350 |     "collapsed": false
351 |    },
352 |    "outputs": [],
353 |    "source": [
354 |     "for x in range(8):\n",
355 |     "    print(x*x)"
356 |    ]
357 |   },
358 |   {
359 |    "cell_type": "code",
360 |    "execution_count": null,
361 |    "metadata": {
362 |     "collapsed": false
363 |    },
364 |    "outputs": [],
365 |    "source": [
366 |     "squares = []\n",
367 |     "for x in range(8):\n",
368 |     "    s = x*x\n",
369 |     "    squares.append(s)\n",
370 |     "    \n",
371 |     "print(squares)"
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "markdown",
376 |    "metadata": {},
377 |    "source": [
378 |     "Looping through list indices"
379 |    ]
380 |   },
381 |   {
382 |    "cell_type": "code",
383 |    "execution_count": null,
384 |    "metadata": {
385 |     "collapsed": false
386 |    },
387 |    "outputs": [],
388 |    "source": [
389 |     "codes = ['NA06984', 'NA06985', 'NA06986', 'NA06989', 'NA06991']\n",
390 |     "\n",
391 |     "for index in range(len(codes)):\n",
392 |     "    print(index, codes[index])"
393 |    ]
394 |   },
395 |   {
396 |    "cell_type": "markdown",
397 |    "metadata": {},
398 |    "source": [
399 |     "Looping through indices for two lists"
400 |    ]
401 |   },
402 |   {
403 |    "cell_type": "code",
404 |    "execution_count": null,
405 |    "metadata": {
406 |     "collapsed": false
407 |    },
408 |    "outputs": [],
409 |    "source": [
410 |     "codes      = ['NA06984', 'NA06985', 'NA06986', 'NA06989', 'NA06991']\n",
411 |     "more_codes = ['NA06993', 'NA06994', 'NA06995', 'NA06997', 'NA07000']\n",
412 |     "\n",
413 |     "for index in range(len(codes)):\n",
414 |     "    print(index, codes[index], more_codes[index])"
415 |    ]
416 |   },
417 |   {
418 |    "cell_type": "markdown",
419 |    "metadata": {},
420 |    "source": [
421 |     "### Using enumerate"
422 |    ]
423 |   },
424 |   {
425 |    "cell_type": "markdown",
426 |    "metadata": {},
427 |    "source": [
428 |     "Given a sequence, `enumerate()` allows you to iterate over the sequence generating a tuple containing each value along with a corresponding index."
429 |    ]
430 |   },
431 |   {
432 |    "cell_type": "code",
433 |    "execution_count": null,
434 |    "metadata": {
435 |     "collapsed": false
436 |    },
437 |    "outputs": [],
438 |    "source": [
439 |     "letters = ['A','C','G','T']\n",
440 |     "for index, letter in enumerate(letters):\n",
441 |     "    print(index, letter)"
442 |    ]
443 |   },
444 |   {
445 |    "cell_type": "code",
446 |    "execution_count": null,
447 |    "metadata": {
448 |     "collapsed": false
449 |    },
450 |    "outputs": [],
451 |    "source": [
452 |     "numbered_letters = list(enumerate(letters))\n",
453 |     "print(numbered_letters)"
454 |    ]
455 |   },
456 |   {
457 |    "cell_type": "markdown",
458 |    "metadata": {},
459 |    "source": [
460 |     "## Filtering in loops"
461 |    ]
462 |   },
463 |   {
464 |    "cell_type": "code",
465 |    "execution_count": null,
466 |    "metadata": {
467 |     "collapsed": false
468 |    },
469 |    "outputs": [],
470 |    "source": [
471 |     "city_pops = {\n",
472 |     "    'London': 8200000,\n",
473 |     "    'Cambridge': 130000,\n",
474 |     "    'Edinburgh': 420000,\n",
475 |     "    'Glasgow': 1200000\n",
476 |     "}\n",
477 |     "\n",
478 |     "big_cities = []\n",
479 |     "for city in city_pops:\n",
480 |     "    if city_pops[city] >= 1000000:\n",
481 |     "         big_cities.append(city)\n",
482 |     "\n",
483 |     "print(big_cities)"
484 |    ]
485 |   },
486 |   {
487 |    "cell_type": "code",
488 |    "execution_count": null,
489 |    "metadata": {
490 |     "collapsed": false
491 |    },
492 |    "outputs": [],
493 |    "source": [
494 |     "total = 0\n",
495 |     "for city in city_pops:\n",
496 |     "    total += city_pops[city]\n",
497 |     "print(\"total population:\", total)"
498 |    ]
499 |   },
500 |   {
501 |    "cell_type": "code",
502 |    "execution_count": null,
503 |    "metadata": {
504 |     "collapsed": false
505 |    },
506 |    "outputs": [],
507 |    "source": [
508 |     "pops = list(city_pops.values())\n",
509 |     "print(\"total population:\", sum(pops))"
510 |    ]
511 |   },
512 |   {
513 |    "cell_type": "markdown",
514 |    "metadata": {},
515 |    "source": [
516 |     "## Exercises 1.4.2\n",
517 |     "\n",
518 |     "1. Let's calculate the GC content of a DNA sequence. Use the 15-base sequence you created for the exercises above. Create a variable, `gc`, which we will use to count the number of Gs or Cs in our sequence.\n",
519 |     "2. Create a loop to iterate over the bases in your sequence. If the base is a G or the base is a C, add one to your `gc` variable.\n",
520 |     "3. When the loop is done, divide the number of GC bases by the length of the sequence and multiply by 100 to get the GC percentage."
521 |    ]
522 |   },
523 |   {
524 |    "cell_type": "markdown",
525 |    "metadata": {},
526 |    "source": [
527 |     "## Congratulations! You reached the end of day 1! \n",
528 |     "\n",
529 |     "Go to our next notebook: [Introduction_to_python_day_2_introduction](Introduction_to_python_day_2_introduction.ipynb)"
530 |    ]
531 |   }
532 |  ],
533 |  "metadata": {
534 |   "kernelspec": {
535 |    "display_name": "Python 3",
536 |    "language": "python",
537 |    "name": "python3"
538 |   },
539 |   "language_info": {
540 |    "codemirror_mode": {
541 |     "name": "ipython",
542 |     "version": 3
543 |    },
544 |    "file_extension": ".py",
545 |    "mimetype": "text/x-python",
546 |    "name": "python",
547 |    "nbconvert_exporter": "python",
548 |    "pygments_lexer": "ipython3",
549 |    "version": "3.5.2"
550 |   }
551 |  },
552 |  "nbformat": 4,
553 |  "nbformat_minor": 0
554 | }
555 | 


--------------------------------------------------------------------------------
/Introduction_to_python_day_1_session_1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "slideshow": {
  7 |      "slide_type": "slide"
  8 |     }
  9 |    },
 10 |    "source": [
 11 |     "# An introduction to solving biological problems with Python\n",
 12 |     "\n",
 13 |     "## Day 1 - Session 1: \n",
 14 |     "\n",
 15 |     "- [Printing values](#Printing-values)\n",
 16 |     "- [Using variables](#Using-variables)\n",
 17 |     "- [Simple data types](#Simple-data-types): [Booleans](#Booleans), [Integers](#Integers), [Floating point numbers](#Floating-point-numbers), and [Strings](#Strings)\n",
 18 |     "- [Comments](#Comments)\n",
 19 |     "- [Exercises 1.1.1](#Exercises-1.1.1)\n",
 20 |     "- [Arithmetic](#Arithmetic)\n",
 21 |     "- [Exercises 1.1.2](#Exercises-1.1.2)\n",
 22 |     "- [Saving code in files](#Saving-code-in-files)\n",
 23 |     "- [Exercises 1.1.3](#Exercises-1.1.3)\n"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "markdown",
 28 |    "metadata": {
 29 |     "slideshow": {
 30 |      "slide_type": "slide"
 31 |     }
 32 |    },
 33 |    "source": [
 34 |     "## Printing values\n",
 35 |     "\n",
 36 |     "The first bit of python syntax we're going to learn is the <tt>print</tt> function. This command lets us print messages to the user, and also lets us see what Python thinks is the value of some expression (very useful when debugging your programs).\n",
 37 |     "\n",
 38 |     "We will go into details later on, but for now just note that to print some text you have to enclose it in  \"quotation marks\". \n",
 39 |     "\n",
 40 |     "We will go into detail on the arithmetic operations supported in python shortly, but you can try exploring python's calculating abilities."
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": null,
 46 |    "metadata": {
 47 |     "collapsed": false,
 48 |     "slideshow": {
 49 |      "slide_type": "fragment"
 50 |     }
 51 |    },
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "print(\"Hello from python!\")"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "metadata": {
 61 |     "collapsed": false,
 62 |     "slideshow": {
 63 |      "slide_type": "fragment"
 64 |     }
 65 |    },
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "print(34)"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": null,
 74 |    "metadata": {
 75 |     "collapsed": false,
 76 |     "slideshow": {
 77 |      "slide_type": "fragment"
 78 |     }
 79 |    },
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "print(2 + 3)"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "markdown",
 87 |    "metadata": {
 88 |     "slideshow": {
 89 |      "slide_type": "slide"
 90 |     }
 91 |    },
 92 |    "source": [
 93 |     "To print multiple expressions, you need to separate them with commas. Python will insert a space between each element, and a newline at the end of the message (though you can suppress this behaviour by passing `end=''` to <tt>print</tt>)."
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": null,
 99 |    "metadata": {
100 |     "collapsed": false,
101 |     "slideshow": {
102 |      "slide_type": "fragment"
103 |     }
104 |    },
105 |    "outputs": [],
106 |    "source": [
107 |     "print(\"The answer:\", 42)"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {
113 |     "slideshow": {
114 |      "slide_type": "slide"
115 |     }
116 |    },
117 |    "source": [
118 |     "## Using variables"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "In the <tt>print</tt> commands above we have directly operated on values such as text strings and numbers. When programming we will typically want to deal with rather more complex expressions where it is useful to be able to assign a name to an expression, especially if we are trying to deal with multiple values at the same time.\n",
126 |     "\n",
127 |     "We can give a name to a value using _variables_, the name is apt because the values stored in a variable can _vary_. Unlike some other languages, the type of value assigned to a variable can also change (this is one of the reasons why python is known as a _dynamic_ language).\n",
128 |     "\n",
129 |     "A variable can be assigned to a simple value..."
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "metadata": {
136 |     "collapsed": false
137 |    },
138 |    "outputs": [],
139 |    "source": [
140 |     "x = 3\n",
141 |     "print(x)"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "markdown",
146 |    "metadata": {},
147 |    "source": [
148 |     "... or the outcome of a more complex expression."
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": null,
154 |    "metadata": {
155 |     "collapsed": false
156 |    },
157 |    "outputs": [],
158 |    "source": [
159 |     "x = 2 + 2\n",
160 |     "print(x)"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "markdown",
165 |    "metadata": {},
166 |    "source": [
167 |     "A variable can be called whatever you like (as long as it starts with a letter or an underscore, does not contain spaces, and is meaningful) and you assign a value to a variable with the **`=` operator**. Note that this is different to mathematical equality (which we will come to later...)\n",
168 |     "\n",
169 |     "You can <tt>print</tt> a variable to see what python thinks its current value is."
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": null,
175 |    "metadata": {
176 |     "collapsed": false
177 |    },
178 |    "outputs": [],
179 |    "source": [
180 |     "serine = \"TCA\"\n",
181 |     "print(serine, \"codes for serine\")\n",
182 |     "serine = \"TCG\"\n",
183 |     "print(\"as does\", serine)"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "markdown",
188 |    "metadata": {},
189 |    "source": [
190 |     "In the interactive interpreter you don't have to <tt>print</tt> everything, if you type a variable name (or just a value), the interpreter will automatically print out what python thinks the value is. Note though that this is not the case if your code is in a file."
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": null,
196 |    "metadata": {
197 |     "collapsed": false
198 |    },
199 |    "outputs": [],
200 |    "source": [
201 |     "3 + 4"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": null,
207 |    "metadata": {
208 |     "collapsed": false
209 |    },
210 |    "outputs": [],
211 |    "source": [
212 |     "x = 5\n",
213 |     "3 * x"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "markdown",
218 |    "metadata": {},
219 |    "source": [
220 |     "Variables can be used on the right hand side of an assignment as well, in which case they will be evaluated before the value is assigned to the variable on the left hand side."
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": null,
226 |    "metadata": {
227 |     "collapsed": false
228 |    },
229 |    "outputs": [],
230 |    "source": [
231 |     "x = 5\n",
232 |     "y = x * 3\n",
233 |     "print(y)"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "markdown",
238 |    "metadata": {},
239 |    "source": [
240 |     "or just `y` in the interpreter and in Jupyter notebook"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": null,
246 |    "metadata": {
247 |     "collapsed": false
248 |    },
249 |    "outputs": [],
250 |    "source": [
251 |     "y"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "markdown",
256 |    "metadata": {},
257 |    "source": [
258 |     "You can use the current value of a variable itself in an assignment"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "code",
263 |    "execution_count": null,
264 |    "metadata": {
265 |     "collapsed": false
266 |    },
267 |    "outputs": [],
268 |    "source": [
269 |     "y = y + 1\n",
270 |     "y"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "markdown",
275 |    "metadata": {},
276 |    "source": [
277 |     "In fact this is such a common idiom that there are special operators that will do this implicitly (more on these later)"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": null,
283 |    "metadata": {
284 |     "collapsed": false
285 |    },
286 |    "outputs": [],
287 |    "source": [
288 |     "y += 1\n",
289 |     "y"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "markdown",
294 |    "metadata": {},
295 |    "source": [
296 |     "## Simple data types"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "markdown",
301 |    "metadata": {},
302 |    "source": [
303 |     "Python (and computers in general) treats different types of data differently. Python has 5 main basic data types. Types are useful to constrain some operations to a certain category of variables. For example it doesn't really make sense to try to divide a string.\n",
304 |     "\n",
305 |     "We will see some examples of these in use shortly, but for now let's see all of the basic types available in python.\n",
306 |     "\n",
307 |     "### Booleans\n",
308 |     "\n",
309 |     "Boolean values represent truth or falsehood, as used in logical operations, for example. Not surprisingly, there are only two values, and in Python they are called <tt>True</tt> and <tt>False</tt>."
310 |    ]
311 |   },
312 |   {
313 |    "cell_type": "code",
314 |    "execution_count": null,
315 |    "metadata": {
316 |     "collapsed": false
317 |    },
318 |    "outputs": [],
319 |    "source": [
320 |     "a = True\n",
321 |     "b = False\n",
322 |     "print(a, b)"
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "markdown",
327 |    "metadata": {},
328 |    "source": [
329 |     "### Integers\n",
330 |     "\n",
331 |     "Integers represent whole numbers, as you would use when counting items, and can be positive or negative."
332 |    ]
333 |   },
334 |   {
335 |    "cell_type": "code",
336 |    "execution_count": null,
337 |    "metadata": {
338 |     "collapsed": false
339 |    },
340 |    "outputs": [],
341 |    "source": [
342 |     "i = -7\n",
343 |     "j = 123\n",
344 |     "print(i, j)"
345 |    ]
346 |   },
347 |   {
348 |    "cell_type": "markdown",
349 |    "metadata": {},
350 |    "source": [
351 |     "### Floating point numbers\n",
352 |     "\n",
353 |     "Floating point numbers, often simply referred to as <tt>float</tt>s, are numbers expressed in the decimal system, i.e. 2.1, 999.998, -0.000004 etc. The value 2.0 would also be interpreted as a floating point number, but the value 2, without the decimal point will not; it will be interpreted as an integer."
354 |    ]
355 |   },
356 |   {
357 |    "cell_type": "code",
358 |    "execution_count": null,
359 |    "metadata": {
360 |     "collapsed": false
361 |    },
362 |    "outputs": [],
363 |    "source": [
364 |     "x = 3.14159\n",
365 |     "y = -42.3\n",
366 |     "print(x * y)"
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "markdown",
371 |    "metadata": {},
372 |    "source": [
373 |     "Floating point numbers can also carry an <tt>e</tt> suffix that states which power of ten they operate at."
374 |    ]
375 |   },
376 |   {
377 |    "cell_type": "code",
378 |    "execution_count": null,
379 |    "metadata": {
380 |     "collapsed": false
381 |    },
382 |    "outputs": [],
383 |    "source": [
384 |     "k = 1.5e3\n",
385 |     "l = 3e-2\n",
386 |     "print(k)\n",
387 |     "print(l)"
388 |    ]
389 |   },
390 |   {
391 |    "cell_type": "markdown",
392 |    "metadata": {},
393 |    "source": [
394 |     "### Strings\n",
395 |     "\n",
396 |     "Strings represent text, i.e. \"strings\" of characters. They can be delimited by single quotes <tt>'</tt> or double quotes <tt>\"</tt>, but you have to use the same delimiter at both ends. Unlike some programming languages, such as Perl, there is no difference between the two types of quote, although using one type does allow the other type to appear inside the string as a regular character.\n",
397 |     "\n",
398 |     "Normally a python statement ends at the end of the line, but if you want to type a string over several lines you can enclose it in triple quotation marks."
399 |    ]
400 |   },
401 |   {
402 |    "cell_type": "code",
403 |    "execution_count": null,
404 |    "metadata": {
405 |     "collapsed": false
406 |    },
407 |    "outputs": [],
408 |    "source": [
409 |     "s = \"ATGTCGTCTACAACACT\"\n",
410 |     "t = 'Serine'\n",
411 |     "u = \"It's a string with apostrophes\"\n",
412 |     "v = \"\"\"A string that extends\n",
413 |     "over multiple lines\"\"\"\n",
414 |     "print(v)"
415 |    ]
416 |   },
417 |   {
418 |    "cell_type": "markdown",
419 |    "metadata": {},
420 |    "source": [
421 |     "### The <tt>None</tt> object\n",
422 |     "\n",
423 |     "The None object is a special built-in value which can be thought of as **representing nothingness or that something is undefined**. For example, it can be used to indicate that a variable exists, but has not yet been set to anything specific."
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "code",
428 |    "execution_count": null,
429 |    "metadata": {
430 |     "collapsed": false
431 |    },
432 |    "outputs": [],
433 |    "source": [
434 |     "z = None\n",
435 |     "print(z)"
436 |    ]
437 |   },
438 |   {
439 |    "cell_type": "markdown",
440 |    "metadata": {},
441 |    "source": [
442 |     "### Object type\n",
443 |     "\n",
444 |     "You can check what type python thinks an expression is with the <tt>type</tt> function, which you can call with the name <tt>type</tt> immediately followed by parentheses enclosing the expression you want to check (either a variable or a value), e.g. <tt>type(3)</tt>. (This is the general form for calling functions, we'll see lots more examples of functions later...)"
445 |    ]
446 |   },
447 |   {
448 |    "cell_type": "code",
449 |    "execution_count": null,
450 |    "metadata": {
451 |     "collapsed": false
452 |    },
453 |    "outputs": [],
454 |    "source": [
455 |     "a = True\n",
456 |     "print(a, \"is of\", type(a))"
457 |    ]
458 |   },
459 |   {
460 |    "cell_type": "code",
461 |    "execution_count": null,
462 |    "metadata": {
463 |     "collapsed": false
464 |    },
465 |    "outputs": [],
466 |    "source": [
467 |     "i = -7\n",
468 |     "print(i, \"is of\", type(i))"
469 |    ]
470 |   },
471 |   {
472 |    "cell_type": "code",
473 |    "execution_count": null,
474 |    "metadata": {
475 |     "collapsed": false
476 |    },
477 |    "outputs": [],
478 |    "source": [
479 |     "x = 12.7893\n",
480 |     "print(x, \"is of\", type(x))"
481 |    ]
482 |   },
483 |   {
484 |    "cell_type": "code",
485 |    "execution_count": null,
486 |    "metadata": {
487 |     "collapsed": false
488 |    },
489 |    "outputs": [],
490 |    "source": [
491 |     "s = \"ATGTCGTCTACAACACT\"\n",
492 |     "print(s, \"is of\", type(s))"
493 |    ]
494 |   },
495 |   {
496 |    "cell_type": "code",
497 |    "execution_count": null,
498 |    "metadata": {
499 |     "collapsed": false
500 |    },
501 |    "outputs": [],
502 |    "source": [
503 |     "z = None\n",
504 |     "print(z, \"is of\", type(z))"
505 |    ]
506 |   },
507 |   {
508 |    "cell_type": "markdown",
509 |    "metadata": {},
510 |    "source": [
511 |     "## Comments\n",
512 |     "\n",
513 |     "When you are writing a program it is often convenient to annotate your code to remind you what it does (or what you intended it to do). In programming these annotations are known as _comments_. You can include a comment in python by prefixing some text with a <tt>#</tt> character. All text following the <tt>#</tt> will then be ignored by the interpreter. You can start a comment on its own line, or you can include it at the end of a line of code.\n",
514 |     "\n",
515 |     "It is also often useful to temporarily remove some code from a script without deleting it. This is known as _commenting out_ some code."
516 |    ]
517 |   },
518 |   {
519 |    "cell_type": "code",
520 |    "execution_count": null,
521 |    "metadata": {
522 |     "collapsed": false
523 |    },
524 |    "outputs": [],
525 |    "source": [
526 |     "print(\"Hi\") # this will be ignored\n",
527 |     "# as will this\n",
528 |     "print(\"Bye\")\n",
529 |     "# print \"Never seen\""
530 |    ]
531 |   },
532 |   {
533 |    "cell_type": "markdown",
534 |    "metadata": {},
535 |    "source": [
536 |     "## Exercises 1.1.1\n",
537 |     "\n",
538 |     "To start the Python interpreter, open a terminal window, type the command `python`, then enter Python commands after the prompt `>>>` and press `Enter` when you're done. \n",
539 |     "\n",
540 |     "Python will run the code you typed, and might display some output on the line below, before leaving you with another prompt which looks like `>>>`.\n",
541 |     "\n",
542 |     "If you want to exit the interactive interpreter you can type the command `quit()` or type `Ctrl-D`.\n",
543 |     "\n",
544 |     "In the interpreter:\n",
545 |     "\n",
546 |     "1. Create a variable and assign it the string value of your first name, assign your age to another variable (you are free to lie!), print out a message saying how old you are\n",
547 |     "2. Use the addition operator to add 10 to your age and print out a message saying how old you will be in 10 years time"
548 |    ]
549 |   },
550 |   {
551 |    "cell_type": "markdown",
552 |    "metadata": {},
553 |    "source": [
554 |     "## Arithmetic"
555 |    ]
556 |   },
557 |   {
558 |    "cell_type": "markdown",
559 |    "metadata": {},
560 |    "source": [
561 |     "Python supports all the standard arithmetical operations on numerical types, and mostly uses a similar syntax to several other computer languages:"
562 |    ]
563 |   },
564 |   {
565 |    "cell_type": "code",
566 |    "execution_count": null,
567 |    "metadata": {
568 |     "collapsed": false
569 |    },
570 |    "outputs": [],
571 |    "source": [
572 |     "x = 4.5\n",
573 |     "y = 2\n",
574 |     "\n",
575 |     "print('x', x, 'y', y)\n",
576 |     "print('addition x + y =', x + y) \n",
577 |     "print('subtraction x - y =', x - y) \n",
578 |     "print('multiplication x * y =', x * y) \n",
579 |     "print('division x / y =', x / y) "
580 |    ]
581 |   },
582 |   {
583 |    "cell_type": "code",
584 |    "execution_count": null,
585 |    "metadata": {
586 |     "collapsed": false
587 |    },
588 |    "outputs": [],
589 |    "source": [
590 |     "x = 4.5\n",
591 |     "y = 2\n",
592 |     "\n",
593 |     "print('x', x, 'y', y)\n",
594 |     "print('division x / y =', x / y)\n",
595 |     "print('floored division x // y =', x // y) \n",
596 |     "print('modulus (remainder of x/y) x % y =', x % y) \n",
597 |     "print('exponentiation x ** y =', x ** y)"
598 |    ]
599 |   },
600 |   {
601 |    "cell_type": "markdown",
602 |    "metadata": {},
603 |    "source": [
604 |     "As usual in maths, division and multiplication have higher precedence than addition and subtraction, but arithmetic expressions can be grouped using parentheses to override the default precedence"
605 |    ]
606 |   },
607 |   {
608 |    "cell_type": "code",
609 |    "execution_count": null,
610 |    "metadata": {
611 |     "collapsed": false
612 |    },
613 |    "outputs": [],
614 |    "source": [
615 |     "x = 13\n",
616 |     "y = 5\n",
617 |     "\n",
618 |     "print('x * (2 + y) =', x * (2 + y))\n",
619 |     "print('(x * 2) + y =', (x * 2) + y)\n",
620 |     "print('x * 2 + y =', x * 2 + y)"
621 |    ]
622 |   },
623 |   {
624 |    "cell_type": "markdown",
625 |    "metadata": {},
626 |    "source": [
627 |     "You can mix (some) types in arithmetic expressions and python will apply rules as to the type of the result\n"
628 |    ]
629 |   },
630 |   {
631 |    "cell_type": "code",
632 |    "execution_count": null,
633 |    "metadata": {
634 |     "collapsed": false
635 |    },
636 |    "outputs": [],
637 |    "source": [
638 |     "13 + 5.0"
639 |    ]
640 |   },
641 |   {
642 |    "cell_type": "markdown",
643 |    "metadata": {},
644 |    "source": [
645 |     "You can force python to use a particular type by converting an expression explicitly, using helpfully named functions: <tt>float</tt>, <tt>int</tt>, <tt>str</tt> etc."
646 |    ]
647 |   },
648 |   {
649 |    "cell_type": "code",
650 |    "execution_count": null,
651 |    "metadata": {
652 |     "collapsed": false
653 |    },
654 |    "outputs": [],
655 |    "source": [
656 |     "float(3) + float(7)"
657 |    ]
658 |   },
659 |   {
660 |    "cell_type": "code",
661 |    "execution_count": null,
662 |    "metadata": {
663 |     "collapsed": false
664 |    },
665 |    "outputs": [],
666 |    "source": [
667 |     "int(3.14159) + 1"
668 |    ]
669 |   },
670 |   {
671 |    "cell_type": "markdown",
672 |    "metadata": {},
673 |    "source": [
674 |     "The addition operator `+` allows you also to concatenate strings together."
675 |    ]
676 |   },
677 |   {
678 |    "cell_type": "code",
679 |    "execution_count": null,
680 |    "metadata": {
681 |     "collapsed": false
682 |    },
683 |    "outputs": [],
684 |    "source": [
685 |     "print('number' + str(3))"
686 |    ]
687 |   },
688 |   {
689 |    "cell_type": "markdown",
690 |    "metadata": {},
691 |    "source": [
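692 |     "Division in Python 2 sometimes trips up new (and experienced!) programmers: if you divide 2 integers you will only get an integer result, and if you want a floating point result you should explicitly cast at least one of the arguments to a <tt>float</tt>. In Python 3, which we use in this course, the `/` operator always returns a floating point result, even for 2 integers; use `//` if you want floored division."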
693 |    ]
694 |   },
695 |   {
696 |    "cell_type": "code",
697 |    "execution_count": null,
698 |    "metadata": {
699 |     "collapsed": false
700 |    },
701 |    "outputs": [],
702 |    "source": [
703 |     "print(\"3/4 =\", 3/4)\n",
704 |     "print(\"3.0/4 =\", 3.0/4)\n",
705 |     "print(\"float(3)/4 =\", float(3)/4)"
706 |    ]
707 |   },
708 |   {
709 |    "cell_type": "markdown",
710 |    "metadata": {},
711 |    "source": [
712 |     "There are a few shortcut assignment statements to make modifying variables directly faster to type"
713 |    ]
714 |   },
715 |   {
716 |    "cell_type": "code",
717 |    "execution_count": null,
718 |    "metadata": {
719 |     "collapsed": false
720 |    },
721 |    "outputs": [],
722 |    "source": [
723 |     "x = 3\n",
724 |     "x += 1 # equivalent to x = x + 1\n",
725 |     "x"
726 |    ]
727 |   },
728 |   {
729 |    "cell_type": "code",
730 |    "execution_count": null,
731 |    "metadata": {
732 |     "collapsed": false
733 |    },
734 |    "outputs": [],
735 |    "source": [
736 |     "x = 2\n",
737 |     "y = 10\n",
738 |     "y *= x\n",
739 |     "y"
740 |    ]
741 |   },
742 |   {
743 |    "cell_type": "markdown",
744 |    "metadata": {},
745 |    "source": [
746 |     "These shortcut operators are available for all arithmetic and bitwise operators (but not for the logical operators `and`, `or` and `not`)."
747 |    ]
748 |   },
749 |   {
750 |    "cell_type": "markdown",
751 |    "metadata": {},
752 |    "source": [
753 |     "## Exercises 1.1.2\n",
754 |     "\n",
755 |     "In the interpreter:\n",
756 |     "\n",
757 |     "1. Assign numerical values to 2 variables, calculate the mean of these two variables and store the result in another variable. Print out the result to the screen."
758 |    ]
759 |   },
760 |   {
761 |    "cell_type": "markdown",
762 |    "metadata": {},
763 |    "source": [
764 |     "## Saving code in files\n",
765 |     "\n",
766 |     "### Execute code in files\n",
767 |     "\n",
768 |     "As we mentioned earlier, you can also save python code in a file and then execute it later. We typically save python code in a file ending with the extension <tt>.py</tt>. The file, or _script_, can then be executed simply by supplying the name of the file as an argument to the <tt>python</tt> command in the terminal.\n",
769 |     "\n",
770 |     "The first file we will be looking at is located in the `scripts` directory and it is called `hello.py`. To execute the script, open a terminal window, navigate to the `scripts` directory and execute the code in the script `hello.py` by running `python hello.py` in your terminal:\n",
771 |     "\n",
772 |     "```bash\n",
773 |     "ls\n",
774 |     "cd scripts\n",
775 |     "python hello.py\n",
776 |     "```\n",
777 |     "\n",
778 |     "Shell commands:\n",
779 |     "- `ls`: to list directory contents\n",
780 |     "- `pwd`: to return working directory name\n",
781 |     "- `cd to/this/directory/`: to change directory\n",
782 |     "\n",
783 |     "### Edit code in files\n",
784 |     "\n",
785 |     "You can use any text editor you know to edit your file, but the file should be saved as plain text, so programs like Microsoft Word aren't the best choice. Many text editors will highlight python syntax for you which can help avoid syntax errors.\n",
786 |     "\n",
787 |     "To open any Python scripts in a text editor, open [Gedit](https://wiki.gnome.org/Apps/Gedit) or [Atom](https://atom.io/) and use the File menu, navigate to the `scripts` directory and open `hello.py`. \n",
788 |     "\n",
789 |     "You can now modify the `print` statement, save the file and go back to the terminal window you've just opened to run the code again by using the command `python hello.py`.\n"
790 |    ]
791 |   },
792 |   {
793 |    "cell_type": "markdown",
794 |    "metadata": {},
795 |    "source": [
796 |     "## Exercises 1.1.3\n",
797 |     "\n",
798 |     "Create a new Python file to solve these exercises. It is good practice to create a new file each time you solve a new problem.\n",
799 |     "\n",
800 |     "1. Look up the <a href=\"http://en.wikipedia.org/wiki/DNA_codon_table\">genetic code</a>. Create four string variables that store possible DNA encodings of serine (S), leucine (L), tyrosine (Y) and cysteine (C). Where multiple codings are available, just pick one for now.\n",
801 |     "2. Create a variable containing a possible DNA sequence for the protein sequence SYLYC. (Note that the addition operator <tt>+</tt> allows you to concatenate strings together.) Print the DNA sequence.\n",
802 |     "3. Include a comment in your file to remind you the purpose of the script"
803 |    ]
804 |   },
805 |   {
806 |    "cell_type": "markdown",
807 |    "metadata": {},
808 |    "source": [
809 |     "## Next session\n",
810 |     "\n",
811 |     "Go to our next notebook: [Introduction_to_python_day_1_session_2](Introduction_to_python_day_1_session_2.ipynb)"
812 |    ]
813 |   }
814 |  ],
815 |  "metadata": {
816 |   "kernelspec": {
817 |    "display_name": "Python 3",
818 |    "language": "python",
819 |    "name": "python3"
820 |   },
821 |   "language_info": {
822 |    "codemirror_mode": {
823 |     "name": "ipython",
824 |     "version": 3
825 |    },
826 |    "file_extension": ".py",
827 |    "mimetype": "text/x-python",
828 |    "name": "python",
829 |    "nbconvert_exporter": "python",
830 |    "pygments_lexer": "ipython3",
831 |    "version": "3.5.2"
832 |   }
833 |  },
834 |  "nbformat": 4,
835 |  "nbformat_minor": 0
836 | }
837 | 


--------------------------------------------------------------------------------
/Introduction_to_python_day_1_introduction.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "nbpresent": {
  7 |      "id": "dc7a1635-0bbd-4bf7-a07e-7a36f58e258b"
  8 |     },
  9 |     "slideshow": {
 10 |      "slide_type": "slide"
 11 |     }
 12 |    },
 13 |    "source": [
 14 |     "# An introduction to solving biological problems with Python"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {
 20 |     "nbpresent": {
 21 |      "id": "53eee250-b3d0-4262-ad09-e87fb2acf82e"
 22 |     },
 23 |     "slideshow": {
 24 |      "slide_type": "-"
 25 |     }
 26 |    },
 27 |    "source": [
 28 |     "## Presenters for 5-6 December 2016\n",
 29 |     "- Mukarram Hossain, Cambridge\n",
 30 |     "- Anne Pajon, CRUK Cambridge Institute"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {
 36 |     "nbpresent": {
 37 |      "id": "21082cb9-e1b9-4fe9-80d5-9d9e8418937b"
 38 |     },
 39 |     "slideshow": {
 40 |      "slide_type": "slide"
 41 |     }
 42 |    },
 43 |    "source": [
 44 |     "## Learning objectives\n",
 45 |     "- **Recall** how to print, create variables and save Python code in files\n",
 46 |     "- **List** the most common data types in Python\n",
 47 |     "- **Explain** how to write conditions and loops in Python\n",
 48 |     "- **Use and compare** these concepts in different code examples \n",
 49 |     "- **Propose and create** solutions using these concepts in different exercises"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "metadata": {
 55 |     "nbpresent": {
 56 |      "id": "ceb5f5a0-a5e8-435e-ae16-23c2ba8c6ab2"
 57 |     },
 58 |     "slideshow": {
 59 |      "slide_type": "slide"
 60 |     }
 61 |    },
 62 |    "source": [
 63 |     "## Course schedule - day one\n",
 64 |     "\n",
 65 |     "- 09:30-10:00: [0h30] **Introduction**\n",
 66 |     "- 10:00-12:00: [2h00] **Session 1** - Print, Variables, Simple data types, Arithmetic and Saving code in files\n",
 67 |     "- 12:00-13:00: *lunch break*\n",
 68 |     "- 13:00-15:00: [2h00] **Session 2** - Collections: Lists, Strings and Dictionaries\n",
 69 |     "- 15:00-15:15: *break*\n",
 70 |     "- 15:15-16:15: [1h00] **Session 3** - Conditional execution\n",
 71 |     "- 16:15-16:30: *break*\n",
 72 |     "- 16:30-17:30: [1h00] **Session 4** - Loops"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "metadata": {
 78 |     "nbpresent": {
 79 |      "id": "e6c2e441-eb7b-4a4c-9c9c-b88cc9a2527f"
 80 |     },
 81 |     "slideshow": {
 82 |      "slide_type": "slide"
 83 |     }
 84 |    },
 85 |    "source": [
 86 |     "## Course schedule - day two\n",
 87 |     "\n",
 88 |     "- Functions, Files and BioPython"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {
 94 |     "nbpresent": {
 95 |      "id": "8458de53-35b5-405e-a372-5db5d2e2c2c5"
 96 |     },
 97 |     "slideshow": {
 98 |      "slide_type": "slide"
 99 |     }
100 |    },
101 |    "source": [
102 |     "## Course materials\n",
103 |     "\n",
104 |     "- There is a course webpage with links to the materials, example solutions to the exercises etc.:\n",
105 |     "    - http://pycam.github.io\n",
106 |     "- All course material is available on GitHub https://github.com/pycam\n",
107 |     "- We’d like you to follow along with the example code as we go through the material, and attempt the exercises to practice what you’ve learned\n",
108 |     "- Questions are welcome at any point!\n",
109 |     "- If you have specific projects/problems you'd like to use Python for, we are happy to (try to) help during the exercises\n"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {
115 |     "nbpresent": {
116 |      "id": "96ca5c44-2cfc-471c-8da7-39870c822e20"
117 |     },
118 |     "slideshow": {
119 |      "slide_type": "slide"
120 |     }
121 |    },
122 |    "source": [
123 |     "## What is *Python*?\n",
124 |     "\n",
125 |     "- Python is a *dynamic, interpreted* general purpose programming language initially created by Guido van Rossum in 1991\n",
126 |     "- It is a powerful language that supports several popular programming paradigms:\n",
127 |     "    - procedural\n",
128 |     "    - object-oriented\n",
129 |     "    - functional\n",
130 |     "- Python is widely used in bioinformatics and scientific computing, as well as many other fields and in industry\n",
131 |     "- Python is available on all popular operating systems\n",
132 |     "    - Macs\n",
133 |     "    - Windows\n",
134 |     "    - Linux"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "metadata": {
140 |     "nbpresent": {
141 |      "id": "9110098b-9675-4d64-adf3-c947073d4c4d"
142 |     },
143 |     "slideshow": {
144 |      "slide_type": "slide"
145 |     }
146 |    },
147 |    "source": [
148 |     "## The Python programming language\n",
149 |     "\n",
150 |     "- Python is considered to come with \"batteries included\" and the <a href=\"https://docs.python.org/3.5/library/\">standard library</a> (some of which we will see in this course) provides built-in support for lots of common tasks:\n",
151 |     "    - numerical & mathematical functions \n",
152 |     "    - interacting with files and the operating system\n",
153 |     "    - ...\n",
154 |     "\n",
155 |     "- There is also a wide range of external libraries for areas not covered in the standard library, such as [Pandas](http://pandas.pydata.org/), the Python Data Analysis Library, and the [BioPython](http://biopython.org/) library, which provides tools for bioinformatics - we will look at this tomorrow"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "markdown",
160 |    "metadata": {
161 |     "nbpresent": {
162 |      "id": "0d61b4b4-163f-47fe-80f1-092287218273"
163 |     },
164 |     "slideshow": {
165 |      "slide_type": "slide"
166 |     }
167 |    },
168 |    "source": [
169 |     "## Getting started\n",
170 |     "\n",
171 |     "- Python is an *interpreted* language; this means that your computer does not run Python code natively, but instead we run our code using the Python interpreter\n",
172 |     "- There are three ways in which you can run Python code:\n",
173 |     "    - Directly typing **commands into the interpreter**: *Good for experimenting with the language, and for some interactive work*\n",
174 |     "    - Using a **Jupyter notebook**: *Great for experimenting with the language, and for sharing and learning*\n",
175 |     "    - Typing code **into a file** and then telling the interpreter to run the code from this file: *Good for larger programs, and when you want to run the same code repeatedly*\n"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "markdown",
180 |    "metadata": {
181 |     "nbpresent": {
182 |      "id": "b878a4f9-4345-4abb-81f4-5a731c639ab8"
183 |     },
184 |     "slideshow": {
185 |      "slide_type": "slide"
186 |     }
187 |    },
188 |    "source": [
189 |     "## How to start the Python interpreter?\n",
190 |     "\n",
191 |     "- How you start the interpreter will depend on which operating system you are using, but on a Mac or Linux machine you should start a terminal and then just type the command `python3`\n",
192 |     "- This will print out some information about your installation of python and then leave you with a command prompt which looks like `>>>` \n",
193 |     "- You can then type commands and press `Enter` when you're done. Python will run the code you typed, and might display some output on the line below, before leaving you with another prompt.\n",
194 |     "- If you want to exit the interactive interpreter you can type the command `quit()` or type `Ctrl-D`"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "markdown",
199 |    "metadata": {
200 |     "nbpresent": {
201 |      "id": "8a4ac456-6c4b-4249-8662-b1cabfd7cee4"
202 |     },
203 |     "slideshow": {
204 |      "slide_type": "slide"
205 |     }
206 |    },
207 |    "source": [
208 |     "## The terminal\n",
209 |     "\n",
210 |     "We will see later how to save code in a file and run it.\n",
211 |     "<center><img src=\"img/python_shell.png\"></center>"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "markdown",
216 |    "metadata": {
217 |     "nbpresent": {
218 |      "id": "f5bcbcb5-4352-4674-a7b6-c8e576220422"
219 |     },
220 |     "slideshow": {
221 |      "slide_type": "slide"
222 |     }
223 |    },
224 |    "source": [
225 |     "## The shell command lines you may need\n",
226 |     "\n",
227 |     "- `ls`: to list directory contents\n",
228 |     "- `pwd`: to return working directory name\n",
229 |     "- `cd to/this/directory/`: to change directory\n",
230 |     "- `cat hello.py`: to print the content of a text file \n"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "markdown",
235 |    "metadata": {
236 |     "nbpresent": {
237 |      "id": "9814e8d7-60e0-43e6-aee0-3c33cc2cc809"
238 |     },
239 |     "slideshow": {
240 |      "slide_type": "slide"
241 |     }
242 |    },
243 |    "source": [
244 |     "## What is a Jupyter notebook?\n",
245 |     "\n",
246 |     "<img src=\"http://jupyter.org/assets/nav_logo.svg\">\n",
247 |     "\n",
248 |     "- The [Jupyter Notebook](http://jupyter.org/) is a web application that allows you to create and share documents that contain live code, equations, visualizations and explanatory text. \n",
249 |     "\n",
250 |     "- Jupyter provides a rich architecture for interactive data science and scientific computing with: \n",
251 |     "    - Over 40 programming languages such as Python, R, Julia and Scala.\n",
252 |     "    - A browser-based notebook with support for code, rich text, math expressions, plots and other rich media.\n",
253 |     "    - Support for interactive data visualization.\n",
254 |     "    - Easy to use tools for parallel computing."
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "markdown",
259 |    "metadata": {
260 |     "nbpresent": {
261 |      "id": "62fdd00c-a006-4f11-b9dc-e2ca072225d7"
262 |     },
263 |     "slideshow": {
264 |      "slide_type": "slide"
265 |     }
266 |    },
267 |    "source": [
268 |     "## How to install Jupyter on your own computer?\n",
269 |     "\n",
270 |     "<img src=\"http://jupyter.org/assets/nav_logo.svg\">\n",
271 |     "\n",
272 |     "- [See Installing Jupyter Notebook](https://jupyter.readthedocs.io/en/latest/install.html)\n",
273 |     "\n",
274 |     "- For new users, we recommend [installing Anaconda](https://www.continuum.io/downloads). Anaconda conveniently installs Python, the Jupyter Notebook, and other commonly used packages for scientific computing and data science.\n",
275 |     "\n",
276 |     "- Start the notebook server from the command line:\n",
277 |     "```\n",
278 |     "jupyter notebook\n",
279 |     "```\n",
280 |     "- You should see the notebook home page open in your web browser.\n"
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "markdown",
285 |    "metadata": {
286 |     "nbpresent": {
287 |      "id": "0e25dad3-add0-466e-8f71-e771d6ec4500"
288 |     },
289 |     "slideshow": {
290 |      "slide_type": "slide"
291 |     }
292 |    },
293 |    "source": [
294 |     "## How to run python in a Jupyter notebook?\n",
295 |     "\n",
296 |     "<img src=\"http://jupyter.org/assets/nav_logo.svg\">\n",
297 |     "\n",
298 |     "- See [Jupyter Notebook Basics](http://nbviewer.jupyter.org/github/jupyter/notebook/blob/master/docs/source/examples/Notebook/Notebook%20Basics.ipynb)\n",
299 |     "\n",
300 |     "\n",
301 |     "- Go to our notebook for the first session: [Introduction_to_python_day_1_session_1](Introduction_to_python_day_1_session_1.ipynb)"
302 |    ]
303 |   }
304 |  ],
305 |  "metadata": {
306 |   "anaconda-cloud": {},
307 |   "celltoolbar": "Slideshow",
308 |   "kernelspec": {
309 |    "display_name": "Python 3",
310 |    "language": "python",
311 |    "name": "python3"
312 |   },
313 |   "language_info": {
314 |    "codemirror_mode": {
315 |     "name": "ipython",
316 |     "version": 3
317 |    },
318 |    "file_extension": ".py",
319 |    "mimetype": "text/x-python",
320 |    "name": "python",
321 |    "nbconvert_exporter": "python",
322 |    "pygments_lexer": "ipython3",
323 |    "version": "3.5.2"
324 |   },
325 |   "nbpresent": {
326 |    "slides": {
327 |     "152c5a3b-78f9-4183-bce2-379a4012baf6": {
328 |      "id": "152c5a3b-78f9-4183-bce2-379a4012baf6",
329 |      "layout": "grid",
330 |      "prev": "5613e857-5b4e-42e4-9feb-df0440592ca2",
331 |      "regions": {
332 |       "20d6059c-7745-410d-a5fb-0b91cacbc2e2": {
333 |        "attrs": {
334 |         "height": 0.6666666666666666,
335 |         "pad": 0.01,
336 |         "treemap:weight": 1,
337 |         "width": 0.5,
338 |         "x": 0,
339 |         "y": 0
340 |        },
341 |        "id": "20d6059c-7745-410d-a5fb-0b91cacbc2e2"
342 |       },
343 |       "300e6ccd-ecf4-425e-8574-3debe305aafb": {
344 |        "attrs": {
345 |         "height": 0.3333333333333333,
346 |         "pad": 0.01,
347 |         "treemap:weight": 1,
348 |         "width": 1,
349 |         "x": 0,
350 |         "y": 0.6666666666666666
351 |        },
352 |        "content": {
353 |         "cell": "9814e8d7-60e0-43e6-aee0-3c33cc2cc809",
354 |         "part": "whole"
355 |        },
356 |        "id": "300e6ccd-ecf4-425e-8574-3debe305aafb"
357 |       },
358 |       "df2dd6ff-570b-4b75-9cb7-1ff1dbdd4f55": {
359 |        "attrs": {
360 |         "height": 0.6666666666666666,
361 |         "pad": 0.01,
362 |         "treemap:weight": 1,
363 |         "width": 0.5,
364 |         "x": 0.5,
365 |         "y": 0
366 |        },
367 |        "id": "df2dd6ff-570b-4b75-9cb7-1ff1dbdd4f55"
368 |       }
369 |      }
370 |     },
371 |     "2586ca7d-5091-40ea-b566-ccc5fbf833c6": {
372 |      "id": "2586ca7d-5091-40ea-b566-ccc5fbf833c6",
373 |      "prev": "f001d476-5814-4664-a722-f04f5d23cd52",
374 |      "regions": {
375 |       "d6011048-43db-4990-a82e-768683aa4fe5": {
376 |        "attrs": {
377 |         "height": 0.8,
378 |         "width": 0.8,
379 |         "x": 0.1,
380 |         "y": 0.1
381 |        },
382 |        "content": {
383 |         "cell": "ceb5f5a0-a5e8-435e-ae16-23c2ba8c6ab2",
384 |         "part": "whole"
385 |        },
386 |        "id": "d6011048-43db-4990-a82e-768683aa4fe5"
387 |       }
388 |      }
389 |     },
390 |     "27ee4130-d0bb-4287-b8fe-75a7b0ecf178": {
391 |      "id": "27ee4130-d0bb-4287-b8fe-75a7b0ecf178",
392 |      "prev": "2586ca7d-5091-40ea-b566-ccc5fbf833c6",
393 |      "regions": {
394 |       "7a689d66-0c9d-4492-928b-f35bfd2ffc4c": {
395 |        "attrs": {
396 |         "height": 0.8,
397 |         "width": 0.8,
398 |         "x": 0.1,
399 |         "y": 0.1
400 |        },
401 |        "content": {
402 |         "cell": "e6c2e441-eb7b-4a4c-9c9c-b88cc9a2527f",
403 |         "part": "whole"
404 |        },
405 |        "id": "7a689d66-0c9d-4492-928b-f35bfd2ffc4c"
406 |       }
407 |      }
408 |     },
409 |     "2de0c027-7a07-4f7e-8594-a98d36125372": {
410 |      "id": "2de0c027-7a07-4f7e-8594-a98d36125372",
411 |      "prev": "75e76bd9-24ae-4c42-b6bc-5f58a0550ba8",
412 |      "regions": {
413 |       "868fd842-e6fb-48b2-9ac5-95e8fe20927e": {
414 |        "attrs": {
415 |         "height": 0.8,
416 |         "width": 0.8,
417 |         "x": 0.1,
418 |         "y": 0.1
419 |        },
420 |        "content": {
421 |         "cell": "0e25dad3-add0-466e-8f71-e771d6ec4500",
422 |         "part": "whole"
423 |        },
424 |        "id": "868fd842-e6fb-48b2-9ac5-95e8fe20927e"
425 |       }
426 |      }
427 |     },
428 |     "5613e857-5b4e-42e4-9feb-df0440592ca2": {
429 |      "id": "5613e857-5b4e-42e4-9feb-df0440592ca2",
430 |      "prev": "564dae42-4185-46c1-b156-e503f475e25c",
431 |      "regions": {
432 |       "17e888b0-050b-406a-a5a3-0d5c1605b8df": {
433 |        "attrs": {
434 |         "height": 0.8,
435 |         "width": 0.8,
436 |         "x": 0.1,
437 |         "y": 0.1
438 |        },
439 |        "content": {
440 |         "cell": "f5bcbcb5-4352-4674-a7b6-c8e576220422",
441 |         "part": "whole"
442 |        },
443 |        "id": "17e888b0-050b-406a-a5a3-0d5c1605b8df"
444 |       }
445 |      }
446 |     },
447 |     "564dae42-4185-46c1-b156-e503f475e25c": {
448 |      "id": "564dae42-4185-46c1-b156-e503f475e25c",
449 |      "prev": "ba285213-f645-4314-afd5-0a656fa35631",
450 |      "regions": {
451 |       "328d4d72-cd9e-4e5b-aaa8-175833f5bfdb": {
452 |        "attrs": {
453 |         "height": 0.8,
454 |         "width": 0.8,
455 |         "x": 0.1,
456 |         "y": 0.1
457 |        },
458 |        "content": {
459 |         "cell": "8a4ac456-6c4b-4249-8662-b1cabfd7cee4",
460 |         "part": "whole"
461 |        },
462 |        "id": "328d4d72-cd9e-4e5b-aaa8-175833f5bfdb"
463 |       }
464 |      }
465 |     },
466 |     "6ff94ac3-8ded-442e-ae43-aa0a5c14d468": {
467 |      "id": "6ff94ac3-8ded-442e-ae43-aa0a5c14d468",
468 |      "prev": "27ee4130-d0bb-4287-b8fe-75a7b0ecf178",
469 |      "regions": {
470 |       "ad759b3a-6080-4356-a9fd-87f2b1b90bc2": {
471 |        "attrs": {
472 |         "height": 0.8,
473 |         "width": 0.8,
474 |         "x": 0.1,
475 |         "y": 0.1
476 |        },
477 |        "content": {
478 |         "cell": "8458de53-35b5-405e-a372-5db5d2e2c2c5",
479 |         "part": "whole"
480 |        },
481 |        "id": "ad759b3a-6080-4356-a9fd-87f2b1b90bc2"
482 |       }
483 |      }
484 |     },
485 |     "75e76bd9-24ae-4c42-b6bc-5f58a0550ba8": {
486 |      "id": "75e76bd9-24ae-4c42-b6bc-5f58a0550ba8",
487 |      "prev": "152c5a3b-78f9-4183-bce2-379a4012baf6",
488 |      "regions": {
489 |       "4afd3b41-071f-44eb-a8f6-9a7f780041c2": {
490 |        "attrs": {
491 |         "height": 0.8,
492 |         "width": 0.8,
493 |         "x": 0.1,
494 |         "y": 0.1
495 |        },
496 |        "content": {
497 |         "cell": "62fdd00c-a006-4f11-b9dc-e2ca072225d7",
498 |         "part": "whole"
499 |        },
500 |        "id": "4afd3b41-071f-44eb-a8f6-9a7f780041c2"
501 |       }
502 |      }
503 |     },
504 |     "8c46fa2c-d5dc-4ef7-8d99-f504e2c3a4a1": {
505 |      "id": "8c46fa2c-d5dc-4ef7-8d99-f504e2c3a4a1",
506 |      "prev": "e2f5626f-0d60-47cb-967f-0edababb0329",
507 |      "regions": {
508 |       "af33776f-ec36-45be-a627-39573a78b1d6": {
509 |        "attrs": {
510 |         "height": 0.8,
511 |         "width": 0.8,
512 |         "x": 0.1,
513 |         "y": 0.1
514 |        },
515 |        "content": {
516 |         "cell": "0d61b4b4-163f-47fe-80f1-092287218273",
517 |         "part": "whole"
518 |        },
519 |        "id": "af33776f-ec36-45be-a627-39573a78b1d6"
520 |       }
521 |      }
522 |     },
523 |     "ae3f4c01-80dc-4add-889a-05c74f7155a5": {
524 |      "id": "ae3f4c01-80dc-4add-889a-05c74f7155a5",
525 |      "prev": "6ff94ac3-8ded-442e-ae43-aa0a5c14d468",
526 |      "regions": {
527 |       "15f00a98-7b04-439d-996d-851b773b060a": {
528 |        "attrs": {
529 |         "height": 0.8,
530 |         "width": 0.8,
531 |         "x": 0.1,
532 |         "y": 0.1
533 |        },
534 |        "content": {
535 |         "cell": "96ca5c44-2cfc-471c-8da7-39870c822e20",
536 |         "part": "whole"
537 |        },
538 |        "id": "15f00a98-7b04-439d-996d-851b773b060a"
539 |       }
540 |      }
541 |     },
542 |     "ba285213-f645-4314-afd5-0a656fa35631": {
543 |      "id": "ba285213-f645-4314-afd5-0a656fa35631",
544 |      "prev": "8c46fa2c-d5dc-4ef7-8d99-f504e2c3a4a1",
545 |      "regions": {
546 |       "6cddb9f2-8e39-4010-8fab-3e70b3a8993f": {
547 |        "attrs": {
548 |         "height": 0.8,
549 |         "width": 0.8,
550 |         "x": 0.1,
551 |         "y": 0.1
552 |        },
553 |        "content": {
554 |         "cell": "b878a4f9-4345-4abb-81f4-5a731c639ab8",
555 |         "part": "whole"
556 |        },
557 |        "id": "6cddb9f2-8e39-4010-8fab-3e70b3a8993f"
558 |       }
559 |      }
560 |     },
561 |     "cd587236-8a19-444d-8b18-69d782dbf725": {
562 |      "id": "cd587236-8a19-444d-8b18-69d782dbf725",
563 |      "prev": null,
564 |      "regions": {
565 |       "ef377bfe-ff45-49db-b471-f79ecb10b580": {
566 |        "attrs": {
567 |         "height": 0.8,
568 |         "width": 0.8,
569 |         "x": 0.1,
570 |         "y": 0.1
571 |        },
572 |        "content": {
573 |         "cell": "dc7a1635-0bbd-4bf7-a07e-7a36f58e258b",
574 |         "part": "whole"
575 |        },
576 |        "id": "ef377bfe-ff45-49db-b471-f79ecb10b580"
577 |       }
578 |      }
579 |     },
580 |     "e2f5626f-0d60-47cb-967f-0edababb0329": {
581 |      "id": "e2f5626f-0d60-47cb-967f-0edababb0329",
582 |      "prev": "ae3f4c01-80dc-4add-889a-05c74f7155a5",
583 |      "regions": {
584 |       "eef49fa0-0f9b-4228-8fb8-79e079bf7682": {
585 |        "attrs": {
586 |         "height": 0.8,
587 |         "width": 0.8,
588 |         "x": 0.1,
589 |         "y": 0.1
590 |        },
591 |        "content": {
592 |         "cell": "9110098b-9675-4d64-adf3-c947073d4c4d",
593 |         "part": "whole"
594 |        },
595 |        "id": "eef49fa0-0f9b-4228-8fb8-79e079bf7682"
596 |       }
597 |      }
598 |     },
599 |     "f001d476-5814-4664-a722-f04f5d23cd52": {
600 |      "id": "f001d476-5814-4664-a722-f04f5d23cd52",
601 |      "prev": "cd587236-8a19-444d-8b18-69d782dbf725",
602 |      "regions": {
603 |       "5a176076-c5a5-4b50-ab2c-9cd0baedad45": {
604 |        "attrs": {
605 |         "height": 0.8,
606 |         "width": 0.8,
607 |         "x": 0.1,
608 |         "y": 0.1
609 |        },
610 |        "content": {
611 |         "cell": "53eee250-b3d0-4262-ad09-e87fb2acf82e",
612 |         "part": "whole"
613 |        },
614 |        "id": "5a176076-c5a5-4b50-ab2c-9cd0baedad45"
615 |       }
616 |      }
617 |     }
618 |    },
619 |    "themes": {
620 |     "default": "c6b5d1ad-d691-4000-9f62-de7fc0e83644",
621 |     "theme": {
622 |      "586a6e7a-f661-4d6c-90d0-1392715bea27": {
623 |       "id": "586a6e7a-f661-4d6c-90d0-1392715bea27",
624 |       "palette": {
625 |        "19cc588f-0593-49c9-9f4b-e4d7cc113b1c": {
626 |         "id": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c",
627 |         "rgb": [
628 |          252,
629 |          252,
630 |          252
631 |         ]
632 |        },
633 |        "31af15d2-7e15-44c5-ab5e-e04b16a89eff": {
634 |         "id": "31af15d2-7e15-44c5-ab5e-e04b16a89eff",
635 |         "rgb": [
636 |          68,
637 |          68,
638 |          68
639 |         ]
640 |        },
641 |        "50f92c45-a630-455b-aec3-788680ec7410": {
642 |         "id": "50f92c45-a630-455b-aec3-788680ec7410",
643 |         "rgb": [
644 |          155,
645 |          177,
646 |          192
647 |         ]
648 |        },
649 |        "c5cc3653-2ee1-402a-aba2-7caae1da4f6c": {
650 |         "id": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c",
651 |         "rgb": [
652 |          43,
653 |          126,
654 |          184
655 |         ]
656 |        },
657 |        "efa7f048-9acb-414c-8b04-a26811511a21": {
658 |         "id": "efa7f048-9acb-414c-8b04-a26811511a21",
659 |         "rgb": [
660 |          25.118061674008803,
661 |          73.60176211453744,
662 |          107.4819383259912
663 |         ]
664 |        }
665 |       },
666 |       "rules": {
667 |        "blockquote": {
668 |         "color": "50f92c45-a630-455b-aec3-788680ec7410"
669 |        },
670 |        "code": {
671 |         "font-family": "Anonymous Pro"
672 |        },
673 |        "h1": {
674 |         "color": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c",
675 |         "font-family": "Lato",
676 |         "font-size": 8
677 |        },
678 |        "h2": {
679 |         "color": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c",
680 |         "font-family": "Lato",
681 |         "font-size": 6
682 |        },
683 |        "h3": {
684 |         "color": "50f92c45-a630-455b-aec3-788680ec7410",
685 |         "font-family": "Lato",
686 |         "font-size": 5.5
687 |        },
688 |        "h4": {
689 |         "color": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c",
690 |         "font-family": "Lato",
691 |         "font-size": 5
692 |        },
693 |        "h5": {
694 |         "font-family": "Lato"
695 |        },
696 |        "h6": {
697 |         "font-family": "Lato"
698 |        },
699 |        "h7": {
700 |         "font-family": "Lato"
701 |        },
702 |        "pre": {
703 |         "font-family": "Anonymous Pro",
704 |         "font-size": 4
705 |        }
706 |       },
707 |       "text-base": {
708 |        "font-family": "Merriweather",
709 |        "font-size": 4
710 |       }
711 |      },
712 |      "c6b5d1ad-d691-4000-9f62-de7fc0e83644": {
713 |       "backgrounds": {
714 |        "dc7afa04-bf90-40b1-82a5-726e3cff5267": {
715 |         "background-color": "31af15d2-7e15-44c5-ab5e-e04b16a89eff",
716 |         "id": "dc7afa04-bf90-40b1-82a5-726e3cff5267"
717 |        }
718 |       },
719 |       "id": "c6b5d1ad-d691-4000-9f62-de7fc0e83644",
720 |       "palette": {
721 |        "19cc588f-0593-49c9-9f4b-e4d7cc113b1c": {
722 |         "id": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c",
723 |         "rgb": [
724 |          252,
725 |          252,
726 |          252
727 |         ]
728 |        },
729 |        "31af15d2-7e15-44c5-ab5e-e04b16a89eff": {
730 |         "id": "31af15d2-7e15-44c5-ab5e-e04b16a89eff",
731 |         "rgb": [
732 |          68,
733 |          68,
734 |          68
735 |         ]
736 |        },
737 |        "50f92c45-a630-455b-aec3-788680ec7410": {
738 |         "id": "50f92c45-a630-455b-aec3-788680ec7410",
739 |         "rgb": [
740 |          197,
741 |          226,
742 |          245
743 |         ]
744 |        },
745 |        "c5cc3653-2ee1-402a-aba2-7caae1da4f6c": {
746 |         "id": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c",
747 |         "rgb": [
748 |          43,
749 |          126,
750 |          184
751 |         ]
752 |        },
753 |        "efa7f048-9acb-414c-8b04-a26811511a21": {
754 |         "id": "efa7f048-9acb-414c-8b04-a26811511a21",
755 |         "rgb": [
756 |          25.118061674008803,
757 |          73.60176211453744,
758 |          107.4819383259912
759 |         ]
760 |        }
761 |       },
762 |       "rules": {
763 |        "a": {
764 |         "color": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c"
765 |        },
766 |        "blockquote": {
767 |         "color": "50f92c45-a630-455b-aec3-788680ec7410",
768 |         "font-size": 3
769 |        },
770 |        "code": {
771 |         "font-family": "Anonymous Pro"
772 |        },
773 |        "h1": {
774 |         "color": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c",
775 |         "font-family": "Merriweather",
776 |         "font-size": 8
777 |        },
778 |        "h2": {
779 |         "color": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c",
780 |         "font-family": "Merriweather",
781 |         "font-size": 6
782 |        },
783 |        "h3": {
784 |         "color": "50f92c45-a630-455b-aec3-788680ec7410",
785 |         "font-family": "Lato",
786 |         "font-size": 5.5
787 |        },
788 |        "h4": {
789 |         "color": "c5cc3653-2ee1-402a-aba2-7caae1da4f6c",
790 |         "font-family": "Lato",
791 |         "font-size": 5
792 |        },
793 |        "h5": {
794 |         "font-family": "Lato"
795 |        },
796 |        "h6": {
797 |         "font-family": "Lato"
798 |        },
799 |        "h7": {
800 |         "font-family": "Lato"
801 |        },
802 |        "li": {
803 |         "color": "50f92c45-a630-455b-aec3-788680ec7410",
804 |         "font-size": 3.25
805 |        },
806 |        "pre": {
807 |         "font-family": "Anonymous Pro",
808 |         "font-size": 4
809 |        }
810 |       },
811 |       "text-base": {
812 |        "color": "19cc588f-0593-49c9-9f4b-e4d7cc113b1c",
813 |        "font-family": "Lato",
814 |        "font-size": 4
815 |       }
816 |      }
817 |     }
818 |    }
819 |   }
820 |  },
821 |  "nbformat": 4,
822 |  "nbformat_minor": 0
823 | }
824 | 


--------------------------------------------------------------------------------
/Introduction_to_python_day_2_session_1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# An introduction to solving biological problems with Python\n",
  8 |     "\n",
  9 |     "## Session 2.1: Functions\n",
 10 |     "\n",
 11 |     "- [Function definition syntax](#Function-definition-syntax)\n",
 12 |     "- [Exercises 2.1.1](#Excercises-2.1.1)\n",
 13 |     "- [Return value](#Return-value)\n",
 14 |     "- [Exercises 2.1.2](#Exercises-2.1.2)\n",
 15 |     "- [Function arguments](#Function-arguments)\n",
 16 |     "- [Exercises 2.1.3](#Exercises-2.1.3)\n",
 17 |     "- [Variable scope](#Variable-scope)"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "markdown",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "## Function basics"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "markdown",
 29 |    "metadata": {},
 30 |    "source": [
 31 |     "We have already seen a number of functions built into python that let us do useful things to strings, collections and numbers etc. For example `print()` or `len()` which is passed some kind of sequence object and returns the length of the sequence.\n",
 32 |     "\n",
 33 |     "This is the general form of a function; it takes some input _arguments_ and returns some output based on the supplied arguments.\n",
 34 |     "\n",
 35 |     "The arguments to a function, if any, are supplied in parentheses and the result of the function _call_ is the result of evaluating the function.\n"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": null,
 41 |    "metadata": {
 42 |     "collapsed": false
 43 |    },
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "x = abs(-3.0)\n",
 47 |     "print(x)\n",
 48 |     "\n",
 49 |     "l = len(\"ACGGTGTCAA\")\n",
 50 |     "print(l)"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "As well as using python's built in functions, you can write your own. Functions are a nice way to **encapsulate some code that you want to reuse** elsewhere in your program, rather than repeating the same bit of code multiple times. They also provide a way to name some coherent block of code and allow you to structure a complex program."
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "metadata": {},
 63 |    "source": [
 64 |     "## Function definition syntax"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "Functions are defined in Python using the `def` keyword followed by the name of the function. If your function takes some arguments (input data) then you can name these in parentheses after the function name. If your function does not take any arguments you still need some empty parentheses. Here we define a simple function named `sayHello` that prints a line of text to the screen:"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {
 78 |     "collapsed": false
 79 |    },
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "def sayHello():\n",
 83 |     "    print('Hello world!')"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "Note that the code block for the function (just a single print line in this case) is indented relative to the `def`. The above definition just declares the function in an abstract way and nothing will be printed when the definition is made. To actually use a function you need to invoke it (call it) by using its name and a pair of round parentheses:"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": null,
 96 |    "metadata": {
 97 |     "collapsed": false
 98 |    },
 99 |    "outputs": [],
100 |    "source": [
101 |     "sayHello() # Call the function to print 'Hello world'"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "markdown",
106 |    "metadata": {},
107 |    "source": [
108 |     "If required, a function may be written so it accepts input. Here we specify a variable called `name` in the brackets of the function definition and this variable is then used by the function. Although the input variable is referred to inside the function the variable does not represent any particular value. It only takes a value if the function is actually used in context."
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": null,
114 |    "metadata": {
115 |     "collapsed": false
116 |    },
117 |    "outputs": [],
118 |    "source": [
119 |     "def sayHello(name):\n",
120 |     "    print('Hello', name)"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "markdown",
125 |    "metadata": {},
126 |    "source": [
127 |     "When we call (invoke) this function we specify a specific value for the input. Here we pass in the value `User`, so the name variable takes that value and uses it to print a message, as defined in the function. "
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": null,
133 |    "metadata": {
134 |     "collapsed": false
135 |    },
136 |    "outputs": [],
137 |    "source": [
138 |     "sayHello('User')  # Prints 'Hello User'"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "metadata": {},
144 |    "source": [
145 |     "When we call the function again with a different input value we naturally get a different message. Here we also illustrate that the input value can also be passed-in as a variable (text in this case)."
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": null,
151 |    "metadata": {
152 |     "collapsed": false
153 |    },
154 |    "outputs": [],
155 |    "source": [
156 |     "text = 'Mary'\n",
157 |     "sayHello(text)     # Prints 'Hello Mary'"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "markdown",
162 |    "metadata": {},
163 |    "source": [
164 |     "A function may also generate output that is passed back or returned to the program at the point at which the function was called. For example here we define a function to do a simple calculation of the square of input (`x`) to create an output (`y`):"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": null,
170 |    "metadata": {
171 |     "collapsed": false
172 |    },
173 |    "outputs": [],
174 |    "source": [
175 |     "def square(x):\n",
176 |     "  y = x*x\n",
177 |     "  return y"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "metadata": {},
183 |    "source": [
184 |     "Once the `return` statement is reached the operation of the function will end, and anything on the return line will be passed back as output. Here we call the function on an input number and catch the output value as result. Notice how the names of the variables used inside the function definition are separate from any variable names we may choose to use when calling the function.\n",
185 |     "  "
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": null,
191 |    "metadata": {
192 |     "collapsed": false
193 |    },
194 |    "outputs": [],
195 |    "source": [
196 |     "number = 7\n",
197 |     "result = square(number) # Call the square() function which returns a result\n",
198 |     "print(result)           # Prints: 49"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "markdown",
203 |    "metadata": {},
204 |    "source": [
205 |     "The function `square` can be used from now on anywhere in your program as many times as required on any (numeric) input values we like."
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": null,
211 |    "metadata": {
212 |     "collapsed": false
213 |    },
214 |    "outputs": [],
215 |    "source": [
216 |     "print(square(1.2e-3))   # Prints: 1.4399999999999998e-06"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "markdown",
221 |    "metadata": {},
222 |    "source": [
223 |     "A function can accept multiple input values, otherwise known as arguments. These are separated by commas inside the brackets of the function definition. Here we define a function that takes two arguments and performs a calculation on both, before sending back the result.\n"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": null,
229 |    "metadata": {
230 |     "collapsed": false
231 |    },
232 |    "outputs": [],
233 |    "source": [
234 |     "def calcFunc(x, y):\n",
235 |     "  z = x*x + y*y\n",
236 |     "  return z\n",
237 |     "\n",
238 |     "\n",
239 |     "result = calcFunc(1.414, 2.0)\n",
240 |     "print(result)  #  5.999396"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "markdown",
245 |    "metadata": {},
246 |    "source": [
247 |     "Note that this function does not check that x and y are valid forms of input. For the function to work properly we assume they are numbers. Depending on how this function is going to be used, appropriate checks could be added."
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "markdown",
252 |    "metadata": {},
253 |    "source": [
254 |     "Functions can be arbitrarily long and can perform very complex operations. However, to make a function reusable, it is often better to assign it a single responsibility and a descriptive name.\n",
255 |     "Let's define now a function to calculate the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) between two vectors:"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": null,
261 |    "metadata": {
262 |     "collapsed": false
263 |    },
264 |    "outputs": [],
265 |    "source": [
266 |     "def calcDistance(vec1, vec2):    \n",
267 |     "    dist = 0\n",
268 |     "    for i in range(len(vec1)):\n",
269 |     "        delta = vec1[i] - vec2[i]\n",
270 |     "        dist += delta*delta\n",
271 |     "    dist = dist**(1/2) # square-root\n",
272 |     "    return dist"
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "markdown",
277 |    "metadata": {},
278 |    "source": [
279 |     "For the record, the [preferred way to calculate a square root](https://docs.python.org/3/library/math.html#math.sqrt) is to use the `sqrt()` function from the standard-library `math` module:\n",
280 |     "```python\n",
281 |     "import math\n",
282 |     "math.sqrt(x)\n",
283 |     "```\n",
284 |     "\n",
285 |     "Let's experiment a little with our function."
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "code",
290 |    "execution_count": null,
291 |    "metadata": {
292 |     "collapsed": false
293 |    },
294 |    "outputs": [],
295 |    "source": [
296 |     "w1 = ( 23.1, 17.8, -5.6 )\n",
297 |     "w2 = ( 8.4, 15.9, 7.7 )\n",
298 |     "calcDistance( w1, w2 )"
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "markdown",
303 |    "metadata": {},
304 |    "source": [
305 |     "Note that the function is general and handles any two vectors (irrespective of their representation) as long as their dimensions are compatible:"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "code",
310 |    "execution_count": null,
311 |    "metadata": {
312 |     "collapsed": false
313 |    },
314 |    "outputs": [],
315 |    "source": [
316 |     "calcDistance( ( 1, 2 ), ( 3, 4 ) ) # dimension: 2"
317 |    ]
318 |   },
319 |   {
320 |    "cell_type": "code",
321 |    "execution_count": null,
322 |    "metadata": {
323 |     "collapsed": false
324 |    },
325 |    "outputs": [],
326 |    "source": [
327 |     "calcDistance( [ 1, 2 ], [ 3, 4 ] ) # vectors represented as lists"
328 |    ]
329 |   },
330 |   {
331 |    "cell_type": "code",
332 |    "execution_count": null,
333 |    "metadata": {
334 |     "collapsed": false
335 |    },
336 |    "outputs": [],
337 |    "source": [
338 |     "calcDistance( ( 1, 2 ), [ 3, 4 ] ) # mixed representation"
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "markdown",
343 |    "metadata": {},
344 |    "source": [
345 |     "## Exercises 2.1.1\n",
346 |     "\n",
347 |     "- a. Calculate the mean\n",
348 |     "    - Write a function that takes 2 numerical arguments and returns their mean. Test your function on some examples.\n",
349 |     "    - Write another function that takes a list of numbers and returns the mean of all the numbers in the list.\n",
350 |     "- b. Write a function that takes a single DNA sequence as an argument and estimates the molecular weight of this sequence. Test your function using some example sequences. The following table gives the weight of each (single-stranded) nucleotide in g/mol:\n",
351 |     "\n",
352 |     "<table>\n",
353 |     "    <tr><th>DNA Residue</th><th>Weight</th></tr>\n",
354 |     "    <tr><td>A</td><td>331</td></tr>\n",
355 |     "    <tr><td>C</td><td>307</td></tr>\n",
356 |     "    <tr><td>G</td><td>347</td></tr>\n",
357 |     "    <tr><td>T</td><td>306</td></tr>\n",
358 |     "</table>\n",
359 |     "\n",
360 |     "\n",
361 |     "- c. If the sequence passed contains base `N`, use the mean weight of the other bases as the weight of base `N`."
362 |    ]
363 |   },
364 |   {
365 |    "cell_type": "markdown",
366 |    "metadata": {},
367 |    "source": [
368 |     "## Return value"
369 |    ]
370 |   },
371 |   {
372 |    "cell_type": "markdown",
373 |    "metadata": {},
374 |    "source": [
375 |     "There can be more than one `return` statement in a function, although typically there is only one, at the bottom. Consider the following function to get some text to say whether a number is positive or negative. It has three return statements: the first two return statements pass back text strings but the last, which would be reached if the input value were zero, has no explicit return value and thus passes back the Python `None` object. Any function code after this final return is ignored. \n",
376 |     "The `return` keyword immediately exits the function, and no more of the code in that function will be run once the function has returned (as program flow will be returned to the call site)."
377 |    ]
378 |   },
379 |   {
380 |    "cell_type": "code",
381 |    "execution_count": null,
382 |    "metadata": {
383 |     "collapsed": false
384 |    },
385 |    "outputs": [],
386 |    "source": [
387 |     "def getSign(value):\n",
388 |     "    \n",
389 |     "    if value > 0:\n",
390 |     "        return \"Positive\"\n",
391 |     "    \n",
392 |     "    elif value < 0:\n",
393 |     "        return \"Negative\"\n",
394 |     "    \n",
395 |     "    return # implicit 'None'\n",
396 |     "\n",
397 |     "    print(\"Hello world\") # execution does not reach this line\n",
398 |     "    \n",
399 |     "print(\"getSign( 33.6 ):\", getSign( 33.6 ))\n",
400 |     "print(\"getSign( -7 ):\", getSign( -7 ))\n",
401 |     "print(\"getSign( 0 ):\", getSign( 0 ))"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "markdown",
406 |    "metadata": {},
407 |    "source": [
408 |     "All of the examples of functions so far have returned only single values; however, it is possible to pass back more than one value via the `return` statement. In the following example we define a function that takes two arguments and passes back three values. The return values are really passed back inside a single tuple, which can be caught as a single collection of values. "
409 |    ]
410 |   },
411 |   {
412 |    "cell_type": "code",
413 |    "execution_count": null,
414 |    "metadata": {
415 |     "collapsed": false
416 |    },
417 |    "outputs": [],
418 |    "source": [
419 |     "def myFunction(value1, value2):\n",
420 |     "    \n",
421 |     "    total = value1 + value2\n",
422 |     "    difference = value1 - value2\n",
423 |     "    product = value1 * value2\n",
424 |     "    \n",
425 |     "    return total, difference, product\n",
426 |     "\n",
427 |     "values = myFunction( 3, 7 )  # Grab output as a whole tuple\n",
428 |     "print(\"Results as a tuple:\", values)\n",
429 |     "\n",
430 |     "x, y, z = myFunction( 3, 7 ) # Unpack tuple to grab individual values\n",
431 |     "print(\"x:\", x)\n",
432 |     "print(\"y:\", y)\n",
433 |     "print(\"z:\", z)"
434 |    ]
435 |   },
436 |   {
437 |    "cell_type": "markdown",
438 |    "metadata": {},
439 |    "source": [
440 |     "## Exercises 2.1.2\n",
441 |     "\n",
442 |     "a. Write a function that counts the number of each base found in a DNA sequence. Return the result as a tuple of 4 numbers representing the counts of each base `A`, `C`, `G` and `T`.\n",
443 |     "\n",
444 |     "b. Write a function to return the reverse-complement of a nucleotide sequence."
445 |    ]
446 |   },
447 |   {
448 |    "cell_type": "markdown",
449 |    "metadata": {},
450 |    "source": [
451 |     "## Function arguments"
452 |    ]
453 |   },
454 |   {
455 |    "cell_type": "markdown",
456 |    "metadata": {},
457 |    "source": [
458 |     "### Mandatory arguments\n",
459 |     "\n",
460 |     "The arguments we have passed to functions so far have all been _mandatory_. If we do not supply them, or if we supply the wrong number of arguments, Python will throw an error, also called an exception:"
461 |    ]
462 |   },
463 |   {
464 |    "cell_type": "code",
465 |    "execution_count": null,
466 |    "metadata": {
467 |     "collapsed": true
468 |    },
469 |    "outputs": [],
470 |    "source": [
471 |     "def square(number):\n",
472 |     "    # one mandatory argument\n",
473 |     "    y = number*number\n",
474 |     "    return y"
475 |    ]
476 |   },
477 |   {
478 |    "cell_type": "code",
479 |    "execution_count": null,
480 |    "metadata": {
481 |     "collapsed": false
482 |    },
483 |    "outputs": [],
484 |    "source": [
485 |     "square(2)"
486 |    ]
487 |   },
488 |   {
489 |    "cell_type": "markdown",
490 |    "metadata": {},
491 |    "source": [
492 |     "**Mandatory arguments are assumed to come in the same order as the arguments in the function definition**, but you can also opt to specify the arguments using the argument names as _keywords_, supplying the values corresponding to each keyword with a `=` sign."
493 |    ]
494 |   },
495 |   {
496 |    "cell_type": "code",
497 |    "execution_count": null,
498 |    "metadata": {
499 |     "collapsed": false
500 |    },
501 |    "outputs": [],
502 |    "source": [
503 |     "square(number=3)"
504 |    ]
505 |   },
506 |   {
507 |    "cell_type": "code",
508 |    "execution_count": null,
509 |    "metadata": {
510 |     "collapsed": false
511 |    },
512 |    "outputs": [],
513 |    "source": [
514 |     "def repeat(seq, n):\n",
515 |     "    # two mandatory arguments\n",
516 |     "    result = ''\n",
517 |     "    for i in range(0,n):\n",
518 |     "        result += seq\n",
519 |     "    return result\n",
520 |     "\n",
521 |     "print(repeat(\"CTA\", 3))\n",
522 |     "print(repeat(n=4, seq=\"GTT\"))"
523 |    ]
524 |   },
525 |   {
526 |    "cell_type": "markdown",
527 |    "metadata": {},
528 |    "source": [
529 |     "<div class=\"alert-warning\">**NOTE** Unnamed (positional) arguments must come before named arguments, even if they look to be in the right order.</div>"
530 |    ]
531 |   },
532 |   {
533 |    "cell_type": "code",
534 |    "execution_count": null,
535 |    "metadata": {
536 |     "collapsed": false
537 |    },
538 |    "outputs": [],
539 |    "source": [
540 |     "print(repeat(seq=\"CTA\", n=3))"
541 |    ]
542 |   },
543 |   {
544 |    "cell_type": "markdown",
545 |    "metadata": {},
546 |    "source": [
547 |     "### Arguments with default values\n",
548 |     "Sometimes it is useful to give some arguments a default value that the caller can override, but which will be used if the caller does not supply a value for this argument. We can do this by assigning some value to the named argument with the `=` operator in the function definition."
549 |    ]
550 |   },
551 |   {
552 |    "cell_type": "code",
553 |    "execution_count": null,
554 |    "metadata": {
555 |     "collapsed": false
556 |    },
557 |    "outputs": [],
558 |    "source": [
559 |     "def runSimulation(nsteps=1000):\n",
560 |     "    print(\"Running simulation for\", nsteps, \"steps\")\n",
561 |     "\n",
562 |     "runSimulation(500)\n",
563 |     "runSimulation()"
564 |    ]
565 |   },
566 |   {
567 |    "cell_type": "markdown",
568 |    "metadata": {},
569 |    "source": [
570 |     "<div class=\"alert-warning\">**CAVEAT**: default arguments are defined once and keep their state between calls. This can be a problem for *mutable* objects:</div>"
571 |    ]
572 |   },
573 |   {
574 |    "cell_type": "code",
575 |    "execution_count": null,
576 |    "metadata": {
577 |     "collapsed": false
578 |    },
579 |    "outputs": [],
580 |    "source": [
581 |     "def myFunction(parameters=[]):\n",
582 |     "    parameters.append( 100 )\n",
583 |     "    print(parameters)\n",
584 |     "    \n",
585 |     "myFunction()\n",
586 |     "myFunction()\n",
587 |     "myFunction()\n",
588 |     "myFunction([])\n",
589 |     "myFunction([])\n",
590 |     "myFunction([])"
591 |    ]
592 |   },
593 |   {
594 |    "cell_type": "markdown",
595 |    "metadata": {},
596 |    "source": [
597 |     "To avoid this, make the argument mandatory so that the caller always supplies the object, or avoid modifying *mutable* default arguments."
598 |    ]
599 |   },
600 |   {
601 |    "cell_type": "code",
602 |    "execution_count": null,
603 |    "metadata": {
604 |     "collapsed": false
605 |    },
606 |    "outputs": [],
607 |    "source": [
608 |     "def myFunction(parameters):\n",
609 |     "    # one mandatory argument without default value\n",
610 |     "    parameters.append( 100 )\n",
611 |     "    print(parameters)\n",
612 |     "    \n",
613 |     "my_list = []\n",
614 |     "myFunction(my_list)\n",
615 |     "myFunction(my_list)\n",
616 |     "myFunction(my_list)\n",
617 |     "my_new_list = []\n",
618 |     "myFunction(my_new_list)"
619 |    ]
620 |   },
621 |   {
622 |    "cell_type": "markdown",
623 |    "metadata": {},
624 |    "source": [
625 |     "### Position of mandatory arguments\n",
626 |     "Arrange function arguments so that *mandatory* arguments come first:"
627 |    ]
628 |   },
629 |   {
630 |    "cell_type": "code",
631 |    "execution_count": null,
632 |    "metadata": {
633 |     "collapsed": false
634 |    },
635 |    "outputs": [],
636 |    "source": [
637 |     "def runSimulation(initialTemperature, nsteps=1000):\n",
638 |     "    # one mandatory argument followed by one with default value\n",
639 |     "    print(\"Running simulation starting at\", initialTemperature, \"K and doing\", nsteps, \"steps\")\n",
640 |     "    \n",
641 |     "runSimulation(300, 500)\n",
642 |     "runSimulation(300)"
643 |    ]
644 |   },
645 |   {
646 |    "cell_type": "markdown",
647 |    "metadata": {},
648 |    "source": [
649 |     "As before, no positional argument can appear after a keyword argument, and all required arguments must still be provided."
650 |    ]
651 |   },
652 |   {
653 |    "cell_type": "code",
654 |    "execution_count": null,
655 |    "metadata": {
656 |     "collapsed": false
657 |    },
658 |    "outputs": [],
659 |    "source": [
660 |     "runSimulation( nsteps=100, initialTemperature=300 )"
661 |    ]
662 |   },
663 |   {
664 |    "cell_type": "code",
665 |    "execution_count": null,
666 |    "metadata": {
667 |     "collapsed": false
668 |    },
669 |    "outputs": [],
670 |    "source": [
671 |     "runSimulation( initialTemperature=300 )"
672 |    ]
673 |   },
674 |   {
675 |    "cell_type": "code",
676 |    "execution_count": null,
677 |    "metadata": {
678 |     "collapsed": false
679 |    },
680 |    "outputs": [],
681 |    "source": [
682 |     "runSimulation( nsteps=100 ) # Error: missing required argument 'initialTemperature'"
683 |    ]
684 |   },
685 |   {
686 |    "cell_type": "code",
687 |    "execution_count": null,
688 |    "metadata": {
689 |     "collapsed": false
690 |    },
691 |    "outputs": [],
692 |    "source": [
693 |     "runSimulation( nsteps=100, 300 ) # Error: positional argument follows keyword argument"
694 |    ]
695 |   },
696 |   {
697 |    "cell_type": "markdown",
698 |    "metadata": {},
699 |    "source": [
700 |     "Keyword names must, naturally, match those declared in the function definition:"
701 |    ]
702 |   },
703 |   {
704 |    "cell_type": "code",
705 |    "execution_count": null,
706 |    "metadata": {
707 |     "collapsed": false
708 |    },
709 |    "outputs": [],
710 |    "source": [
711 |     "runSimulation( initialTemperature=300, numSteps=100 ) # Error: unexpected keyword argument 'numSteps'"
712 |    ]
713 |   },
714 |   {
715 |    "cell_type": "markdown",
716 |    "metadata": {},
717 |    "source": [
718 |     "A function cannot be defined with mandatory arguments after default ones."
719 |    ]
720 |   },
721 |   {
722 |    "cell_type": "code",
723 |    "execution_count": null,
724 |    "metadata": {
725 |     "collapsed": false
726 |    },
727 |    "outputs": [],
728 |    "source": [
729 |     "def badFunction(nsteps=1000, initialTemperature):\n",
730 |     "    pass"
731 |    ]
732 |   },
733 |   {
734 |    "cell_type": "markdown",
735 |    "metadata": {},
736 |    "source": [
737 |     "## Exercises 2.1.3\n",
738 |     "\n",
739 |     "Extend your solution to the previous exercise estimating the weight of a DNA sequence so that it can also calculate the weight of an RNA sequence. Use an optional argument to specify the molecule type, but default to DNA. The weights of RNA residues are:\n",
740 |     "\n",
741 |     "<table>\n",
742 |     "    <tr><th>RNA Residue</th><th>Weight</th></tr>\n",
743 |     "    <tr><td>A</td><td>347</td></tr>\n",
744 |     "    <tr><td>C</td><td>323</td></tr>\n",
745 |     "    <tr><td>G</td><td>363</td></tr>\n",
746 |     "    <tr><td>U</td><td>324</td></tr>\n",
747 |     "</table>\n"
748 |    ]
749 |   },
750 |   {
751 |    "cell_type": "markdown",
752 |    "metadata": {},
753 |    "source": [
754 |     "## Variable scope"
755 |    ]
756 |   },
757 |   {
758 |    "cell_type": "markdown",
759 |    "metadata": {},
760 |    "source": [
761 |     "Every variable in Python has a _scope_ in which it is defined. Variables defined at the outermost level are known as _globals_ (although typically only for the current module). In contrast, variables defined within a function are local, and cannot be accessed from the outside."
762 |    ]
763 |   },
764 |   {
765 |    "cell_type": "code",
766 |    "execution_count": null,
767 |    "metadata": {
768 |     "collapsed": false
769 |    },
770 |    "outputs": [],
771 |    "source": [
772 |     "def mathFunction(x, y):\n",
773 |     "    math_func_result = ( x + y ) * ( x - y )\n",
774 |     "    return math_func_result"
775 |    ]
776 |   },
777 |   {
778 |    "cell_type": "code",
779 |    "execution_count": null,
780 |    "metadata": {
781 |     "collapsed": false
782 |    },
783 |    "outputs": [],
784 |    "source": [
785 |     "answer = mathFunction( 4, 7 )\n",
786 |     "print(answer)"
787 |    ]
788 |   },
789 |   {
790 |    "cell_type": "code",
791 |    "execution_count": null,
792 |    "metadata": {
793 |     "collapsed": false
794 |    },
795 |    "outputs": [],
796 |    "source": [
797 |     "answer = mathFunction( 4, 7 )\n",
798 |     "print(math_func_result)"
799 |    ]
800 |   },
801 |   {
802 |    "cell_type": "markdown",
803 |    "metadata": {},
804 |    "source": [
805 |     "Generally, variables defined in an outer scope are also visible in functions, but you should be careful manipulating them as this can lead to confusing code. Assigning to such a name inside a function makes it a local variable of that function, so Python will actually raise an error (`UnboundLocalError`) if the function uses the variable before assigning to it, as in the example below. Instead it is a good idea to avoid using global variables and, for example, to pass any necessary variables as parameters to your functions."
806 |    ]
807 |   },
808 |   {
809 |    "cell_type": "code",
810 |    "execution_count": null,
811 |    "metadata": {
812 |     "collapsed": false
813 |    },
814 |    "outputs": [],
815 |    "source": [
816 |     "counter = 1\n",
817 |     "def increment(): \n",
818 |     "    print(counter)\n",
819 |     "    counter += 1\n",
820 |     "\n",
821 |     "increment()\n",
822 |     "print(counter)"
823 |    ]
824 |   },
825 |   {
826 |    "cell_type": "markdown",
827 |    "metadata": {},
828 |    "source": [
829 |     "If you really want to do this, there is a way round this using the `global` statement. Any variable which is changed or created inside of a function is local, if it hasn't been declared as a global variable. To tell Python that we want to use the global variable, we have to explicitly state this by using the keyword `global`."
830 |    ]
831 |   },
832 |   {
833 |    "cell_type": "code",
834 |    "execution_count": null,
835 |    "metadata": {
836 |     "collapsed": false
837 |    },
838 |    "outputs": [],
839 |    "source": [
840 |     "counter = 1\n",
841 |     "def increment(): \n",
842 |     "    global counter\n",
843 |     "    print(counter)\n",
844 |     "    counter += 1\n",
845 |     "\n",
846 |     "increment()\n",
847 |     "print(counter)"
848 |    ]
849 |   },
850 |   {
851 |    "cell_type": "markdown",
852 |    "metadata": {},
853 |    "source": [
854 |     "<div class=\"alert-warning\">**NOTE** It is normally better to avoid global variables and pass values through arguments to functions instead.</div>"
855 |    ]
856 |   },
857 |   {
858 |    "cell_type": "code",
859 |    "execution_count": null,
860 |    "metadata": {
861 |     "collapsed": false
862 |    },
863 |    "outputs": [],
864 |    "source": [
865 |     "def increment(counter): \n",
866 |     "    return counter + 1\n",
867 |     "\n",
868 |     "counter = 0\n",
869 |     "counter = increment( counter ) \n",
870 |     "print(counter)"
871 |    ]
872 |   },
873 |   {
874 |    "cell_type": "markdown",
875 |    "metadata": {},
876 |    "source": [
877 |     "## Next session\n",
878 |     "\n",
879 |     "Go to our next notebook: [Introduction_to_python_day_2_session_2](Introduction_to_python_day_2_session_2.ipynb)"
880 |    ]
881 |   }
882 |  ],
883 |  "metadata": {
884 |   "kernelspec": {
885 |    "display_name": "Python 3",
886 |    "language": "python",
887 |    "name": "python3"
888 |   },
889 |   "language_info": {
890 |    "codemirror_mode": {
891 |     "name": "ipython",
892 |     "version": 3
893 |    },
894 |    "file_extension": ".py",
895 |    "mimetype": "text/x-python",
896 |    "name": "python",
897 |    "nbconvert_exporter": "python",
898 |    "pygments_lexer": "ipython3",
899 |    "version": "3.5.2"
900 |   }
901 |  },
902 |  "nbformat": 4,
903 |  "nbformat_minor": 0
904 | }
905 | 


--------------------------------------------------------------------------------