├── .ipynb_checkpoints
    └── python_intro-checkpoint.ipynb
├── Python questions
├── Python tutorial structure
├── README.md
├── milestone projects
    ├── Project 1 - GC content
    │   ├── project 1.0-GC content.py
    │   ├── project 1.1 - input validation.py
    │   ├── project 1.2 - function.py
    │   └── project 1.3 - RNA too.py
    ├── Project 2 - reverse complement
    │   ├── project 2.0-complement.py
    │   ├── project 2.1-string not list.py
    │   └── project 2.2-reverse.py
    ├── Project 3 - DNA to RNA to protein
    │   ├── Project 3-DNA_RNA_protein.py
    │   └── project 3-teacher version.py
    └── Project 4 - reading a fasta file
    │   └── read_fasta.py
├── python intro.py
└── python_intro.ipynb


/.ipynb_checkpoints/python_intro-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:42512dd288705f0efed4039b19230604208bfe8b48a5e91bb30d2c401f6189fa"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "heading",
 13 |      "level": 1,
 14 |      "metadata": {},
 15 |      "source": [
 16 |       " An intro to Python for biology"
 17 |      ]
 18 |     },
 19 |     {
 20 |      "cell_type": "heading",
 21 |      "level": 3,
 22 |      "metadata": {},
 23 |      "source": [
 24 |       "Variables and printing"
 25 |      ]
 26 |     },
 27 |     {
 28 |      "cell_type": "code",
 29 |      "collapsed": false,
 30 |      "input": [
 31 |       "# this is a comment, use these to tell people what your code does"
 32 |      ],
 33 |      "language": "python",
 34 |      "metadata": {},
 35 |      "outputs": []
 36 |     },
 37 |     {
 38 |      "cell_type": "code",
 39 |      "collapsed": false,
 40 |      "input": [
 41 |       "mystring = \"Homo sapiens\" # a string, letters, words, sentences, anything in quotes\n",
 42 |       "myinteger = 4 # an integer\n",
 43 |       "myfloat = 4.0 # a float (a number with a decimal point)\n",
 44 |       "\n",
 45 |       "print \"This is a string:\", mystring, \"this is a float:\", myfloat, \"and this is an integer: \", myinteger"
 46 |      ],
 47 |      "language": "python",
 48 |      "metadata": {},
 49 |      "outputs": [
 50 |       {
 51 |        "output_type": "stream",
 52 |        "stream": "stdout",
 53 |        "text": [
 54 |         "This is a string: Homo sapiens this is a float: 4.0 and this is an integer:  4\n"
 55 |        ]
 56 |       }
 57 |      ],
 58 |      "prompt_number": 4
 59 |     },
 60 |     {
 61 |      "cell_type": "heading",
 62 |      "level": 3,
 63 |      "metadata": {},
 64 |      "source": [
 65 |       "Basic Math"
 66 |      ]
 67 |     },
 68 |     {
 69 |      "cell_type": "markdown",
 70 |      "metadata": {},
 71 |      "source": [
 72 |       "You can do calculations:"
 73 |      ]
 74 |     },
 75 |     {
 76 |      "cell_type": "code",
 77 |      "collapsed": false,
 78 |      "input": [
 79 |       "myfloat = 2.0\n",
 80 |       "yourfloat = 3.0\n",
 81 |       "print myfloat/yourfloat"
 82 |      ],
 83 |      "language": "python",
 84 |      "metadata": {},
 85 |      "outputs": [
 86 |       {
 87 |        "output_type": "stream",
 88 |        "stream": "stdout",
 89 |        "text": [
 90 |         "0.666666666667\n"
 91 |        ]
 92 |       }
 93 |      ],
 94 |      "prompt_number": 6
 95 |     },
 96 |     {
 97 |      "cell_type": "markdown",
 98 |      "metadata": {},
 99 |      "source": [
100 |       "But be careful, when you calculate with only integers, you can only get an integer back, so Python rounds your answer down!"
101 |      ]
102 |     },
103 |     {
104 |      "cell_type": "code",
105 |      "collapsed": false,
106 |      "input": [
107 |       "myint = 2\n",
108 |       "yourint = 3\n",
109 |       "print myint/yourint"
110 |      ],
111 |      "language": "python",
112 |      "metadata": {},
113 |      "outputs": [
114 |       {
115 |        "output_type": "stream",
116 |        "stream": "stdout",
117 |        "text": [
118 |         "0\n"
119 |        ]
120 |       }
121 |      ],
122 |      "prompt_number": 7
123 |     },
124 |     {
125 |      "cell_type": "code",
126 |      "collapsed": false,
127 |      "input": [
128 |       "myint = 2\n",
129 |       "yourint = 3\n",
130 |       "\n",
131 |       "# Which of these work and why?"
132 |      ],
133 |      "language": "python",
134 |      "metadata": {},
135 |      "outputs": [],
136 |      "prompt_number": 10
137 |     },
138 |     {
139 |      "cell_type": "code",
140 |      "collapsed": false,
141 |      "input": [
142 |       "print float(myint)/yourint # 1"
143 |      ],
144 |      "language": "python",
145 |      "metadata": {},
146 |      "outputs": [
147 |       {
148 |        "output_type": "stream",
149 |        "stream": "stdout",
150 |        "text": [
151 |         "0.666666666667\n"
152 |        ]
153 |       }
154 |      ],
155 |      "prompt_number": 20
156 |     },
157 |     {
158 |      "cell_type": "code",
159 |      "collapsed": false,
160 |      "input": [
161 |       "print myint/float(yourint) # 2"
162 |      ],
163 |      "language": "python",
164 |      "metadata": {},
165 |      "outputs": [
166 |       {
167 |        "output_type": "stream",
168 |        "stream": "stdout",
169 |        "text": [
170 |         "0.666666666667\n"
171 |        ]
172 |       }
173 |      ],
174 |      "prompt_number": 21
175 |     },
176 |     {
177 |      "cell_type": "code",
178 |      "collapsed": false,
179 |      "input": [
180 |       "print (myint+0.0)/yourint # 3"
181 |      ],
182 |      "language": "python",
183 |      "metadata": {},
184 |      "outputs": [
185 |       {
186 |        "output_type": "stream",
187 |        "stream": "stdout",
188 |        "text": [
189 |         "0.666666666667\n"
190 |        ]
191 |       }
192 |      ],
193 |      "prompt_number": 22
194 |     },
195 |     {
196 |      "cell_type": "code",
197 |      "collapsed": false,
198 |      "input": [
199 |       "print myint/yourint*1.0 # 4"
200 |      ],
201 |      "language": "python",
202 |      "metadata": {},
203 |      "outputs": [
204 |       {
205 |        "output_type": "stream",
206 |        "stream": "stdout",
207 |        "text": [
208 |         "0.0\n"
209 |        ]
210 |       }
211 |      ],
212 |      "prompt_number": 23
213 |     },
214 |     {
215 |      "cell_type": "code",
216 |      "collapsed": false,
217 |      "input": [
218 |       "print myint*1.0/yourint # 5"
219 |      ],
220 |      "language": "python",
221 |      "metadata": {},
222 |      "outputs": [
223 |       {
224 |        "output_type": "stream",
225 |        "stream": "stdout",
226 |        "text": [
227 |         "0.666666666667\n"
228 |        ]
229 |       }
230 |      ],
231 |      "prompt_number": 24
232 |     },
233 |     {
234 |      "cell_type": "code",
235 |      "collapsed": false,
236 |      "input": [
237 |       "print float(myint/yourint) # 6"
238 |      ],
239 |      "language": "python",
240 |      "metadata": {},
241 |      "outputs": [
242 |       {
243 |        "output_type": "stream",
244 |        "stream": "stdout",
245 |        "text": [
246 |         "0.0\n"
247 |        ]
248 |       }
249 |      ],
250 |      "prompt_number": 25
251 |     },
252 |     {
253 |      "cell_type": "heading",
254 |      "level": 3,
255 |      "metadata": {},
256 |      "source": [
257 |       "Lists"
258 |      ]
259 |     },
260 |     {
261 |      "cell_type": "code",
262 |      "collapsed": false,
263 |      "input": [
264 |       "mylist = [6,3,6,7,2,6,2,9,7,0]\n",
265 |       "print mylist"
266 |      ],
267 |      "language": "python",
268 |      "metadata": {},
269 |      "outputs": [
270 |       {
271 |        "output_type": "stream",
272 |        "stream": "stdout",
273 |        "text": [
274 |         "[6, 3, 6, 7, 2, 6, 2, 9, 7, 0]\n"
275 |        ]
276 |       }
277 |      ],
278 |      "prompt_number": 32
279 |     },
280 |     {
281 |      "cell_type": "markdown",
282 |      "metadata": {},
283 |      "source": [
284 |       "You can get each item of the list using the brackets. Try to replace 0 with other numbers and see what you get:"
285 |      ]
286 |     },
287 |     {
288 |      "cell_type": "code",
289 |      "collapsed": false,
290 |      "input": [
291 |       "print mylist[0]"
292 |      ],
293 |      "language": "python",
294 |      "metadata": {},
295 |      "outputs": [
296 |       {
297 |        "output_type": "stream",
298 |        "stream": "stdout",
299 |        "text": [
300 |         "6\n"
301 |        ]
302 |       }
303 |      ],
304 |      "prompt_number": 33
305 |     },
306 |     {
307 |      "cell_type": "markdown",
308 |      "metadata": {},
309 |      "source": [
310 |       "Here the number inside the brackets is called an index; it represents the location of an item in the list. Python starts counting at 0, so index 0 is the first item."
311 |      ]
312 |     },
313 |     {
314 |      "cell_type": "markdown",
315 |      "metadata": {},
316 |      "source": [
317 |       "You can also get more than one item at a time by providing a range of indices:"
318 |      ]
319 |     },
320 |     {
321 |      "cell_type": "code",
322 |      "collapsed": false,
323 |      "input": [
324 |       "print mylist[0:3]"
325 |      ],
326 |      "language": "python",
327 |      "metadata": {},
328 |      "outputs": [
329 |       {
330 |        "output_type": "stream",
331 |        "stream": "stdout",
332 |        "text": [
333 |         "[6, 3, 6]\n"
334 |        ]
335 |       }
336 |      ],
337 |      "prompt_number": 34
338 |     },
339 |     {
340 |      "cell_type": "markdown",
341 |      "metadata": {},
342 |      "source": [
343 |       "Strings can be words, sentences, and anything else you can write as a sequence of characters. They act a lot like lists. "
344 |      ]
345 |     },
346 |     {
347 |      "cell_type": "markdown",
348 |      "metadata": {},
349 |      "source": [
350 |       "You can make a string using quotes"
351 |      ]
352 |     },
353 |     {
354 |      "cell_type": "code",
355 |      "collapsed": false,
356 |      "input": [
357 |       "mystring = \"ACGT\"\n",
358 |       "print mystring"
359 |      ],
360 |      "language": "python",
361 |      "metadata": {},
362 |      "outputs": [
363 |       {
364 |        "output_type": "stream",
365 |        "stream": "stdout",
366 |        "text": [
367 |         "ACGT\n"
373 |        ]
374 |       }
375 |      ],
376 |      "prompt_number": 26
377 |     },
378 |     {
379 |      "cell_type": "markdown",
380 |      "metadata": {},
381 |      "source": [
382 |       "What happens if you remove the quotes? What about using single quotes like 'this'?"
383 |      ]
384 |     },
385 |     {
386 |      "cell_type": "code",
387 |      "collapsed": false,
388 |      "input": [
389 |       "mystring = ACGT\n",
390 |       "print mystring"
391 |      ],
392 |      "language": "python",
393 |      "metadata": {},
394 |      "outputs": [
395 |       {
396 |        "ename": "NameError",
397 |        "evalue": "name 'ACGT' is not defined",
398 |        "output_type": "pyerr",
399 |        "traceback": [
400 |         "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
401 |         "\u001b[0;32m<ipython-input-30-43dfcec84783>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmystring\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mACGT\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mmystring\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
402 |         "\u001b[0;31mNameError\u001b[0m: name 'ACGT' is not defined"
403 |        ]
404 |       }
405 |      ],
406 |      "prompt_number": 30
407 |     },
408 |     {
409 |      "cell_type": "markdown",
410 |      "metadata": {},
411 |      "source": [
412 |       "You can get the length of the string using the len() function:"
413 |      ]
414 |     },
415 |     {
416 |      "cell_type": "code",
417 |      "collapsed": false,
418 |      "input": [
419 |       "mystring = \"ACGT\"\n",
420 |       "print len(mystring)"
421 |      ],
422 |      "language": "python",
423 |      "metadata": {},
424 |      "outputs": []
425 |     },
426 |     {
427 |      "cell_type": "markdown",
428 |      "metadata": {},
429 |      "source": [
430 |       "You can get each letter of the string using the brackets. Try to replace 0 with other numbers and see what you get:"
431 |      ]
432 |     },
433 |     {
434 |      "cell_type": "code",
435 |      "collapsed": false,
436 |      "input": [
437 |       "print mystring[0]"
438 |      ],
439 |      "language": "python",
440 |      "metadata": {},
441 |      "outputs": [
442 |       {
443 |        "output_type": "stream",
444 |        "stream": "stdout",
445 |        "text": [
446 |         "A\n"
447 |        ]
448 |       }
449 |      ],
450 |      "prompt_number": 29
451 |     },
452 |     {
453 |      "cell_type": "markdown",
454 |      "metadata": {},
455 |      "source": [
456 |       "Here is an example with DNA:"
457 |      ]
458 |     },
459 |     {
460 |      "cell_type": "code",
461 |      "collapsed": false,
462 |      "input": [
463 |       "DNA = \"CAACGGGCAATATGTCTCTGTGTG\"\n",
464 |       "print \"Your DNA is\", len(DNA), \"bases long\"\n",
465 |       "\n",
466 |       "print DNA[7]"
467 |      ],
468 |      "language": "python",
469 |      "metadata": {},
470 |      "outputs": [
471 |       {
472 |        "output_type": "stream",
473 |        "stream": "stdout",
474 |        "text": [
475 |         "Your DNA is 24 bases long\n",
476 |         "C\n"
477 |        ]
478 |       }
479 |      ],
480 |      "prompt_number": 35
481 |     }
482 |    ],
483 |    "metadata": {}
484 |   }
485 |  ]
486 | }


--------------------------------------------------------------------------------
/Python questions:
--------------------------------------------------------------------------------
  1 | Questions:
  2 | 
  3 | 
  4 | ########################################################################################
  5 | ######################## Variables and print statements ################################
  6 | ########################################################################################
  7 | 
  8 | ########## QUESTION 1: Variables ###########
  9 | What kind of variable is each of these? (integer, float, string)
 10 | A=93874
 11 | B="abc"
 12 | C="Asdflkj44534"
 13 | D="23423"
 14 | E=23
 15 | F=23.
 16 | 
 17 | 
 18 | ########## QUESTION 2: What is wrong with each of these print statements? ###########
 19 | 
 20 | print "My name is %d and my species is %s" % (myname,myspecies)
 21 | 
 22 | print "I am %s and my genome is %d base pairs long" % mygenomesize
 23 | 
 24 | print "A ribosome can make proteins at a rate of %d to %d amino acids per second" (10, 20)
 25 | 
 26 | 
 27 | ########################################################################################
 28 | ########################### Lists, strings, and for loops ##############################
 29 | ########################################################################################
 30 | 
 31 | ########## QUESTION 3: Which of these are correct? Which give errors? ###########
 32 | 
 33 | X=[3,5,6,2,2]
 34 | Y=["hello","world"]
 35 | print X[5]
 36 | print X[1+2]
 37 | print X[4]
 38 | print X[5]
 39 | print X[-1]
 40 | print Y[2]
 41 | print Y[3]
 42 | print Y[0]
 43 | print Y[-1]
 44 | print Y[-3]
 45 | 
 46 | ########## QUESTION 4: What is the output of each of these for loops? ###########
 47 | 
 48 | 
 49 | for tomato in xrange(4):
 50 | 	print tomato
 51 | 
 52 | for j in xrange(5,13):
 53 | 	print j
 54 | 
 55 | for j in xrange(5,13,3):
 56 | 	print j
 57 | 
 58 | fruits = ["Apples","Bananas","Coconut"]
 59 | 
 60 | for i in xrange(1,len(fruits)):
 61 | 	print fruits[i]
 62 | 
 63 | 
 64 | 
 65 | 
 66 | 
 67 | ########################################################################################
 68 | ################################## If statements #######################################
 69 | ########################################################################################
 70 | 
 71 | ########## QUESTION 5: True or False ###########
 72 | 
 73 | (True and False)
 74 | (True and True)
 75 | (False or False)
 76 | (False or True)
 77 | (True and (False or True))
 78 | (False and (False or (True or False)))
 79 | 
 80 | 
 81 | 
 82 | ########################################################################################
 83 | ################################### Basic Math #########################################
 84 | ########################################################################################
 85 | 
 86 | ########## QUESTION 6: Which of these give the correct answer? ###########
 87 | 
 88 | 5 * 6
 89 | 7/3
 90 | 5/98
 91 | 23.0/123
 92 | 6/2
 93 | 87/(1.0 * 45)
 94 | 78/39
 95 | float(34)/234
 96 | 23/(234 * 1.0)
 97 | 23/234 * 1.0
 98 | 5 * 2.7
 99 | 
100 | 
101 | 
102 | 


--------------------------------------------------------------------------------
/Python tutorial structure:
--------------------------------------------------------------------------------
 1 | BioCoding Course:
 2 | Python Tutorial Structure:
 3 | 
 4 | 
 5 | 
 6 | Variables and print statements
 7 | - Lecture
 8 | - Quiz/Game
 9 | 
10 | Lists/strings/for loops
11 | - Lecture
12 | - Quiz/Game
13 | 
14 | If statements
15 | - Lecture
16 | - Quiz/Game
17 | 
18 | Basic math
19 | - Lecture
20 | - Quiz/Game
21 | 
22 | Start project 1:
23 | - Write some code that takes a string called DNA and calculates the percentage of the nucleotides that are either C or G (as opposed to A or T)
24 | 	- count them up and divide them by the total at the end
25 | 	- print out the answer
26 | - Put input validation so it prints a message for every letter in the DNA string that isn't a nucleotide, then make sure you ignore it in the calculations for GC content
27 | - Make all the DNA upper case so you don't have to check a, c, g, and t too: DNA = DNA.upper()
28 | 
29 | Functions
30 | - Lecture
31 | 
32 | Finish project 1:
33 | - Make project 1 into a function and call it!
34 | - Make a small change to project 1 so it can also work with RNA (U should now be recognized like A and T) (you don't have to change the variable name DNA)
35 | 
36 | 
37 | Dictionaries
38 | - Lecture
39 | 
40 | Choosing the right data structure
41 | - Game
42 | 
43 | Reverse complement:
44 | - Talk about how DNA is antiparallel and how the bases match each other
45 | - Learn to reverse complement by hand
46 | 
47 | Project 2:
48 | - write a function called reverse_complement() that gets a string called DNA
49 | - write a dictionary within this function that decodes each A to T, T to A, G to C, and C to G.
50 | - loop through the DNA and decode it one letter at a time: this is the complement
51 | - then reverse the complement using the cool indexing we learned
52 | 
53 | Teach them about transcription and translation
54 | - how to find a start codon by hand, mark off every 3 nucleotides on paper, and stop at any of the stop codons
55 | - each codon is a code for one amino acid 
56 | 
57 | 
58 | 
59 | Project 3: # We guide them a lot more in this project, as it is pretty difficult. I would like to start each part with a class discussion in which we agree on the pseudocode together, and then break the students up to actually code that part. The job of the teacher here is to ask questions and guide the discussion without actually giving the answers away.
60 | 
61 | - the goal is to write a function that transcribes DNA into RNA and then translates it into protein
62 | - parts of this process have already been written and the pieces are broken up neatly into functions
63 | - the students' job is to fill in the rest of the code that is missing 
64 | # Project 3 is pretty difficult, so I have made a framework for it. That way the students can code the simpler parts.  
65 | 
66 |  
67 | 
68 | 
69 | Project 4: Reading a fasta file. Also a bit difficult, but we just want them exploring the file type and how you can look through the lines in the file.
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 
78 | 
79 | 
80 | 
81 | 
82 | Extras:
83 | 	Numpy
84 | 	- lecture written already 
85 | 	- problem set not written yet
86 | 
87 | 
88 | 	Matplotlib.pyplot
89 | 	- lecture not written yet
90 | 	- problem set not written yet
91 | 
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | BioCoding_Tutorial
2 | ==================
3 | 
4 | A tutorial in Python where all the examples teach students about biology. Written for high school students with no previous experience. 
5 | 


--------------------------------------------------------------------------------
/milestone projects/Project 1 - GC content/project 1.0-GC content.py:
--------------------------------------------------------------------------------
 1 | DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
 2 | 
 3 | 
 4 | AT = 0
 5 | GC = 0
 6 | for base in DNA:
 7 |     if base=="A" or base=="T":
 8 |         AT = AT+1
 9 |         # AT += 1
10 |     elif base=="G" or base=="C":
11 |         GC = GC+1
12 |         # GC += 1
13 | 
14 | GC_percentage = 100*float(GC)/float(GC+AT)
15 | #GC_percentage = 100.0*GC/GC+AT
16 | 
17 | print "The GC content of the input string is %f" % (GC_percentage)
18 | 
19 | 


--------------------------------------------------------------------------------
/milestone projects/Project 1 - GC content/project 1.1 - input validation.py:
--------------------------------------------------------------------------------
 1 | 
 2 | DNA = raw_input('Enter your DNA: ')
 3 | 
 4 | DNA = DNA.upper()
 5 | 
 6 | AT = 0
 7 | GC = 0
 8 | for base in DNA:
 9 |     if base=="A" or base=="T":
10 |         AT = AT+1
11 |         # AT += 1
12 |     elif base=="G" or base=="C":
13 |         GC = GC+1
14 |         # GC += 1
15 |     else:
16 |         print "The letter %s does not belong in DNA and was not counted" % (base)
17 | 
18 | if AT+GC == 0:
19 |     print "no nucleotides"
20 | else:
21 |     GC_percentage = 100*float(GC)/float(GC+AT)
22 |     #GC_percentage = 100.0*GC/GC+AT
23 |     print "The GC content of the input string is %f" % (GC_percentage)
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | # give inputs that their code should be able to handle:
32 | # AGCGATN
33 | # cgatcga
34 | # SLKFSLDKFJS
35 | 


--------------------------------------------------------------------------------
/milestone projects/Project 1 - GC content/project 1.2 - function.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | def calc_GC_content(DNA):
 5 |     DNA = DNA.upper()
 6 |     
 7 |     AT = 0
 8 |     GC = 0
 9 |     for base in DNA:
10 |         if base=="A" or base=="T":
11 |             AT = AT+1
12 |             # AT += 1
13 |         elif base=="G" or base=="C":
14 |             GC = GC+1
15 |             # GC += 1
16 |         else:
17 |             print "The letter %s does not belong in DNA and was not counted" % (base)
18 |     
19 |     if AT+GC == 0:
20 |         print "no nucleotides"
21 |         return 0 # always return something so you don't get an eror
22 |     else:
23 |         GC_percentage = 100*float(GC)/float(GC+AT)
24 |         #GC_percentage = 100.0*GC/GC+AT
25 |         return GC_percentage
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
# Ask the user for a sequence, then show its GC percentage.
DNA = raw_input('Enter your DNA: ')
print(calc_GC_content(DNA))


--------------------------------------------------------------------------------
/milestone projects/Project 1 - GC content/project 1.3 - RNA too.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | # make it work with RNA as well
 4 | 
 5 | def calc_GC_content(DNA):
 6 |     DNA = DNA.upper()
 7 |     
 8 |     AT = 0
 9 |     GC = 0
10 |     for base in DNA:
11 |         if base=="A" or base=="T" or base=="U":
12 |             AT = AT+1
13 |             # AT += 1
14 |         elif base=="G" or base=="C":
15 |             GC = GC+1
16 |             # GC += 1
17 |         else:
18 |             print "The letter %s does not belong in DNA and was not counted" % (base)
19 |     
20 |     if AT+GC == 0:
21 |         print "no nucleotides"
22 |         return 0 # always return something so you don't get an eror
23 |     else:
24 |         GC_percentage = 100*float(GC)/float(GC+AT)
25 |         #GC_percentage = 100.0*GC/GC+AT
26 |         return GC_percentage
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
# Ask the user for a DNA or RNA sequence, then show its GC percentage.
DNA = raw_input('Enter your DNA: ')
print(calc_GC_content(DNA))
36 | 


--------------------------------------------------------------------------------
/milestone projects/Project 2 - reverse complement/project 2.0-complement.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | def reverse_complement(DNA):
 4 |     DNA = DNA.upper()
 5 |     
 6 |     complement = []
 7 |     
 8 |     complement_dictionary = {
 9 |     "A":"T", 
10 |     "T":"A", 
11 |     "G":"C",
12 |     "C":"G"
13 |     }
14 |     
15 |     for base in DNA:
16 |         complement.append(complement_dictionary.get(base,"N"))
17 |         
18 |     return complement
19 | 
20 | 
# Example sequence; un-comment the raw_input line to type in your own instead.
DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
#DNA = raw_input('Enter your DNA: ')
print(reverse_complement(DNA))


--------------------------------------------------------------------------------
/milestone projects/Project 2 - reverse complement/project 2.1-string not list.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | def reverse_complement(DNA):
 4 |     DNA = DNA.upper()
 5 |     
 6 |     complement = str()
 7 |     
 8 |     complement_dictionary = {
 9 |     "A":"T", 
10 |     "T":"A", 
11 |     "G":"C",
12 |     "C":"G"
13 |     }
14 |     
15 |     for base in DNA:
16 |         complement+=complement_dictionary.get(base,"N")
17 |         
18 |     return complement
19 |     
20 | 
# Example sequence; un-comment the raw_input line to type in your own instead.
DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
#DNA = raw_input('Enter your DNA: ')
print(reverse_complement(DNA))


--------------------------------------------------------------------------------
/milestone projects/Project 2 - reverse complement/project 2.2-reverse.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | def reverse_complement(DNA):
 4 |     DNA = DNA.upper()
 5 |     
 6 |     complement = str()
 7 |     
 8 |     complement_dictionary = {
 9 |     "A":"T", 
10 |     "T":"A", 
11 |     "G":"C",
12 |     "C":"G"
13 |     }
14 |     
15 |     for base in DNA:
16 |         complement+=complement_dictionary.get(base,"N")
17 |     
18 |     revcomp = complement[::-1]
19 |     return revcomp
20 | 
21 | 
# Example sequence; un-comment the raw_input line to type in your own instead.
DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
#DNA = raw_input('Enter your DNA: ')
print(reverse_complement(DNA))
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/milestone projects/Project 3 - DNA to RNA to protein/Project 3-DNA_RNA_protein.py:
--------------------------------------------------------------------------------
  1 | 
  2 | def list_to_str(mylist):
  3 |     mystring = str()
  4 |     for item in mylist:
  5 |         mystring+=item
  6 |     return mystring
  7 |    
  8 | 
  9 | def reverse_complement(DNA):
 10 |     ########### Your code here #############
 11 |     
 12 |     return "not done yet"
 13 | 
 14 | 
 15 | def DNA_to_RNA(DNA):
 16 |     DNA=DNA.upper()
 17 |     
 18 |     ########### Your code here #############
 19 |     
 20 |  
 21 | def RNA_to_protein(RNA,frame):
 22 |     decoder = {'ACC': 'T', 'GCA': 'A', 'ACA': 'T', 'ACG': 'T', 'GUU': 'V', 
 23 |     'AAC': 'N', 'AGG': 'R', 'UGG': 'W', 'GUC': 'V', 'AGC': 'S', 'AUC': 'I', 
 24 |     'AGA': 'R', 'AAU': 'N', 'ACU': 'T', 'GUG': 'V', 'CAC': 'H', 'AAA': 'K', 
 25 |     'CCG': 'P', 'CCA': 'P', 'AGU': 'S', 'AAG': 'K', 'GGU': 'G', 'UCU': 'S', 
 26 |     'GCG': 'A', 'CGA': 'R', 'CAG': 'Q', 'GAU': 'D', 'UAU': 'Y', 'CGG': 'R', 
 27 |     'UCG': 'S', 'CCU': 'P', 'GGG': 'G', 'GGA': 'G', 'CCC': 'P', 'GGC': 'G', 
 28 |     'GAA': 'E', 'UAA': '*', 'UCC': 'S', 'UAC': 'Y', 'GAC': 'D', 'UGU': 'C', 
 29 |     'AUA': 'I', 'CUU': 'L', 'UCA': 'S', 'AUG': 'M', 'CGC': 'R', 'CUG': 'L', 
 30 |     'GAG': 'E', 'AUU': 'I', 'CAU': 'H', 'CUA': 'L', 'GCC': 'A', 'CAA': 'Q', 
 31 |     'UUU': 'F', 'CGU': 'R', 'GUA': 'V', 'UGC': 'C', 'GCU': 'A', 'UAG': '*', 
 32 |     'CUC': 'L', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F'}
 33 | 
 34 |     ########### Your code here #############
 35 |     
 36 |     for i in xrange(frame,len(RNA)-2,3):
 37 |         codon=RNA[i:i+3]
 38 |         
 39 |         
 40 |         ########### Your code here #############
 41 |         
 42 |     return protein
 43 | 
 44 | def find_coding_regions(protein):
 45 |     print protein
 46 |     ORFs=[]
 47 |     started = False
 48 |     start_indx=-1
 49 |     for i in xrange(len(protein)):
 50 |         aa=protein[i]    # aa means amino acid
 51 |         
 52 |         #if the protein is started
 53 |         if started==True: 
 54 |             if aa=='*': # stop codon
 55 |                 started=False
 56 |                 ORFs.append(protein[start_indx:i])
 57 |         # if the protein hasn't started
 58 |         elif aa=="M": # start codon
 59 |             started = True
 60 |             start_indx=i
 61 |             
 62 |     
 63 |     return ORFs
 64 |     
 65 | def translate(DNA):
 66 |     DNA_forward=DNA
 67 |     DNA_reverse=reverse_complement(DNA)
 68 |     RNA_forward=DNA_to_RNA(DNA_forward)
 69 |     RNA_reverse=DNA_to_RNA(DNA_reverse)
 70 |     
 71 |     allproteins=[]
 72 |     for frame in xrange(3):
 73 |         allproteins.append(RNA_to_protein(RNA_forward,frame))
 74 |         allproteins.append(RNA_to_protein(RNA_reverse,frame))
 75 |     
 76 |     all_ORFs=[]
 77 |     for protein in allproteins:
 78 |         all_ORFs.append(find_coding_regions(protein))
 79 |         
 80 |     return all_ORFs
 81 | 
 82 | def translate_only_longest_ORF(DNA):
 83 |     proteins=translate(DNA)
 84 | 
 85 |     lengths=[]
 86 |     for i in xrange(len(proteins)):
 87 |         if len(proteins[i]) < 1:
 88 |             lengths.append(0)
 89 |         else:
 90 |             lengths.append(len(proteins[i][0]))
 91 | 
 92 |     import numpy as np 
 93 |     lengths=np.array(lengths)
 94 |     if max(lengths) > 0:
 95 |         return proteins[np.argmax(lengths)][0]
 96 |     else:
 97 |         return "No open reading frames"
 98 | 
 99 | 
# Example sequence; un-comment the raw_input line to type in your own instead.
myinput = "GCTATGAGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
#myinput = raw_input('Enter your DNA: ')

# ORFs of all six reading frames.
allproteins = translate(myinput)
print(allproteins)

# Single longest ORF; stored but not printed.
longest_protein = translate_only_longest_ORF(myinput)
108 | 
109 | 


--------------------------------------------------------------------------------
/milestone projects/Project 3 - DNA to RNA to protein/project 3-teacher version.py:
--------------------------------------------------------------------------------
  1 | # from the intro materials
  2 | 
  3 | def list_to_str(mylist):
  4 |     mystring = str()
  5 |     for item in mylist:
  6 |         mystring+=item
  7 |     return mystring
  8 |    
  9 |    
 10 | # from Project 2
 11 | 
 12 | def reverse_complement(DNA):
 13 |     DNA = DNA.upper()
 14 |     
 15 |     complement = str()
 16 |     
 17 |     complement_dictionary = {
 18 |     "A":"T", 
 19 |     "T":"A", 
 20 |     "G":"C",
 21 |     "C":"G"
 22 |     }
 23 |     
 24 |     for base in DNA:
 25 |         complement+=complement_dictionary.get(base,"N")
 26 |     
 27 |     revcomp = complement[::-1]
 28 |     return revcomp
 29 | 
 30 | 
 31 | def DNA_to_RNA(DNA):
 32 |     DNA=DNA.upper()
 33 |     RNA = str()
 34 |     for i in xrange(len(DNA)):
 35 |         if DNA[i]=="T":
 36 |             RNA+="U"
 37 |         else:
 38 |             RNA+=DNA[i]
 39 |     return RNA
 40 |     
 41 |  
 42 | def RNA_to_protein(RNA,frame):
 43 |     decoder = {'ACC': 'T', 'GCA': 'A', 'ACA': 'T', 'ACG': 'T', 'GUU': 'V', 
 44 |     'AAC': 'N', 'AGG': 'R', 'UGG': 'W', 'GUC': 'V', 'AGC': 'S', 'AUC': 'I', 
 45 |     'AGA': 'R', 'AAU': 'N', 'ACU': 'T', 'GUG': 'V', 'CAC': 'H', 'AAA': 'K', 
 46 |     'CCG': 'P', 'CCA': 'P', 'AGU': 'S', 'AAG': 'K', 'GGU': 'G', 'UCU': 'S', 
 47 |     'GCG': 'A', 'CGA': 'R', 'CAG': 'Q', 'GAU': 'D', 'UAU': 'Y', 'CGG': 'R', 
 48 |     'UCG': 'S', 'CCU': 'P', 'GGG': 'G', 'GGA': 'G', 'CCC': 'P', 'GGC': 'G', 
 49 |     'GAA': 'E', 'UAA': '*', 'UCC': 'S', 'UAC': 'Y', 'GAC': 'D', 'UGU': 'C', 
 50 |     'AUA': 'I', 'CUU': 'L', 'UCA': 'S', 'AUG': 'M', 'CGC': 'R', 'CUG': 'L', 
 51 |     'GAG': 'E', 'AUU': 'I', 'CAU': 'H', 'CUA': 'L', 'GCC': 'A', 'CAA': 'Q', 
 52 |     'UUU': 'F', 'CGU': 'R', 'GUA': 'V', 'UGC': 'C', 'GCU': 'A', 'UAG': '*', 
 53 |     'CUC': 'L', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F'}
 54 | 
 55 |     protein = []
 56 |     for i in xrange(frame,len(RNA)-2,3):
 57 |         codon=RNA[i:i+3]
 58 |         protein.append(decoder.get(codon,"?"))
 59 |     return list_to_str(protein)
 60 | 
 61 | def find_coding_regions(protein):
 62 |     print protein
 63 |     ORFs=[]
 64 |     started = False
 65 |     start_indx=-1
 66 |     for i in xrange(len(protein)):
 67 |         aa=protein[i]    # aa means amino acid
 68 |         
 69 |         #if the protein is started
 70 |         if started==True: 
 71 |             if aa=='*':
 72 |                 started=False
 73 |                 ORFs.append(protein[start_indx:i])
 74 |         # if the protein hasn't started
 75 |         elif aa=="M":
 76 |             started = True
 77 |             start_indx=i
 78 |             
 79 |     
 80 |     return ORFs
 81 |     
 82 | def translate(DNA):
 83 |     DNA_forward=DNA
 84 |     DNA_reverse=reverse_complement(DNA)
 85 |     RNA_forward=DNA_to_RNA(DNA_forward)
 86 |     RNA_reverse=DNA_to_RNA(DNA_reverse)
 87 |     
 88 |     allproteins=[]
 89 |     for frame in xrange(3):
 90 |         allproteins.append(RNA_to_protein(RNA_forward,frame))
 91 |         allproteins.append(RNA_to_protein(RNA_reverse,frame))
 92 |     
 93 |     all_ORFs=[]
 94 |     for protein in allproteins:
 95 |         all_ORFs.append(find_coding_regions(protein))
 96 |         
 97 |     return all_ORFs
 98 | 
 99 | 
def translate_only_longest_ORF(DNA):
    """Return the longest open reading frame (ORF) found in any reading frame.

    translate() gives one list of ORFs per reading frame. All ORFs of all
    frames compete here; previously only the first ORF of each frame was
    looked at, so a longer ORF later in the same frame could never win.

    Returns the longest ORF as a string (the earliest one in translate()'s
    order if several share the maximum length), or the string
    "No open reading frames" when no frame contains an ORF.
    """
    # one flat list with every ORF from every frame
    candidates = []
    for frame_orfs in translate(DNA):
        candidates.extend(frame_orfs)

    if not candidates:
        return "No open reading frames"

    # max() with key=len does the job numpy's argmax was imported for
    return max(candidates, key=len)
116 | 
117 | 
118 | 
119 | 
120 | myinput = "GCTATGAGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
121 | #myinput = raw_input('Enter your DNA: ')
122 |     
123 | allproteins= translate(myinput)
124 | 
125 | print allproteins
126 | 
127 | 


--------------------------------------------------------------------------------
/milestone projects/Project 4 - reading a fasta file/read_fasta.py:
--------------------------------------------------------------------------------
 1 | # File input/output
 2 | 
 3 | 
 4 | 
 5 | def read_fasta(filename):
 6 |     f = open(filename,'r')
 7 |     names = []
 8 |     genome = []
 9 |     counter = -1
10 |     for line in f:
11 |         if line[0]==">":
12 |             
13 |             counter += 1
14 |             
15 |             names.append(line[1:].strip())
16 |             genome.append([])
17 |             
18 |         else:
19 |             genome[counter].append(line.strip())
20 |    
21 |     return names, genome
22 | 
23 | names, genome = read_fasta("drosophila_genome.fa")
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/python intro.py:
--------------------------------------------------------------------------------
  1 | ########################################################################################
  2 | ##
  3 | ##                  An intro to Python for biology
  4 | ##
  5 | ########################################################################################
  6 | 
  7 | ########################################################################################
  8 | ######################## Variables and print statements ################################
  9 | ########################################################################################
 10 | 
 11 | # this is a comment, use these to tell people what your code does
 12 | 
 13 | mystring = "Homo sapiens" # a string, letters, words, sentences, anything in quotes
 14 | myinteger = 4 # an integer
 15 | myfloat = 4.0 # a float (a number with a decimal point)
 16 | 
 17 | print "This is a string: %s, this is a float: %f, and this is an integer: %d" % (mystring,myfloat,myinteger)
 18 | 
 19 | print "We are %s and there are %d bases in our DNA" % (mystring, myinteger)
 20 | 
 21 | # use %s when you need a string
 22 | # use %d when you need an integer
 23 | # use %f when you need a float (a number with a decimal point)
 24 | 
 25 | 
 26 | ########################################################################################
 27 | ################################### Basic Math #########################################
 28 | ########################################################################################
 29 | 
 30 | myfloat = 2.0
 31 | yourfloat = 3.0
 32 | print myfloat/yourfloat
 33 | 
 34 | 
 35 | myint = 2
 36 | yourint = 3
 37 | print myint/yourint
 38 | 
 39 | # The answer is 2/3 = 0.666666667, but Python 2 (like many other programming languages) has a quirk: when you divide one integer by another, the result is also an integer, so the decimal part is thrown away.
 40 | # Let's explore this and see if we can find tricks to deal with it
 41 | 
 42 | myint = 2
 43 | yourint = 3
 44 | 
 45 | # Which of these work and why?
 46 | 
 47 | print float(myint)/yourint # 1
 48 | 
 49 | print myint/float(yourint) # 2
 50 | 
 51 | print (myint+0.0)/yourint # 3
 52 | 
 53 | print myint/yourint + 0.0 # 4
 54 | 
 55 | print myint*1.0/yourint # 5
 56 | 
 57 | print myint/yourint*1.0 # 6
 58 | 
 59 | 
 60 | ########################################################################################
 61 | ########################### Lists, strings, and for loops ##############################
 62 | ########################################################################################
 63 | 
 64 | 
 65 | ####### LISTS #######
 66 | mylist = [6,3,6,7,2,6,2,9,7,0]
 67 | print mylist
 68 | 
 69 | print mylist[0]
 70 | print mylist[1]
 71 | print mylist[2]
 72 | 
 73 | ###### STRINGS ######
 74 | # strings are like lists of letters, so you can get individual letters using the bracket notation
 75 | 
 76 | mystring = "ACGT"
 77 | print len(mystring) ## => 4
 78 | 
 79 | print mystring[0]
 80 | print mystring[1]
 81 | print mystring[2]
 82 | print mystring[3]
 83 | #print mystring[4] # Error:"string index out of range" 
 84 | 
 85 | 
 86 | DNA = "CAACGGGCAATATGTCTCTGTGTG"
 87 | print "Your DNA is %d bases long" % (len(DNA))
 88 | 
 89 | print DNA[7]
 90 | 
 91 | ######### lists can contain anything #########
 92 | 
 93 | mylist = [1,"anything",2.45,5,10,"Hello world"]
 94 | 
 95 | print mylist[4]
 96 | 
 97 | ####### you can also make lists longer ######
 98 | print mylist
 99 | 
100 | mylist.append(56)
101 | 
102 | print mylist
103 | 
104 | # this means you can start with an empty list and add to it:
105 | nucleotides = []
106 | nucleotides.append("A")
107 | nucleotides.append("C")
108 | nucleotides.append("G")
109 | nucleotides.append("T")
110 | print nucleotides
111 | 
112 | ########## QUESTION 3: Which of these are correct? Which give errors? ###########
113 | 
114 | 
115 | 
116 | ################ indexing lists ###################
117 | 
118 | mylist = range(10)
119 | 
120 | print mylist
121 | 
122 | print mylist[0:10]
123 | print mylist[:]
124 | print mylist[1:10]
125 | print mylist[4:8]
126 | print mylist[3:]
127 | print mylist[0:10:2] # what does the third number do?
128 | print mylist[0:7:3]
129 | print mylist[::-1] # what does this do?
130 | 
131 | 
132 | 
133 | 
134 | 
135 | ###### Two different versions of a "for loop" #######
136 | 
137 | for letter in DNA:
138 |     print letter
139 |     
140 | # the variable letter can be anything, try switching it out with tomato
141 | 
142 | for tomato in DNA:
143 |     print tomato
144 | 
145 | # we only call it letter so it makes sense. This becomes important when you have many more variables. At that point, naming them after fruits suddenly isn't funny anymore.
146 | 
147 | 
148 | # you can also make a for loop like this: 
149 | # xrange(len(DNA)) counts from 0 up to len(DNA)-1, so i takes the value of every index in DNA
150 | for i in xrange(len(DNA)):
151 |     print i
152 | 
153 | # when you do this, you get the bases in the DNA by saying DNA[i]
154 | for i in xrange(len(DNA)):
155 |     print "Letter number %d in your DNA is %s" % (i,DNA[i])
156 |     
157 | # sometimes we really care about the indices, like when we are dealing with 2 or more lists in parallel
158 | DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
159 | RNA = "AGCUUUUCAUUCUGACUGCAACGGGCAAUAUGUCUCUGUGUGGAUUAAAAAAAGAGUGUCUGAUAGCAGC"
160 | 
161 | length = len(DNA)
162 | 
163 | for i in xrange(length):
164 |     print "DNA: %s, RNA: %s" % (DNA[i],RNA[i])
165 |     
166 |     
167 |     
168 | # xrange can do many things:
169 | 
170 | for b in xrange(0,4,1): #  start=0,stop=4,step=1
171 |     print b
172 | 
173 | for j in xrange(2,10,2):
174 |     print j
175 | 
176 | ########## Now you try to print (1,3,5,7,9) using a for loop ###########
177 | 
178 | 
179 | ########### You can also make empty strings and add to them #############
180 | 
181 | DNA = "AGCTTTTCATTC"
182 | 
183 | # just like lists, you can also add to strings:
184 | mystring = str() # makes an empty string, just like mylist = [] makes an empty list
185 | 
186 | for i in xrange(len(DNA)):
187 |     mystring+=DNA[i]
188 | 
189 | print mystring
190 | 
191 | 
192 | # how is this different from making a list?
193 | 
194 | mylist = []
195 | 
196 | for i in xrange(len(DNA)):
197 |     mylist.append(DNA[i])
198 | 
199 | print mylist
200 | 
201 | # if you do make a list and want it to be a string, you can always just do this:
202 | 
203 | mystring = str()
204 | for item in mylist:
205 |     mystring+=item
206 | print mystring
207 | 
208 | 
209 | 
210 | 
211 | 
212 | ########################################################################################
213 | ################################## If statements #######################################
214 | ########################################################################################
215 | 
216 |     
217 | #What do you think this does?
218 | 
219 | correct=True # this is another kind of variable, called a boolean: it can only be True or False
220 | 
221 | if correct==True:
222 |     print "You are right!"
223 | else:
224 |     print "You are wrong!"
225 |     
226 | 
227 | # You can set as many cases as you want using elif (short for "else if")
228 |     
229 | mystring = "ABCYSGD"
230 | for letter in mystring:
231 |     if letter == "A":
232 |         print "Alpha"
233 |     elif letter == "B":
234 |         print "Bravo"
235 |     elif letter == "C":
236 |         print "Charlie"
237 |     elif letter == "D":
238 |         print "Delta"
239 |     else:
240 |         print "Ummm, I don't know what %s is in military speak" % (letter)
241 | 
242 | 
243 | # You can also use these if statements for input validation
244 | # If you ask the user for a string of DNA, it would be nice to check that the string looks like DNA before you do a lot of work with it
245 | 
246 | ############### Challenge: Loop through this string of DNA and output a message to the user for every letter that isn't A, T, C, or G
247 | 
248 | DNA = "AGCUUUUCATKCTGACUUNNNAACGGGCAATAUGTCTCTGTHTGGATTAAAAAAAGAGTGTCMGATAGCAGC"
249 | 
250 | 
251 | 
252 | # This is called "Input validation"
253 | 
254 | # Now that you can validate a user's input, let's learn how to actually ask the user for input. It's pretty simple:
255 | 
256 | 
257 | DNA = raw_input('Enter your DNA: ')
258 | 
259 | # whatever the user writes before pressing enter gets stored in the variable DNA
260 | 
261 | ######## We can also check multiple things at once #########
262 | 
263 | letter = "A"
264 | 
265 | if letter=="A" or letter=="G":
266 |     print "Purine"
267 | elif letter=="U" or letter=="T" or letter=="C":
268 |     print "Pyrimidine"
269 | else:
270 |     print "Not a nucleotide"
271 | 
272 | 
273 | 
274 | ########################################################################################
275 | ##################################### Functions ########################################
276 | ########################################################################################
277 | mylist = [1,2,3,4,100]
278 | 
279 | #built-in functions:
280 | min(mylist)
281 | max(mylist)
282 | sum(mylist)
283 | 
284 | 
285 | # General syntax for making your own functions:
286 | 
def function_name(argument1, argument2, etc):  # the line must end with a colon
    """Template for your own functions: returns argument1 + argument2 * etc."""
    # everything indented under def is the body of the function;
    # whatever comes after `return` is handed back to the caller
    return argument1 + argument2 * etc
291 | 
292 | result = function_name(3,4,2)
293 | # an example:
294 | 
def double(mylist):
    """Return a new list in which every item of mylist is multiplied by 2."""
    # shorthand for: start with an empty list, loop over mylist, append item*2
    return [item * 2 for item in mylist]
300 |     
301 | result = double(mylist)
302 | print result
303 | # What happens if we call double() and pass it a string?
304 | # Try it!
305 | 
306 | 
307 | # write a function that takes in a list, loops through it, and adds each item to a string
308 | # we saw some code that did that already, so just turn it into a function 
309 | # make sure to return the string at the end
310 | 
311 | 
312 | ########################################################################################
313 | ################################### Dictionaries #######################################
314 | ########################################################################################
315 | 
316 | 
317 | 
318 | # a dictionary is very useful, it is a data structure (meaning it keeps data organized)
319 | # it has a bunch of key:value pairs just like a real dictionary, every word has a definition
320 | # you can also think of it as a decoder, every key is a code, and every value
321 | # stored with the code is what that code means
322 | 
323 | my_dictionary = {"key":"value","secondkey":"secondvalue","thirdkey":"thirdvalue"}
324 | 
325 | print my_dictionary["secondkey"]
326 | print my_dictionary.keys()
327 | 
328 | print my_dictionary.values()
329 | 
330 | for key in my_dictionary:
331 |     print "Key: %s, Value: %s" % (key,my_dictionary[key])
332 | 
333 | # notice that the order of the dictionary isn't how we started it
334 | # dictionaries are not kept in any particular order; they are organized internally by key (using hashing), which is what makes them really fast to search
335 | # we use a key in a dictionary just like we normally use an index in a list or array
336 | 
337 | # the useful part is when we use the dictionary as a decoder:
338 | mystring = "ABCYSGD"
339 | 
340 | military = {"A":"Alpha", "B":"Bravo", "C":"Charlie", "D":"Delta"}
341 | 
342 | for letter in mystring:
343 |     print military.get(letter,"Ummm, I don't know what %s is in military speak" % letter)
344 | 
345 | 
346 | # remember we did the same thing with if statements before?
347 | # military is a dictionary: the perfect decoder
348 | # .get() is a method that all dictionaries have when they are created, 
349 | # the first argument is the key (or the code) and the second is the default 
350 | # the default is when the key isn't found 
351 | # military["Y"] # gives an error: you can try it
352 | # but military.get("Y","?") # doesn't crash your application
353 | 
354 | 
355 | 
356 | 
357 | # DNA has a recipe for proteins with the code being each set of three nucleotides 
358 | # then this is made into RNA (all the T's become U's)
359 | # (bases or letters: AGA, ACU, GUU, etc.)
360 | # the code "UUA" means the amino acid "L" or "Leucine"
361 | 
362 | 
363 | 
364 | # this website has all the codes, can you make them into a dictionary?
365 | # https://www.manylabs.org/file/lessonMedia/69/geneticCode.png
366 | # Hint: we are gonna use this for the third project!
367 | 
368 | 
369 | ########################################################################################
370 | ##############################  File input/output  #####################################
371 | ########################################################################################
372 | 
373 | ############## INPUT ###############
374 | 
375 | # open a file
376 | f = open("filename",'r') # 'r' means read
377 | for line in f:
378 |     print line
379 | 
380 | f.close() 
381 | 
382 | 
383 | ############## OUTPUT ###############
384 | f2 = open("filename",'w') # 'w' means write, so you overwrite the file
385 | f2.write("Hello world")
386 | f2.write("\n") # \n means newline, as in pressing enter
387 | for i in xrange(10):
388 |     f2.write("%d\t" % i) # \t means tab
389 | 
390 | f2.close()# close the file safely to make sure the last part you wrote is saved
391 | 
392 | # once f2 is closed, you can reuse the name
393 | 
394 | f2 = open("filename2",'a') # 'a' means append, so you add to the file each time
395 | f2.write("Hello world")
396 | f2.write("\n") # \n means newline, as in pressing enter
397 | 
398 | for i in xrange(10):
399 |     f2.write("%d\t" % i) # \t means tab
400 | 
401 | f2.close()
402 | 
403 | 
404 | 
405 | 
406 | ########################################################################################
407 | ######################################  Numpy  #########################################
408 | ########################################################################################
409 | 
410 | # Try to find a function that would take the average, like mean(), average(), or avg(). Can you find one?
411 | # If you want to do something simple and can't find a function for it, you can search online.
412 | # Many of the simple math and science functions are in a module named numpy:
413 | 
414 | import numpy as np 
415 | # np is a common nickname for numpy, so now when you want to use it, you say:
416 | print np.mean(mylist)
417 | 
418 | # Modules are packages of functions and other useful things, including something called an array
419 | # arrays are a bit like lists, but they are only for numbers, 
420 | # that way you can do math on an entire array at once instead of looping through the list and doubling every element like we did before
421 | 
422 | mylist = [1,2,3,4,100]
423 | myarray = np.array(mylist) # this takes mylist and makes it into an array, once again np is just the nickname for numpy
424 | 
425 | print "doubling a list:"
426 | print mylist*2
427 | print "doubling an array:"
428 | print myarray*2
429 | 
430 | # many things we do are easier with arrays, including anything involving math
431 | # An important thing to keep in mind is how to start an empty array if you don't want to just make a list into an array
432 | 
433 | # if you have a sentence, and you want to save the lengths of all the words:
434 | 
435 | sentence = "if you have a sentence, and you want to save the lengths of all the words"
436 | 
437 | # one of many built-in tricks: 
438 | words = sentence.split()
439 | # this one looks like a function but flipped around. This is because it's a built-in method for a string. 
440 | # sentence is a string, and all strings have a bunch of methods that you can call by doing sentence.methodname()
441 | # Methods are functions that belong to objects, so split() is a method that all strings have when they are created, and you can use them whenever you need to
442 | # Other functions we made don't belong to any particular object, so they are very flexible. 
443 | 
444 | # using a list:
445 | lengths=[]
446 | for word in words:
447 |     lengths.append(len(word))
448 | print lengths
449 | 
450 | # you can then convert this to an array:
451 | lengths=np.array(lengths)
452 | 
453 | 
454 | # using an array:
455 | lengths = np.zeros(len(words))
456 | for i in xrange(len(words)):
457 |     lengths[i]=len(words[i])
458 | print lengths
459 | 
460 | # using the array seems harder in this case, but sometimes it might come in handy. 
461 | 
462 | # I usually start with lists and convert them to arrays right before I have to do math on them
463 | 
464 | 
465 | ######## useful numpy functions ###########
466 | 
467 | print np.std(myarray)
468 | print np.random.rand(10) # multiply, add, or round to get the random numbers you need
469 | 
470 | # I want 100 random numbers from 10 up to and including 15, but only integers
471 | print np.floor(np.random.rand(100)*6+10)  
472 | 
473 | # np.floor() rounds down, you can also use np.ceil() to round up, or np.around() to round up or down, whichever is closest (like you learn in school)
474 | 
475 | # There are tons of numpy modules, just search online.
476 | # Anything you have ever seen in a math class will probably exist in numpy
477 | 
478 | 
479 | 
480 | 
481 | 
482 | 
483 | 
484 | 
485 | 


--------------------------------------------------------------------------------
/python_intro.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:42512dd288705f0efed4039b19230604208bfe8b48a5e91bb30d2c401f6189fa"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "heading",
 13 |      "level": 1,
 14 |      "metadata": {},
 15 |      "source": [
 16 |       " An intro to Python for biology"
 17 |      ]
 18 |     },
 19 |     {
 20 |      "cell_type": "heading",
 21 |      "level": 3,
 22 |      "metadata": {},
 23 |      "source": [
 24 |       "Variables and printing"
 25 |      ]
 26 |     },
 27 |     {
 28 |      "cell_type": "code",
 29 |      "collapsed": false,
 30 |      "input": [
 31 |       "# this is a comment, use these to tell people what your code does"
 32 |      ],
 33 |      "language": "python",
 34 |      "metadata": {},
 35 |      "outputs": []
 36 |     },
 37 |     {
 38 |      "cell_type": "code",
 39 |      "collapsed": false,
 40 |      "input": [
 41 |       "mystring = \"Homo sapiens\" # a string, letters, words, sentences, anything in quotes\n",
 42 |       "myinteger = 4 # an integer\n",
 43 |       "myfloat = 4.0 # a float (a number with a decimal point)\n",
 44 |       "\n",
 45 |       "print \"This is a string:\", mystring, \"this is a float:\", myfloat, \"and this is an integer: \", myinteger"
 46 |      ],
 47 |      "language": "python",
 48 |      "metadata": {},
 49 |      "outputs": [
 50 |       {
 51 |        "output_type": "stream",
 52 |        "stream": "stdout",
 53 |        "text": [
 54 |         "This is a string: Homo sapiens this is a float: 4.0 and this is an integer:  4\n"
 55 |        ]
 56 |       }
 57 |      ],
 58 |      "prompt_number": 4
 59 |     },
 60 |     {
 61 |      "cell_type": "heading",
 62 |      "level": 3,
 63 |      "metadata": {},
 64 |      "source": [
 65 |       "Basic Math"
 66 |      ]
 67 |     },
 68 |     {
 69 |      "cell_type": "markdown",
 70 |      "metadata": {},
 71 |      "source": [
 72 |       "You can do calculations:"
 73 |      ]
 74 |     },
 75 |     {
 76 |      "cell_type": "code",
 77 |      "collapsed": false,
 78 |      "input": [
 79 |       "myfloat = 2.0\n",
 80 |       "yourfloat = 3.0\n",
 81 |       "print myfloat/yourfloat"
 82 |      ],
 83 |      "language": "python",
 84 |      "metadata": {},
 85 |      "outputs": [
 86 |       {
 87 |        "output_type": "stream",
 88 |        "stream": "stdout",
 89 |        "text": [
 90 |         "0.666666666667\n"
 91 |        ]
 92 |       }
 93 |      ],
 94 |      "prompt_number": 6
 95 |     },
 96 |     {
 97 |      "cell_type": "markdown",
 98 |      "metadata": {},
 99 |      "source": [
100 |       "But be careful, when you calculate with only integers, you can only get an integer back, so Python rounds your answer down!"
101 |      ]
102 |     },
103 |     {
104 |      "cell_type": "code",
105 |      "collapsed": false,
106 |      "input": [
107 |       "myint = 2\n",
108 |       "yourint = 3\n",
109 |       "print myint/yourint"
110 |      ],
111 |      "language": "python",
112 |      "metadata": {},
113 |      "outputs": [
114 |       {
115 |        "output_type": "stream",
116 |        "stream": "stdout",
117 |        "text": [
118 |         "0\n"
119 |        ]
120 |       }
121 |      ],
122 |      "prompt_number": 7
123 |     },
124 |     {
125 |      "cell_type": "code",
126 |      "collapsed": false,
127 |      "input": [
128 |       "myint = 2\n",
129 |       "yourint = 3\n",
130 |       "\n",
131 |       "# Which of these work and why?"
132 |      ],
133 |      "language": "python",
134 |      "metadata": {},
135 |      "outputs": [],
136 |      "prompt_number": 10
137 |     },
138 |     {
139 |      "cell_type": "code",
140 |      "collapsed": false,
141 |      "input": [
142 |       "print float(myint)/yourint # 1"
143 |      ],
144 |      "language": "python",
145 |      "metadata": {},
146 |      "outputs": [
147 |       {
148 |        "output_type": "stream",
149 |        "stream": "stdout",
150 |        "text": [
151 |         "0.666666666667\n"
152 |        ]
153 |       }
154 |      ],
155 |      "prompt_number": 20
156 |     },
157 |     {
158 |      "cell_type": "code",
159 |      "collapsed": false,
160 |      "input": [
161 |       "print myint/float(yourint) # 2"
162 |      ],
163 |      "language": "python",
164 |      "metadata": {},
165 |      "outputs": [
166 |       {
167 |        "output_type": "stream",
168 |        "stream": "stdout",
169 |        "text": [
170 |         "0.666666666667\n"
171 |        ]
172 |       }
173 |      ],
174 |      "prompt_number": 21
175 |     },
176 |     {
177 |      "cell_type": "code",
178 |      "collapsed": false,
179 |      "input": [
180 |       "print (myint+0.0)/yourint # 3"
181 |      ],
182 |      "language": "python",
183 |      "metadata": {},
184 |      "outputs": [
185 |       {
186 |        "output_type": "stream",
187 |        "stream": "stdout",
188 |        "text": [
189 |         "0.666666666667\n"
190 |        ]
191 |       }
192 |      ],
193 |      "prompt_number": 22
194 |     },
195 |     {
196 |      "cell_type": "code",
197 |      "collapsed": false,
198 |      "input": [
199 |       "print myint/yourint*1.0 # 4"
200 |      ],
201 |      "language": "python",
202 |      "metadata": {},
203 |      "outputs": [
204 |       {
205 |        "output_type": "stream",
206 |        "stream": "stdout",
207 |        "text": [
208 |         "0.0\n"
209 |        ]
210 |       }
211 |      ],
212 |      "prompt_number": 23
213 |     },
214 |     {
215 |      "cell_type": "code",
216 |      "collapsed": false,
217 |      "input": [
218 |       "print myint*1.0/yourint # 5"
219 |      ],
220 |      "language": "python",
221 |      "metadata": {},
222 |      "outputs": [
223 |       {
224 |        "output_type": "stream",
225 |        "stream": "stdout",
226 |        "text": [
227 |         "0.666666666667\n"
228 |        ]
229 |       }
230 |      ],
231 |      "prompt_number": 24
232 |     },
233 |     {
234 |      "cell_type": "code",
235 |      "collapsed": false,
236 |      "input": [
237 |       "print float(myint/yourint) # 6"
238 |      ],
239 |      "language": "python",
240 |      "metadata": {},
241 |      "outputs": [
242 |       {
243 |        "output_type": "stream",
244 |        "stream": "stdout",
245 |        "text": [
246 |         "0.0\n"
247 |        ]
248 |       }
249 |      ],
250 |      "prompt_number": 25
251 |     },
252 |     {
253 |      "cell_type": "heading",
254 |      "level": 3,
255 |      "metadata": {},
256 |      "source": [
257 |       "Lists"
258 |      ]
259 |     },
260 |     {
261 |      "cell_type": "code",
262 |      "collapsed": false,
263 |      "input": [
264 |       "mylist = [6,3,6,7,2,6,2,9,7,0]\n",
265 |       "print mylist"
266 |      ],
267 |      "language": "python",
268 |      "metadata": {},
269 |      "outputs": [
270 |       {
271 |        "output_type": "stream",
272 |        "stream": "stdout",
273 |        "text": [
274 |         "[6, 3, 6, 7, 2, 6, 2, 9, 7, 0]\n"
275 |        ]
276 |       }
277 |      ],
278 |      "prompt_number": 32
279 |     },
280 |     {
281 |      "cell_type": "markdown",
282 |      "metadata": {},
283 |      "source": [
284 |       "You can get each item of the list using the brackets. Try to replace 0 with other numbers and see what you get:"
285 |      ]
286 |     },
287 |     {
288 |      "cell_type": "code",
289 |      "collapsed": false,
290 |      "input": [
291 |       "print mylist[0]"
292 |      ],
293 |      "language": "python",
294 |      "metadata": {},
295 |      "outputs": [
296 |       {
297 |        "output_type": "stream",
298 |        "stream": "stdout",
299 |        "text": [
300 |         "6\n"
301 |        ]
302 |       }
303 |      ],
304 |      "prompt_number": 33
305 |     },
306 |     {
307 |      "cell_type": "markdown",
308 |      "metadata": {},
309 |      "source": [
310 |       "Here the number inside the brackets is called an index, it represents the location of an item in the list. "
311 |      ]
312 |     },
313 |     {
314 |      "cell_type": "markdown",
315 |      "metadata": {},
316 |      "source": [
317 |       "You can also get more than one item at a time by providing a range of indices:"
318 |      ]
319 |     },
320 |     {
321 |      "cell_type": "code",
322 |      "collapsed": false,
323 |      "input": [
324 |       "print mylist[0:3]"
325 |      ],
326 |      "language": "python",
327 |      "metadata": {},
328 |      "outputs": [
329 |       {
330 |        "output_type": "stream",
331 |        "stream": "stdout",
332 |        "text": [
333 |         "[6, 3, 6]\n"
334 |        ]
335 |       }
336 |      ],
337 |      "prompt_number": 34
338 |     },
339 |     {
340 |      "cell_type": "markdown",
341 |      "metadata": {},
342 |      "source": [
343 |       "Strings can be words, sentences, and anything else you can write as a sequence of characters. They act a lot like lists. "
344 |      ]
345 |     },
346 |     {
347 |      "cell_type": "markdown",
348 |      "metadata": {},
349 |      "source": [
350 |       "You can make a string using quotes"
351 |      ]
352 |     },
353 |     {
354 |      "cell_type": "code",
355 |      "collapsed": false,
356 |      "input": [
357 |       "mystring = \"ACGT\"\n",
358 |       "print mystring"
359 |      ],
360 |      "language": "python",
361 |      "metadata": {},
362 |      "outputs": [
363 |       {
364 |        "output_type": "stream",
365 |        "stream": "stdout",
366 |        "text": [
367 |         "ACGT\n"
373 |        ]
374 |       }
375 |      ],
376 |      "prompt_number": 26
377 |     },
378 |     {
379 |      "cell_type": "markdown",
380 |      "metadata": {},
381 |      "source": [
382 |       "What happens if you remove the quotes? What about using single quotes like 'this'?"
383 |      ]
384 |     },
385 |     {
386 |      "cell_type": "code",
387 |      "collapsed": false,
388 |      "input": [
389 |       "mystring = ACGT\n",
390 |       "print mystring"
391 |      ],
392 |      "language": "python",
393 |      "metadata": {},
394 |      "outputs": [
395 |       {
396 |        "ename": "NameError",
397 |        "evalue": "name 'ACGT' is not defined",
398 |        "output_type": "pyerr",
399 |        "traceback": [
400 |         "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
401 |         "\u001b[0;32m<ipython-input-30-43dfcec84783>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmystring\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mACGT\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mmystring\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
402 |         "\u001b[0;31mNameError\u001b[0m: name 'ACGT' is not defined"
403 |        ]
404 |       }
405 |      ],
406 |      "prompt_number": 30
407 |     },
408 |     {
409 |      "cell_type": "markdown",
410 |      "metadata": {},
411 |      "source": [
412 |       "You can get the length of the string using the len() function:"
413 |      ]
414 |     },
415 |     {
416 |      "cell_type": "code",
417 |      "collapsed": false,
418 |      "input": [
419 |       "mystring = \"ACGT\"\n",
420 |       "print len(mystring)"
421 |      ],
422 |      "language": "python",
423 |      "metadata": {},
424 |      "outputs": []
425 |     },
426 |     {
427 |      "cell_type": "markdown",
428 |      "metadata": {},
429 |      "source": [
430 |       "You can get an individual letter of the string by putting its index in square brackets. Try replacing 0 with other numbers and see what you get:"
431 |      ]
432 |     },
433 |     {
434 |      "cell_type": "code",
435 |      "collapsed": false,
436 |      "input": [
437 |       "print mystring[0]"
438 |      ],
439 |      "language": "python",
440 |      "metadata": {},
441 |      "outputs": [
442 |       {
443 |        "output_type": "stream",
444 |        "stream": "stdout",
445 |        "text": [
446 |         "A\n"
447 |        ]
448 |       }
449 |      ],
450 |      "prompt_number": 29
451 |     },
452 |     {
453 |      "cell_type": "markdown",
454 |      "metadata": {},
455 |      "source": [
456 |       "Here is an example with DNA:"
457 |      ]
458 |     },
459 |     {
460 |      "cell_type": "code",
461 |      "collapsed": false,
462 |      "input": [
463 |       "DNA = \"CAACGGGCAATATGTCTCTGTGTG\"\n",
464 |       "print \"Your DNA is\", len(DNA), \"bases long\"\n",
465 |       "\n",
466 |       "print DNA[7]"
467 |      ],
468 |      "language": "python",
469 |      "metadata": {},
470 |      "outputs": [
471 |       {
472 |        "output_type": "stream",
473 |        "stream": "stdout",
474 |        "text": [
475 |         "Your DNA is 24 bases long\n",
476 |         "C\n"
477 |        ]
478 |       }
479 |      ],
480 |      "prompt_number": 35
481 |     }
482 |    ],
483 |    "metadata": {}
484 |   }
485 |  ]
486 | }


--------------------------------------------------------------------------------