├── .ipynb_checkpoints └── python_intro-checkpoint.ipynb ├── Python questions ├── Python tutorial structure ├── README.md ├── milestone projects ├── Project 1 - GC content │ ├── project 1.0-GC content.py │ ├── project 1.1 - input validation.py │ ├── project 1.2 - function.py │ └── project 1.3 - RNA too.py ├── Project 2 - reverse complement │ ├── project 2.0-complement.py │ ├── project 2.1-string not list.py │ └── project 2.2-reverse.py ├── Project 3 - DNA to RNA to protein │ ├── Project 3-DNA_RNA_protein.py │ └── project 3-teacher version.py └── Project 4 - reading a fasta file │ └── read_fasta.py ├── python intro.py └── python_intro.ipynb /.ipynb_checkpoints/python_intro-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:42512dd288705f0efed4039b19230604208bfe8b48a5e91bb30d2c401f6189fa" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | " An intro to Python for biology" 17 | ] 18 | }, 19 | { 20 | "cell_type": "heading", 21 | "level": 3, 22 | "metadata": {}, 23 | "source": [ 24 | "Variables and printing" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "collapsed": false, 30 | "input": [ 31 | "# this is a comment, use these to tell people what your code does" 32 | ], 33 | "language": "python", 34 | "metadata": {}, 35 | "outputs": [] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "collapsed": false, 40 | "input": [ 41 | "mystring = \"Homo sapiens\" # a string, letters, words, sentences, anything in quotes\n", 42 | "myinteger = 4 # an integer\n", 43 | "myfloat = 4.0 # a float (a number with a decimal point)\n", 44 | "\n", 45 | "print \"This is a string:\", mystring, \"this is a float:\", myfloat, \"and this is an integer: \", myinteger" 46 | ], 47 | "language": "python", 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "output_type": "stream", 52 | "stream": "stdout", 53 | "text": [ 54 | "This is a string: Homo sapiens this is a float: 4.0 and this is an integer: 4\n" 55 | ] 56 | } 57 | ], 58 | "prompt_number": 4 59 | }, 60 | { 61 | "cell_type": "heading", 62 | "level": 3, 63 | "metadata": {}, 64 | "source": [ 65 | "Basic Math" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "You can do calculations:" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "collapsed": false, 78 | "input": [ 79 | "myfloat = 2.0\n", 80 | "yourfloat = 3.0\n", 81 | "print myfloat/yourfloat" 82 | ], 83 | "language": "python", 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "output_type": "stream", 88 | "stream": "stdout", 89 | "text": [ 90 | "0.666666666667\n" 91 | ] 92 | } 93 | ], 94 | "prompt_number": 6 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "But be careful, when you calculate with only integers, you can only get an integer back, so Python rounds your answer down!" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "collapsed": false, 106 | "input": [ 107 | "myint = 2\n", 108 | "yourint = 3\n", 109 | "print myint/yourint" 110 | ], 111 | "language": "python", 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "output_type": "stream", 116 | "stream": "stdout", 117 | "text": [ 118 | "0\n" 119 | ] 120 | } 121 | ], 122 | "prompt_number": 7 123 | }, 124 | { 125 | "cell_type": "code", 126 | "collapsed": false, 127 | "input": [ 128 | "myint = 2\n", 129 | "yourint = 3\n", 130 | "\n", 131 | "# Which of these work and why?" 132 | ], 133 | "language": "python", 134 | "metadata": {}, 135 | "outputs": [], 136 | "prompt_number": 10 137 | }, 138 | { 139 | "cell_type": "code", 140 | "collapsed": false, 141 | "input": [ 142 | "print float(myint)/yourint # 1" 143 | ], 144 | "language": "python", 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "output_type": "stream", 149 | "stream": "stdout", 150 | "text": [ 151 | "0.666666666667\n" 152 | ] 153 | } 154 | ], 155 | "prompt_number": 20 156 | }, 157 | { 158 | "cell_type": "code", 159 | "collapsed": false, 160 | "input": [ 161 | "print myint/float(yourint) # 2" 162 | ], 163 | "language": "python", 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "output_type": "stream", 168 | "stream": "stdout", 169 | "text": [ 170 | "0.666666666667\n" 171 | ] 172 | } 173 | ], 174 | "prompt_number": 21 175 | }, 176 | { 177 | "cell_type": "code", 178 | "collapsed": false, 179 | "input": [ 180 | "print (myint+0.0)/yourint # 3" 181 | ], 182 | "language": "python", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "output_type": "stream", 187 | "stream": "stdout", 188 | "text": [ 189 | "0.666666666667\n" 190 | ] 191 | } 192 | ], 193 | "prompt_number": 22 194 | }, 195 | { 196 | "cell_type": "code", 197 | "collapsed": false, 198 | "input": [ 199 | "print myint/yourint*1.0 # 4" 200 | ], 201 | "language": "python", 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "output_type": "stream", 206 | "stream": "stdout", 207 | "text": [ 208 | "0.0\n" 209 | ] 210 | } 211 | ], 212 | "prompt_number": 23 213 | }, 214 | { 215 | "cell_type": "code", 216 | "collapsed": false, 217 | "input": [ 218 | "print myint*1.0/yourint # 5" 219 | ], 220 | "language": "python", 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "output_type": "stream", 225 | "stream": "stdout", 226 | "text": [ 227 | "0.666666666667\n" 228 | ] 229 | } 230 | ], 231 | "prompt_number": 24 232 | }, 233 | { 234 | "cell_type": "code", 235 | "collapsed": false, 236 | "input": [ 237 | "print float(myint/yourint) # 6" 238 | ], 239 | "language": "python", 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "output_type": "stream", 244 | "stream": "stdout", 245 | "text": [ 246 | "0.0\n" 247 | ] 248 | } 249 | ], 250 | "prompt_number": 25 251 | }, 252 | { 253 | "cell_type": "heading", 254 | "level": 3, 255 | "metadata": {}, 256 | "source": [ 257 | "Lists" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "collapsed": false, 263 | "input": [ 264 | "mylist = [6,3,6,7,2,6,2,9,7,0]\n", 265 | "print mylist" 266 | ], 267 | "language": "python", 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "output_type": "stream", 272 | "stream": "stdout", 273 | "text": [ 274 | "[6, 3, 6, 7, 2, 6, 2, 9, 7, 0]\n" 275 | ] 276 | } 277 | ], 278 | "prompt_number": 32 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "You can get each item of the list using the brackets. Try to replace 0 with other numbers and see what you get:" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "collapsed": false, 290 | "input": [ 291 | "print mylist[0]" 292 | ], 293 | "language": "python", 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "output_type": "stream", 298 | "stream": "stdout", 299 | "text": [ 300 | "6\n" 301 | ] 302 | } 303 | ], 304 | "prompt_number": 33 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "Here the number inside the brackets is called an index, it represents the location of an item in the list. " 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "You can also get more than one item at a time by providing a range of indices:" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "collapsed": false, 323 | "input": [ 324 | "print mylist[0:3]" 325 | ], 326 | "language": "python", 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "output_type": "stream", 331 | "stream": "stdout", 332 | "text": [ 333 | "[6, 3, 6]\n" 334 | ] 335 | } 336 | ], 337 | "prompt_number": 34 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "Strings can be words, sentences, and anything else you can write as a sequence of characters. They act a lot like lists. " 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "You can make a string using quotes" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "collapsed": false, 356 | "input": [ 357 | "mystring = \"ACGT\"\n", 358 | "print mystring" 359 | ], 360 | "language": "python", 361 | "metadata": {}, 362 | "outputs": [ 363 | { 364 | "output_type": "stream", 365 | "stream": "stdout", 366 | "text": [ 367 | "ACGT\n", 368 | "4\n", 369 | "A\n", 370 | "C\n", 371 | "G\n", 372 | "T\n" 373 | ] 374 | } 375 | ], 376 | "prompt_number": 26 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "What happens if you remove the quotes? What about using single quotes like 'this'?" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "collapsed": false, 388 | "input": [ 389 | "mystring = ACGT\n", 390 | "print mystring" 391 | ], 392 | "language": "python", 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "ename": "NameError", 397 | "evalue": "name 'ACGT' is not defined", 398 | "output_type": "pyerr", 399 | "traceback": [ 400 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 401 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmystring\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mACGT\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mmystring\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 402 | "\u001b[0;31mNameError\u001b[0m: name 'ACGT' is not defined" 403 | ] 404 | } 405 | ], 406 | "prompt_number": 30 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "You can get the length of the string using the len() function:" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "collapsed": false, 418 | "input": [ 419 | "mystring = \"ACGT\"\n", 420 | "print len(mystring)" 421 | ], 422 | "language": "python", 423 | "metadata": {}, 424 | "outputs": [] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "You can get each letter of the string using the brackets. Try to replace 0 with other numbers and see what you get:" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "collapsed": false, 436 | "input": [ 437 | "print mystring[0]" 438 | ], 439 | "language": "python", 440 | "metadata": {}, 441 | "outputs": [ 442 | { 443 | "output_type": "stream", 444 | "stream": "stdout", 445 | "text": [ 446 | "A\n" 447 | ] 448 | } 449 | ], 450 | "prompt_number": 29 451 | }, 452 | { 453 | "cell_type": "markdown", 454 | "metadata": {}, 455 | "source": [ 456 | "Here is an example with DNA:" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "collapsed": false, 462 | "input": [ 463 | "DNA = \"CAACGGGCAATATGTCTCTGTGTG\"\n", 464 | "print \"Your DNA is\", len(DNA), \"bases long\"\n", 465 | "\n", 466 | "print DNA[7]" 467 | ], 468 | "language": "python", 469 | "metadata": {}, 470 | "outputs": [ 471 | { 472 | "output_type": "stream", 473 | "stream": "stdout", 474 | "text": [ 475 | "Your DNA is 24 bases long\n", 476 | "C\n" 477 | ] 478 | } 479 | ], 480 | "prompt_number": 35 481 | } 482 | ], 483 | "metadata": {} 484 | } 485 | ] 486 | } -------------------------------------------------------------------------------- /Python questions: -------------------------------------------------------------------------------- 1 | Questions: 2 | 3 | 4 | ######################################################################################## 5 | ######################## Variables and print statements ################################ 6 | ######################################################################################## 7 | 8 | ########## QUESTION 1: Variables ########### 9 | What kind of variable are these? (integer, float, string) 10 | A=93874 11 | B="abc" 12 | C="Asdflkj44534" 13 | D="23423" 14 | E=23 15 | F=23. 16 | 17 | 18 | ########## QUESTION 2: What is wrong with this print statement? ########### 19 | 20 | print "My name is %d and my species is %s" % (myname,myspecies) 21 | 22 | print "I am %s and my genome is %d base pairs long" % mygenomesize 23 | 24 | print "A ribosome can make proteins at a rate of %d to %d amino acids per second" (10, 20) 25 | 26 | 27 | ######################################################################################## 28 | ########################### Lists, strings, and for loops ############################## 29 | ######################################################################################## 30 | 31 | ########## QUESTION 3: Which of these are correct? Which give errors? ########### 32 | 33 | X=[3,5,6,2,2] 34 | Y=["hello","world"] 35 | print X[5] 36 | print X[1+2] 37 | print X[4] 38 | print X[5] 39 | print X[-1] 40 | print Y[2] 41 | print Y[3] 42 | print Y[0] 43 | print Y[-1] 44 | print Y[-3] 45 | 46 | ########## QUESTION 4: What is the output of this for loop? ########### 47 | 48 | 49 | for tomato in xrange(4): 50 | print tomato 51 | 52 | for j in xrange(5,13): 53 | print j 54 | 55 | for j in xrange(5,13,3): 56 | print j 57 | 58 | fruits = ["Apples","Bananas","Coconut"] 59 | 60 | for i in xrange(1,len(fruits)): 61 | print item 62 | 63 | 64 | 65 | 66 | 67 | ######################################################################################## 68 | ################################## If statements ####################################### 69 | ######################################################################################## 70 | 71 | ########## QUESTION 5: True or False ########### 72 | 73 | (True and False) 74 | (True and True) 75 | (False or False) 76 | (False or True) 77 | (True and (False or True)) 78 | (False and (False or (True or False))) 79 | 80 | 81 | 82 | ######################################################################################## 83 | ################################### Basic Math ######################################### 84 | ######################################################################################## 85 | 86 | ########## QUESTION 6: Which of these give the correct answer? ########### 87 | 88 | 5 * 6 89 | 7/3 90 | 5/98 91 | 23.0/123 92 | 6/2 93 | 87/(1.0 * 45) 94 | 78/39 95 | float(34)/234 96 | 23/(234 * 1.0) 97 | 23/234 * 1.0 98 | 5 * 2.7 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /Python tutorial structure: -------------------------------------------------------------------------------- 1 | BioCoding Course: 2 | Python Tutorial Structure: 3 | 4 | 5 | 6 | Variables and print statements 7 | - Lecture 8 | - Quiz/Game 9 | 10 | Lists/strings/for loops 11 | - Lecture 12 | _ Quiz/Game 13 | 14 | If statements 15 | - Lecture 16 | - Quiz/Game 17 | 18 | Basic math 19 | - Lecture 20 | - Quiz/Game 21 | 22 | Start project 1: 23 | - Make some code that takes a string called DNA and calculates the percentage of the nucleotides that either C or G (as opposed to A or T) 24 | - count them up and divide them by the total at the end 25 | - print out the answer 26 | - Put input validation so it prints a message for every letter in the DNA string that isn't a nucleotide, then make sure you ignore it in the calculations for GC content 27 | - Make all the DNA upper case so you don't have to check a,c,g, and t too. upper(DNA) 28 | 29 | Functions 30 | - Lecture 31 | 32 | Finish project 1: 33 | - Make project 1 into a function and call it! 34 | - Make a small change to project 1 so it can also work with RNA (U should now be recognized like A and T) (you don't have to change the variable name DNA) 35 | 36 | 37 | Dictionaries 38 | - Lecture 39 | 40 | Choosing the right data structure 41 | - Game 42 | 43 | Reverse complement: 44 | - Talk about how DNA is antiparallel and how the bases match each other 45 | - Learn to reverse complement by hand 46 | 47 | Project 2: 48 | - write a function called reverse_complement() that gets a string called DNA 49 | - write a dictionary within this function that decodes each A to T, T to A, G to C, and C to G. 50 | - loop through the DNA and decode it one letter at a time: this is the complement 51 | - then reverse the complement using the cool indexing we learned 52 | 53 | Teach them about transcription and translation 54 | - how to find a start codon by hand, mark off every 3 nucleotides on paper, and stop at any of the stop codons 55 | - each codon is a code for one amino acid 56 | 57 | 58 | 59 | Project 3: # we guide them a lot more in this project as it is pretty difficult. I would like to make it a class discussion, and then break them up for each part to actually code what we decided the pseudocode for as a class. The job of the teacher here is to ask questions and guide the discussion without actually giving the answers away. 60 | 61 | - the goal is to write a function that transcribes DNA into RNA and then translates it into protein 62 | - parts of this process have already been written and the pieces are broken up neatly into functions 63 | - the students' job is to fill in the rest of the code that is missing 64 | # Project 3 is pretty difficult, so I have made a framework for it. That way the students can code the simpler parts. 65 | 66 | 67 | 68 | 69 | Project 4: Reading a fasta file. Also a bit difficult, but we just want them exploring the file type and how you can look through the lines in the file. 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | Extras: 83 | Numpy 84 | - lecture written already 85 | - problem set not written yet 86 | 87 | 88 | Matplotlib.pyplot 89 | - lecture not written yet 90 | - problem set not written yet 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | BioCoding_Tutorial 2 | ================== 3 | 4 | A tutorial in Python where all the examples teach students about biology. Written for high school students with no previous experience. 5 | -------------------------------------------------------------------------------- /milestone projects/Project 1 - GC content/project 1.0-GC content.py: -------------------------------------------------------------------------------- 1 | DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC" 2 | 3 | 4 | AT = 0 5 | GC = 0 6 | for base in DNA: 7 | if base=="A" or base=="T": 8 | AT = AT+1 9 | # AT += 1 10 | elif base=="G" or base=="C": 11 | GC = GC+1 12 | # GC += 1 13 | 14 | GC_percentage = 100*float(GC)/float(GC+AT) 15 | #GC_percentage = 100.0*GC/GC+AT 16 | 17 | print "The GC content of the input string is %f" % (GC_percentage) 18 | 19 | -------------------------------------------------------------------------------- /milestone projects/Project 1 - GC content/project 1.1 - input validation.py: -------------------------------------------------------------------------------- 1 | 2 | DNA = raw_input('Enter your DNA: ') 3 | 4 | DNA = DNA.upper() 5 | 6 | AT = 0 7 | GC = 0 8 | for base in DNA: 9 | if base=="A" or base=="T": 10 | AT = AT+1 11 | # AT += 1 12 | elif base=="G" or base=="C": 13 | GC = GC+1 14 | # GC += 1 15 | else: 16 | print "The letter %s does not belong in DNA and was not counted" % (base) 17 | 18 | if AT+GC == 0: 19 | print "no nucleotides" 20 | else: 21 | GC_percentage = 100*float(GC)/float(GC+AT) 22 | #GC_percentage = 100.0*GC/GC+AT 23 | print "The GC content of the input string is %f" % (GC_percentage) 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | # give inputs that their code should be able to handle: 32 | # AGCGATN 33 | # cgatcga 34 | # SLKFSLDKFJS 35 | -------------------------------------------------------------------------------- /milestone projects/Project 1 - GC content/project 1.2 - function.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | def calc_GC_content(DNA): 5 | DNA = DNA.upper() 6 | 7 | AT = 0 8 | GC = 0 9 | for base in DNA: 10 | if base=="A" or base=="T": 11 | AT = AT+1 12 | # AT += 1 13 | elif base=="G" or base=="C": 14 | GC = GC+1 15 | # GC += 1 16 | else: 17 | print "The letter %s does not belong in DNA and was not counted" % (base) 18 | 19 | if AT+GC == 0: 20 | print "no nucleotides" 21 | return 0 # always return something so you don't get an eror 22 | else: 23 | GC_percentage = 100*float(GC)/float(GC+AT) 24 | #GC_percentage = 100.0*GC/GC+AT 25 | return GC_percentage 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | DNA = raw_input('Enter your DNA: ') 34 | print calc_GC_content(DNA) -------------------------------------------------------------------------------- /milestone projects/Project 1 - GC content/project 1.3 - RNA too.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # make it work with RNA as well 4 | 5 | def calc_GC_content(DNA): 6 | DNA = DNA.upper() 7 | 8 | AT = 0 9 | GC = 0 10 | for base in DNA: 11 | if base=="A" or base=="T" or base=="U": 12 | AT = AT+1 13 | # AT += 1 14 | elif base=="G" or base=="C": 15 | GC = GC+1 16 | # GC += 1 17 | else: 18 | print "The letter %s does not belong in DNA and was not counted" % (base) 19 | 20 | if AT+GC == 0: 21 | print "no nucleotides" 22 | return 0 # always return something so you don't get an eror 23 | else: 24 | GC_percentage = 100*float(GC)/float(GC+AT) 25 | #GC_percentage = 100.0*GC/GC+AT 26 | return GC_percentage 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | DNA = raw_input('Enter your DNA: ') 35 | print calc_GC_content(DNA) 36 | -------------------------------------------------------------------------------- /milestone projects/Project 2 - reverse complement/project 2.0-complement.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def reverse_complement(DNA): 4 | DNA = DNA.upper() 5 | 6 | complement = [] 7 | 8 | complement_dictionary = { 9 | "A":"T", 10 | "T":"A", 11 | "G":"C", 12 | "C":"G" 13 | } 14 | 15 | for base in DNA: 16 | complement.append(complement_dictionary.get(base,"N")) 17 | 18 | return complement 19 | 20 | 21 | DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC" 22 | #DNA = raw_input('Enter your DNA: ') 23 | print reverse_complement(DNA) -------------------------------------------------------------------------------- /milestone projects/Project 2 - reverse complement/project 2.1-string not list.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def reverse_complement(DNA): 4 | DNA = DNA.upper() 5 | 6 | complement = str() 7 | 8 | complement_dictionary = { 9 | "A":"T", 10 | "T":"A", 11 | "G":"C", 12 | "C":"G" 13 | } 14 | 15 | for base in DNA: 16 | complement+=complement_dictionary.get(base,"N") 17 | 18 | return complement 19 | 20 | 21 | DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC" 22 | #DNA = raw_input('Enter your DNA: ') 23 | print reverse_complement(DNA) -------------------------------------------------------------------------------- /milestone projects/Project 2 - reverse complement/project 2.2-reverse.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def reverse_complement(DNA): 4 | DNA = DNA.upper() 5 | 6 | complement = str() 7 | 8 | complement_dictionary = { 9 | "A":"T", 10 | "T":"A", 11 | "G":"C", 12 | "C":"G" 13 | } 14 | 15 | for base in DNA: 16 | complement+=complement_dictionary.get(base,"N") 17 | 18 | revcomp = complement[::-1] 19 | return revcomp 20 | 21 | 22 | DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC" 23 | #DNA = raw_input('Enter your DNA: ') 24 | print reverse_complement(DNA) 25 | 26 | 27 | -------------------------------------------------------------------------------- /milestone projects/Project 3 - DNA to RNA to protein/Project 3-DNA_RNA_protein.py: -------------------------------------------------------------------------------- 1 | 2 | def list_to_str(mylist): 3 | mystring = str() 4 | for item in mylist: 5 | mystring+=item 6 | return mystring 7 | 8 | 9 | def reverse_complement(DNA): 10 | ########### Your code here ############# 11 | 12 | return "not done yet" 13 | 14 | 15 | def DNA_to_RNA(DNA): 16 | DNA=DNA.upper() 17 | 18 | ########### Your code here ############# 19 | 20 | 21 | def RNA_to_protein(RNA,frame): 22 | decoder = {'ACC': 'T', 'GCA': 'A', 'ACA': 'T', 'ACG': 'T', 'GUU': 'V', 23 | 'AAC': 'N', 'AGG': 'R', 'UGG': 'W', 'GUC': 'V', 'AGC': 'S', 'AUC': 'I', 24 | 'AGA': 'R', 'AAU': 'N', 'ACU': 'T', 'GUG': 'V', 'CAC': 'H', 'AAA': 'K', 25 | 'CCG': 'P', 'CCA': 'P', 'AGU': 'S', 'AAG': 'K', 'GGU': 'G', 'UCU': 'S', 26 | 'GCG': 'A', 'CGA': 'R', 'CAG': 'Q', 'GAU': 'D', 'UAU': 'Y', 'CGG': 'R', 27 | 'UCG': 'S', 'CCU': 'P', 'GGG': 'G', 'GGA': 'G', 'CCC': 'P', 'GGC': 'G', 28 | 'GAA': 'E', 'UAA': '*', 'UCC': 'S', 'UAC': 'Y', 'GAC': 'D', 'UGU': 'C', 29 | 'AUA': 'I', 'CUU': 'L', 'UCA': 'S', 'AUG': 'M', 'CGC': 'R', 'CUG': 'L', 30 | 'GAG': 'E', 'AUU': 'I', 'CAU': 'H', 'CUA': 'L', 'GCC': 'A', 'CAA': 'Q', 31 | 'UUU': 'F', 'CGU': 'R', 'GUA': 'V', 'UGC': 'C', 'GCU': 'A', 'UAG': '*', 32 | 'CUC': 'L', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F'} 33 | 34 | ########### Your code here ############# 35 | 36 | for i in xrange(frame,len(RNA)-2,3): 37 | codon=RNA[i:i+3] 38 | 39 | 40 | ########### Your code here ############# 41 | 42 | return protein 43 | 44 | def find_coding_regions(protein): 45 | print protein 46 | ORFs=[] 47 | started = False 48 | start_indx=-1 49 | for i in xrange(len(protein)): 50 | aa=protein[i] # aa means amino acid 51 | 52 | #if the protein is started 53 | if started==True: 54 | if aa=='*': # stop codon 55 | started=False 56 | ORFs.append(protein[start_indx:i]) 57 | # if the protein hasn't started 58 | elif aa=="M": # start codon 59 | started = True 60 | start_indx=i 61 | 62 | 63 | return ORFs 64 | 65 | def translate(DNA): 66 | DNA_forward=DNA 67 | DNA_reverse=reverse_complement(DNA) 68 | RNA_forward=DNA_to_RNA(DNA_forward) 69 | RNA_reverse=DNA_to_RNA(DNA_reverse) 70 | 71 | allproteins=[] 72 | for frame in xrange(3): 73 | allproteins.append(RNA_to_protein(RNA_forward,frame)) 74 | allproteins.append(RNA_to_protein(RNA_reverse,frame)) 75 | 76 | all_ORFs=[] 77 | for protein in allproteins: 78 | all_ORFs.append(find_coding_regions(protein)) 79 | 80 | return all_ORFs 81 | 82 | def translate_only_longest_ORF(DNA): 83 | proteins=translate(DNA) 84 | 85 | lengths=[] 86 | for i in xrange(len(proteins)): 87 | if len(proteins[i]) < 1: 88 | lengths.append(0) 89 | else: 90 | lengths.append(len(proteins[i][0])) 91 | 92 | import numpy as np 93 | lengths=np.array(lengths) 94 | if max(lengths) > 0: 95 | return proteins[np.argmax(lengths)][0] 96 | else: 97 | return "No open reading frames" 98 | 99 | 100 | myinput = "GCTATGAGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC" 101 | #myinput = raw_input('Enter your DNA: ') 102 | 103 | allproteins= translate(myinput) 104 | 105 | print allproteins 106 | 107 | longest_protein=translate_only_longest_ORF(myinput) 108 | 109 | -------------------------------------------------------------------------------- /milestone projects/Project 3 - DNA to RNA to protein/project 3-teacher version.py: -------------------------------------------------------------------------------- 1 | # from the intro materials 2 | 3 | def list_to_str(mylist): 4 | mystring = str() 5 | for item in mylist: 6 | mystring+=item 7 | return mystring 8 | 9 | 10 | # from Project 2 11 | 12 | def reverse_complement(DNA): 13 | DNA = DNA.upper() 14 | 15 | complement = str() 16 | 17 | complement_dictionary = { 18 | "A":"T", 19 | "T":"A", 20 | "G":"C", 21 | "C":"G" 22 | } 23 | 24 | for base in DNA: 25 | complement+=complement_dictionary.get(base,"N") 26 | 27 | revcomp = complement[::-1] 28 | return revcomp 29 | 30 | 31 | def DNA_to_RNA(DNA): 32 | DNA=DNA.upper() 33 | RNA = str() 34 | for i in xrange(len(DNA)): 35 | if DNA[i]=="T": 36 | RNA+="U" 37 | else: 38 | RNA+=DNA[i] 39 | return RNA 40 | 41 | 42 | def RNA_to_protein(RNA,frame): 43 | decoder = {'ACC': 'T', 'GCA': 'A', 'ACA': 'T', 'ACG': 'T', 'GUU': 'V', 44 | 'AAC': 'N', 'AGG': 'R', 'UGG': 'W', 'GUC': 'V', 'AGC': 'S', 'AUC': 'I', 45 | 'AGA': 'R', 'AAU': 'N', 'ACU': 'T', 'GUG': 'V', 'CAC': 'H', 'AAA': 'K', 46 | 'CCG': 'P', 'CCA': 'P', 'AGU': 'S', 'AAG': 'K', 'GGU': 'G', 'UCU': 'S', 47 | 'GCG': 'A', 'CGA': 'R', 'CAG': 'Q', 'GAU': 'D', 'UAU': 'Y', 'CGG': 'R', 48 | 'UCG': 'S', 'CCU': 'P', 'GGG': 'G', 'GGA': 'G', 'CCC': 'P', 'GGC': 'G', 49 | 'GAA': 'E', 'UAA': '*', 'UCC': 'S', 'UAC': 'Y', 'GAC': 'D', 'UGU': 'C', 50 | 'AUA': 'I', 'CUU': 'L', 'UCA': 'S', 'AUG': 'M', 'CGC': 'R', 'CUG': 'L', 51 | 'GAG': 'E', 'AUU': 'I', 'CAU': 'H', 'CUA': 'L', 'GCC': 'A', 'CAA': 'Q', 52 | 'UUU': 'F', 'CGU': 'R', 'GUA': 'V', 'UGC': 'C', 'GCU': 'A', 'UAG': '*', 53 | 'CUC': 'L', 'UUG': 'L', 'UUA': 'L', 'UGA': '*', 'UUC': 'F'} 54 | 55 | protein = [] 56 | for i in xrange(frame,len(RNA)-2,3): 57 | codon=RNA[i:i+3] 58 | protein.append(decoder.get(codon,"?")) 59 | return list_to_str(protein) 60 | 61 | def find_coding_regions(protein): 62 | print protein 63 | ORFs=[] 64 | started = False 65 | start_indx=-1 66 | for i in xrange(len(protein)): 67 | aa=protein[i] # aa means amino acid 68 | 69 | #if the protein is started 70 | if started==True: 71 | if aa=='*': 72 | started=False 73 | ORFs.append(protein[start_indx:i]) 74 | # if the protein hasn't started 75 | elif aa=="M": 76 | started = True 77 | start_indx=i 78 | 79 | 80 | return ORFs 81 | 82 | def translate(DNA): 83 | DNA_forward=DNA 84 | DNA_reverse=reverse_complement(DNA) 85 | RNA_forward=DNA_to_RNA(DNA_forward) 86 | RNA_reverse=DNA_to_RNA(DNA_reverse) 87 | 88 | allproteins=[] 89 | for frame in xrange(3): 90 | allproteins.append(RNA_to_protein(RNA_forward,frame)) 91 | allproteins.append(RNA_to_protein(RNA_reverse,frame)) 92 | 93 | all_ORFs=[] 94 | for protein in allproteins: 95 | all_ORFs.append(find_coding_regions(protein)) 96 | 97 | return all_ORFs 98 | 99 | 100 | def translate_only_longest_ORF(DNA): 101 | proteins=translate(DNA) 102 | 103 | lengths=[] 104 | for i in xrange(len(proteins)): 105 | if len(proteins[i]) < 1: 106 | lengths.append(0) 107 | else: 108 | lengths.append(len(proteins[i][0])) 109 | 110 | import numpy as np 111 | lengths=np.array(lengths) 112 | if max(lengths) > 0: 113 | return proteins[np.argmax(lengths)][0] 114 | else: 115 | return "No open reading frames" 116 | 117 | 118 | 119 | 120 | myinput = "GCTATGAGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC" 121 | #myinput = raw_input('Enter your DNA: ') 122 | 123 | allproteins= translate(myinput) 124 | 125 | print allproteins 126 | 127 | -------------------------------------------------------------------------------- /milestone projects/Project 4 - reading a fasta file/read_fasta.py: -------------------------------------------------------------------------------- 1 | # File input/output 2 | 3 | 4 | 5 | def read_fasta(filename): 6 | f = open(filename,'r') 7 | names = [] 8 | genome = [] 9 | counter = -1 10 | for line in f: 11 | if line[0]==">": 12 | 13 | counter += 1 14 | 15 | names.append(line[1:].strip()) 16 | genome.append([]) 17 | 18 | else: 19 | genome[counter].append(line.strip()) 20 | 21 | return names, genome 22 | 23 | names, genome = read_fasta("drosophila_genome.fa") 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /python intro.py: -------------------------------------------------------------------------------- 1 | ######################################################################################## 2 | ## 3 | ## An intro to Python for biology 4 | ## 5 | ######################################################################################## 6 | 7 | ######################################################################################## 8 | ######################## Variables and print statements ################################ 9 | ######################################################################################## 10 | 11 | # this is a comment, use these to tell people what your code does 12 | 13 | mystring = "Homo sapiens" # a string, letters, words, sentences, anything in quotes 14 | myinteger = 4 # an integer 15 | myfloat = 4.0 # a float (a number with a decimal point) 16 | 17 | print "This is a string: %s, this is a float: %f, and this is an integer: %d" % (mystring,myfloat,myinteger) 18 | 19 | print "We are %s and there are %d bases in our DNA" % (mystring, myinteger) 20 | 21 | # use %s when you need a string 22 | # use %d when you need an integer 23 | # use %f when you need a float (a number with a decimal point) 24 | 25 | 26 | ######################################################################################## 27 | ################################### Basic Math ######################################### 28 | ######################################################################################## 29 | 30 | myfloat = 2.0 31 | yourfloat = 3.0 32 | print myfloat/yourfloat 33 | 34 | 35 | myint = 2 36 | yourint = 3 37 | print myint/yourint 38 | 39 | # The anwer is 2/3 = 0.666666667 but Python and many other programming lanuages have a quirk where they think that any math you do with integers can only output another integer. 40 | # Let's explore this and see if we can find tricks to deal with it 41 | 42 | myint = 2 43 | yourint = 3 44 | 45 | # Which of these work and why? 46 | 47 | print float(myint)/yourint # 1 48 | 49 | print myint/float(yourint) # 2 50 | 51 | print (myint+0.0)/yourint # 3 52 | 53 | print myint/yourint + 0.0 # 4 54 | 55 | print myint*1.0/yourint # 5 56 | 57 | print myint/yourint*1.0 # 6 58 | 59 | 60 | ######################################################################################## 61 | ########################### Lists, strings, and for loops ############################## 62 | ######################################################################################## 63 | 64 | 65 | ####### LISTS ####### 66 | mylist = [6,3,6,7,2,6,2,9,7,0] 67 | print mylist 68 | 69 | print mylist[0] 70 | print mylist[1] 71 | print mylist[2] 72 | 73 | ###### STRINGS ###### 74 | # strings are like lists of letters, so you can get individual letters using the bracket notation 75 | 76 | mystring = "ACGT" 77 | print len(mystring) ## => 4 78 | 79 | print mystring[0] 80 | print mystring[1] 81 | print mystring[2] 82 | print mystring[3] 83 | #print mystring[4] # Error:"string index out of range" 84 | 85 | 86 | DNA = "CAACGGGCAATATGTCTCTGTGTG" 87 | print "Your DNA is %d bases long" % (len(DNA)) 88 | 89 | print DNA[7] 90 | 91 | ######### lists can contain anything ######### 92 | 93 | mylist = [1,"anything",2.45,5,10,"Hello world"] 94 | 95 | print mylist[4] 96 | 97 | ####### you can also make lists longer ###### 98 | print mylist 99 | 100 | mylist.append(56) 101 | 102 | print mylist 103 | 104 | # this means you can start with an empty list and add to it: 105 | nucleotides = [] 106 | nucleotides.append("A") 107 | nucleotides.append("C") 108 | nucleotides.append("G") 109 | nucleotides.append("T") 110 | print nucleotides 111 | 112 | ########## QUESTION 3: Which of these are correct? Which give errors? ########### 113 | 114 | 115 | 116 | ################ indexing lists ################### 117 | 118 | mylist = range(10) 119 | 120 | print mylist 121 | 122 | print mylist[0:10] 123 | print mylist[:] 124 | print mylist[1:10] 125 | print mylist[4:8] 126 | print mylist[3:] 127 | print mylist[0:10:2] # what does the third number do? 128 | print mylist[0:7:3] 129 | print mylist[::-1] # what does this do? 130 | 131 | 132 | 133 | 134 | 135 | ###### Two different versions of a "for loop" ####### 136 | 137 | for letter in DNA: 138 | print letter 139 | 140 | # the variable letter can be anything, try switching it out with tomato 141 | 142 | for tomato in DNA: 143 | print tomato 144 | 145 | # we only call it letter so it makes sense. This becomes important when you have many more variables. At that point, naming them after fruits suddenly isn't funny anymore. 146 | 147 | 148 | # you can also make a for loop like this: 149 | # xrange gives a list from 0 to len(DNA), so i loops through that list 150 | for i in xrange(len(DNA)): 151 | print i 152 | 153 | # when you do this, you get the bases in the DNA by saying DNA[i] 154 | for i in xrange(len(DNA)): 155 | print "Letter number %d in your DNA is %s" % (i,DNA[i]) 156 | 157 | # sometimes we really care about the indices, like when we are dealing with 2 or more lists in parallel 158 | DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC" 159 | RNA = "AGCUUUUCAUUCUGACUGCAACGGGCAAUAUGUCUCUGUGUGGAUUAAAAAAAGAGUGUCUGAUAGCAGC" 160 | 161 | length = len(DNA) 162 | 163 | for i in xrange(length): 164 | print "DNA: %s, RNA: %s" % (DNA[i],RNA[i]) 165 | 166 | 167 | 168 | # xrange can do many things: 169 | 170 | for b in xrange(0,4,1): # start=0,stop=4,step=1 171 | print b 172 | 173 | for j in xrange(2,10,2): 174 | print j 175 | 176 | ########## Now you try to print (1,3,5,7,9) using a for loop ########### 177 | 178 | 179 | ########### You can also make empty strings and add to them ############# 180 | 181 | DNA = "AGCTTTTCATTC" 182 | 183 | # just like lists, you can also add to strings: 184 | mystring = str() # makes an empty string, just like mylist = [] makes an empty list 185 | 186 | for i in xrange(len(DNA)): 187 | mystring+=DNA[i] 188 | 189 | print mystring 190 | 191 | 192 | # how is this different from making a list? 193 | 194 | mylist = [] 195 | 196 | for i in xrange(len(DNA)): 197 | mylist.append(DNA[i]) 198 | 199 | print mylist 200 | 201 | # if you do make a list and want it to be a string, you can always just do this: 202 | 203 | mystring = str() 204 | for item in mylist: 205 | mystring+=item 206 | print mystring 207 | 208 | 209 | 210 | 211 | 212 | ######################################################################################## 213 | ################################## If statements ####################################### 214 | ######################################################################################## 215 | 216 | 217 | #What do you think this does? 218 | 219 | correct=True # this is another kind of variable, called a boolean: it can only be True or False 220 | 221 | if correct==True: 222 | print "You are right!" 223 | else: 224 | print "You are wrong!" 225 | 226 | 227 | # You can set as many cases as you want using elif (short for "else if") 228 | 229 | mystring = "ABCYSGD" 230 | for letter in mystring: 231 | if letter == "A": 232 | print "Alpha" 233 | elif letter == "B": 234 | print "Bravo" 235 | elif letter == "C": 236 | print "Charlie" 237 | elif letter == "D": 238 | print "Delta" 239 | else: 240 | print "Ummm, I don't know what %s is in military speak" % (letter) 241 | 242 | 243 | # You can also use these if statements for input validation 244 | # If you ask the user for a string of DNA, it would be nice to check that the string looks like DNA before you do a lot of work with it 245 | 246 | ############### Challenge: Loop through this string of DNA and output a message to the user for every letter that isn't A, T, C, or G 247 | 248 | DNA = "AGCUUUUCATKCTGACUUNNNAACGGGCAATAUGTCTCTGTHTGGATTAAAAAAAGAGTGTCMGATAGCAGC" 249 | 250 | 251 | 252 | # This is called "Input validation" 253 | 254 | # Now that you can validate a user's input, let's learn how to actually ask the user for input. It's pretty simple: 255 | 256 | 257 | DNA = raw_input('Enter your DNA: ') 258 | 259 | # whatever the user writes before pressing enter gets stored in the variable DNA 260 | 261 | ######## We can also check multiple things at once ######### 262 | 263 | letter = "A" 264 | 265 | if letter=="A" or letter=="G": 266 | print "Purine" 267 | elif letter=="U" or letter=="T" or letter=="C": 268 | print "Pyrimidine" 269 | else: 270 | print "Not a nucleotide" 271 | 272 | 273 | 274 | ######################################################################################## 275 | ##################################### Functions ######################################## 276 | ######################################################################################## 277 | mylist = [1,2,3,4,100] 278 | 279 | #built-in functions: 280 | min(mylist) 281 | max(mylist) 282 | sum(mylist) 283 | 284 | 285 | # General syntax for making your own functions: 286 | 287 | def function_name(argument1,argument2,etc): # don't forget the colon 288 | # do something with the arguments 289 | something = argument1+argument2*etc 290 | return something 291 | 292 | result = function_name(3,4,2) 293 | # an example: 294 | 295 | def double(mylist): 296 | doubled = [] 297 | for item in mylist: 298 | doubled.append(item*2) 299 | return doubled 300 | 301 | result = double(mylist) 302 | print result 303 | # What happens if we call double() and pass it a string? 304 | # Try it! 305 | 306 | 307 | # write a function that takes in a list, loops through it, and adds each item to a string 308 | # we saw some code that did that already, so just turn it into a function 309 | # make sure to return the string at the end 310 | 311 | 312 | ######################################################################################## 313 | ################################### Dictionaries ####################################### 314 | ######################################################################################## 315 | 316 | 317 | 318 | # a dictionary is very useful, it is a data structure (meaning it keeps data organized) 319 | # it has a bunch of key:value pairs just like a real dictionary, every word has a definition 320 | # you can also think of it as a decoder, every key is a code, and every value 321 | # stored with the code is what that code means 322 | 323 | my_dictionary = {"key":"value","secondkey":"secondvalue","thirdkey":"thirdvalue"} 324 | 325 | print my_dictionary["secondkey"] 326 | print my_dictionary.keys() 327 | 328 | print my_dictionary.values() 329 | 330 | for key in my_dictionary: 331 | print "Key: %s, Value: %s" % (key,my_dictionary[key]) 332 | 333 | # notice that the order of the dictionary isn't how we started it 334 | # dictionaries are automatically sorted so they are really fast to search 335 | # we use a key in a dictionary just like we normally use an index in a list or array 336 | 337 | # the useful part is when we use the dictionary as a decoder: 338 | mystring = "ABCYSGD" 339 | 340 | military = {"A":"Alpha", "B":"Bravo", "C":"Charlie", "D":"Delta"} 341 | 342 | for letter in mystring: 343 | print military.get(letter,"Ummm, I don't know what %s is in military speak" % letter) 344 | 345 | 346 | # remember we did the same thing with if statements before? 347 | # military is a dictionary: the perfect decoder 348 | # .get() is a method that all dictionaries have when they are created, 349 | # the first argument is the key (or the code) and the second is the default 350 | # the default is when the key isn't found 351 | # military["Y"] # gives an error: you can try it 352 | # but military.get("Y","?") # doesn't crash your application 353 | 354 | 355 | 356 | 357 | # DNA has a recipe for proteins with the code being each set of three nucleotides 358 | # then this is made into RNA (all the T's become U's) 359 | # (bases or letters: AGA, ACU, GUU, etc.) 360 | # the code "UUA" means the amino acid "L" or "Leucine" 361 | 362 | 363 | 364 | # this website has all the codes, can you make them into a dictionary? 365 | # https://www.manylabs.org/file/lessonMedia/69/geneticCode.png 366 | # Hint: we are gonna use this for the third project! 367 | 368 | 369 | ######################################################################################## 370 | ############################## File input/output ##################################### 371 | ######################################################################################## 372 | 373 | ############## INPUT ############### 374 | 375 | # open a file 376 | f = open("filename",'r') # 'r' means read 377 | for line in f: 378 | print line 379 | 380 | f.close() 381 | 382 | 383 | ############## OUTPUT ############### 384 | f2 = open("filename",'w') # 'w' means write, so you overwrite the file 385 | f2.write("Hello world") 386 | f2.write("\n") # \n means newline, as in pressing enter 387 | for i in xrange(10): 388 | f2.write("%d\t" % i) # \t means tab 389 | 390 | f2.close()# close the file safely to make sure the last part you wrote is saved 391 | 392 | # once f2 is closed, you can reuse the name 393 | 394 | f2 = open("filename2",'a') # 'a' means append, so you add to the file each time 395 | f2.write("Hello world") 396 | f2.write("\n") # \n means newline, as in pressing enter 397 | 398 | for i in xrange(10): 399 | f2.write("%d\t" % i) # \t means tab 400 | 401 | f2.close() 402 | 403 | 404 | 405 | 406 | ######################################################################################## 407 | ###################################### Numpy ######################################### 408 | ######################################################################################## 409 | 410 | # Try to find a function that would take the average, like mean(), average(), or avg() can you find one? 411 | # If you want to do something simple and can't find a function for it, you can search online. 412 | # Many of the simple math and science functions are in a module named numpy: 413 | 414 | import numpy as np 415 | # np is a common nickname for numpy, so now when you want to use it, you say: 416 | print np.mean(mylist) 417 | 418 | # Modules are packages of functions and other useful things, including something called an array 419 | # arrays are a bit like lists, but they are only for numbers, 420 | # that way you can do math on an entire array at once instead of looping through the list and doubling every element like we did before 421 | 422 | mylist = [1,2,3,4,100] 423 | myarray = np.array(mylist) # this takes mylist and makes it into an array, once again np is just the nickname for numpy 424 | 425 | print "doubling a list:" 426 | print mylist*2 427 | print "doubling an array:" 428 | print myarray*2 429 | 430 | # many things we do are easier with arrays, including anything involving math 431 | # An important thing to keep in mind is how to start an empty array if you don't want to just make a list into an array 432 | 433 | # if you have a sentence, and you want to save the lengths of all the words: 434 | 435 | sentence = "if you have a sentence, and you want to save the lengths of all the words" 436 | 437 | # one of many built-in tricks: 438 | words = sentence.split() 439 | # this one looks like a function but flipped around. This is because it's a built-in method for a string. 440 | # sentence is a string, and all strings have a bunch of methods that you can call by doing sentence.methodname() 441 | # Methods are functions that belong to objects, so split() is a method that all strings have when they are created, and you can use them whenever you need to 442 | # Other functions we made don't belong to any particular object, so they are very flexible. 443 | 444 | # using a list: 445 | lengths=[] 446 | for word in words: 447 | lengths.append(len(word)) 448 | print lengths 449 | 450 | # you can then convert this to an array: 451 | lengths=np.array(lengths) 452 | 453 | 454 | # using an array: 455 | lengths = np.zeros(len(words)) 456 | for i in xrange(len(words)): 457 | lengths[i]=len(words[i]) 458 | print lengths 459 | 460 | # using the array seems harder in this case, but sometimes it might come in handy. 461 | 462 | # I usually start with lists and convert them to arrays right before I have to do math on them 463 | 464 | 465 | ######## useful numpy functions ########### 466 | 467 | print np.std(myarray) 468 | print np.random.rand(10) # multiply, add, or round to get the random numbers you need 469 | 470 | # I want 100 random numbers from 10 up to and including 15, but only integers 471 | print np.floor(np.random.rand(100)*6+10) 472 | 473 | # np.floor() rounds down, you can also use np.ceil() to round up, or np.around() to round up or down, whichever is closest (like you learn in school) 474 | 475 | # There are tons of numpy modules, just search online. 476 | # Anything you have ever seen in a math class will probably exist in numpy 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | -------------------------------------------------------------------------------- /python_intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:42512dd288705f0efed4039b19230604208bfe8b48a5e91bb30d2c401f6189fa" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | " An intro to Python for biology" 17 | ] 18 | }, 19 | { 20 | "cell_type": "heading", 21 | "level": 3, 22 | "metadata": {}, 23 | "source": [ 24 | "Variables and printing" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "collapsed": false, 30 | "input": [ 31 | "# this is a comment, use these to tell people what your code does" 32 | ], 33 | "language": "python", 34 | "metadata": {}, 35 | "outputs": [] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "collapsed": false, 40 | "input": [ 41 | "mystring = \"Homo sapiens\" # a string, letters, words, sentences, anything in quotes\n", 42 | "myinteger = 4 # an integer\n", 43 | "myfloat = 4.0 # a float (a number with a decimal point)\n", 44 | "\n", 45 | "print \"This is a string:\", mystring, \"this is a float:\", myfloat, \"and this is an integer: \", myinteger" 46 | ], 47 | "language": "python", 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "output_type": "stream", 52 | "stream": "stdout", 53 | "text": [ 54 | "This is a string: Homo sapiens this is a float: 4.0 and this is an integer: 4\n" 55 | ] 56 | } 57 | ], 58 | "prompt_number": 4 59 | }, 60 | { 61 | "cell_type": "heading", 62 | "level": 3, 63 | "metadata": {}, 64 | "source": [ 65 | "Basic Math" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "You can do calculations:" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "collapsed": false, 78 | "input": [ 79 | "myfloat = 2.0\n", 80 | "yourfloat = 3.0\n", 81 | "print myfloat/yourfloat" 82 | ], 83 | "language": "python", 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "output_type": "stream", 88 | "stream": "stdout", 89 | "text": [ 90 | "0.666666666667\n" 91 | ] 92 | } 93 | ], 94 | "prompt_number": 6 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "But be careful, when you calculate with only integers, you can only get an integer back, so Python rounds your answer down!" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "collapsed": false, 106 | "input": [ 107 | "myint = 2\n", 108 | "yourint = 3\n", 109 | "print myint/yourint" 110 | ], 111 | "language": "python", 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "output_type": "stream", 116 | "stream": "stdout", 117 | "text": [ 118 | "0\n" 119 | ] 120 | } 121 | ], 122 | "prompt_number": 7 123 | }, 124 | { 125 | "cell_type": "code", 126 | "collapsed": false, 127 | "input": [ 128 | "myint = 2\n", 129 | "yourint = 3\n", 130 | "\n", 131 | "# Which of these work and why?" 132 | ], 133 | "language": "python", 134 | "metadata": {}, 135 | "outputs": [], 136 | "prompt_number": 10 137 | }, 138 | { 139 | "cell_type": "code", 140 | "collapsed": false, 141 | "input": [ 142 | "print float(myint)/yourint # 1" 143 | ], 144 | "language": "python", 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "output_type": "stream", 149 | "stream": "stdout", 150 | "text": [ 151 | "0.666666666667\n" 152 | ] 153 | } 154 | ], 155 | "prompt_number": 20 156 | }, 157 | { 158 | "cell_type": "code", 159 | "collapsed": false, 160 | "input": [ 161 | "print myint/float(yourint) # 2" 162 | ], 163 | "language": "python", 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "output_type": "stream", 168 | "stream": "stdout", 169 | "text": [ 170 | "0.666666666667\n" 171 | ] 172 | } 173 | ], 174 | "prompt_number": 21 175 | }, 176 | { 177 | "cell_type": "code", 178 | "collapsed": false, 179 | "input": [ 180 | "print (myint+0.0)/yourint # 3" 181 | ], 182 | "language": "python", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "output_type": "stream", 187 | "stream": "stdout", 188 | "text": [ 189 | "0.666666666667\n" 190 | ] 191 | } 192 | ], 193 | "prompt_number": 22 194 | }, 195 | { 196 | "cell_type": "code", 197 | "collapsed": false, 198 | "input": [ 199 | "print myint/yourint*1.0 # 4" 200 | ], 201 | "language": "python", 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "output_type": "stream", 206 | "stream": "stdout", 207 | "text": [ 208 | "0.0\n" 209 | ] 210 | } 211 | ], 212 | "prompt_number": 23 213 | }, 214 | { 215 | "cell_type": "code", 216 | "collapsed": false, 217 | "input": [ 218 | "print myint*1.0/yourint # 5" 219 | ], 220 | "language": "python", 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "output_type": "stream", 225 | "stream": "stdout", 226 | "text": [ 227 | "0.666666666667\n" 228 | ] 229 | } 230 | ], 231 | "prompt_number": 24 232 | }, 233 | { 234 | "cell_type": "code", 235 | "collapsed": false, 236 | "input": [ 237 | "print float(myint/yourint) # 6" 238 | ], 239 | "language": "python", 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "output_type": "stream", 244 | "stream": "stdout", 245 | "text": [ 246 | "0.0\n" 247 | ] 248 | } 249 | ], 250 | "prompt_number": 25 251 | }, 252 | { 253 | "cell_type": "heading", 254 | "level": 3, 255 | "metadata": {}, 256 | "source": [ 257 | "Lists" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "collapsed": false, 263 | "input": [ 264 | "mylist = [6,3,6,7,2,6,2,9,7,0]\n", 265 | "print mylist" 266 | ], 267 | "language": "python", 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "output_type": "stream", 272 | "stream": "stdout", 273 | "text": [ 274 | "[6, 3, 6, 7, 2, 6, 2, 9, 7, 0]\n" 275 | ] 276 | } 277 | ], 278 | "prompt_number": 32 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "You can get each item of the list using the brackets. Try to replace 0 with other numbers and see what you get:" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "collapsed": false, 290 | "input": [ 291 | "print mylist[0]" 292 | ], 293 | "language": "python", 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "output_type": "stream", 298 | "stream": "stdout", 299 | "text": [ 300 | "6\n" 301 | ] 302 | } 303 | ], 304 | "prompt_number": 33 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "Here the number inside the brackets is called an index, it represents the location of an item in the list. " 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "You can also get more than one item at a time by providing a range of indices:" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "collapsed": false, 323 | "input": [ 324 | "print mylist[0:3]" 325 | ], 326 | "language": "python", 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "output_type": "stream", 331 | "stream": "stdout", 332 | "text": [ 333 | "[6, 3, 6]\n" 334 | ] 335 | } 336 | ], 337 | "prompt_number": 34 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "Strings can be words, sentences, and anything else you can write as a sequence of characters. They act a lot like lists. " 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "You can make a string using quotes" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "collapsed": false, 356 | "input": [ 357 | "mystring = \"ACGT\"\n", 358 | "print mystring" 359 | ], 360 | "language": "python", 361 | "metadata": {}, 362 | "outputs": [ 363 | { 364 | "output_type": "stream", 365 | "stream": "stdout", 366 | "text": [ 367 | "ACGT\n", 368 | "4\n", 369 | "A\n", 370 | "C\n", 371 | "G\n", 372 | "T\n" 373 | ] 374 | } 375 | ], 376 | "prompt_number": 26 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "What happens if you remove the quotes? What about using single quotes like 'this'?" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "collapsed": false, 388 | "input": [ 389 | "mystring = ACGT\n", 390 | "print mystring" 391 | ], 392 | "language": "python", 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "ename": "NameError", 397 | "evalue": "name 'ACGT' is not defined", 398 | "output_type": "pyerr", 399 | "traceback": [ 400 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 401 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmystring\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mACGT\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mmystring\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 402 | "\u001b[0;31mNameError\u001b[0m: name 'ACGT' is not defined" 403 | ] 404 | } 405 | ], 406 | "prompt_number": 30 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "You can get the length of the string using the len() function:" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "collapsed": false, 418 | "input": [ 419 | "mystring = \"ACGT\"\n", 420 | "print len(mystring)" 421 | ], 422 | "language": "python", 423 | "metadata": {}, 424 | "outputs": [] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "You can get each letter of the string using the brackets. Try to replace 0 with other numbers and see what you get:" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "collapsed": false, 436 | "input": [ 437 | "print mystring[0]" 438 | ], 439 | "language": "python", 440 | "metadata": {}, 441 | "outputs": [ 442 | { 443 | "output_type": "stream", 444 | "stream": "stdout", 445 | "text": [ 446 | "A\n" 447 | ] 448 | } 449 | ], 450 | "prompt_number": 29 451 | }, 452 | { 453 | "cell_type": "markdown", 454 | "metadata": {}, 455 | "source": [ 456 | "Here is an example with DNA:" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "collapsed": false, 462 | "input": [ 463 | "DNA = \"CAACGGGCAATATGTCTCTGTGTG\"\n", 464 | "print \"Your DNA is\", len(DNA), \"bases long\"\n", 465 | "\n", 466 | "print DNA[7]" 467 | ], 468 | "language": "python", 469 | "metadata": {}, 470 | "outputs": [ 471 | { 472 | "output_type": "stream", 473 | "stream": "stdout", 474 | "text": [ 475 | "Your DNA is 24 bases long\n", 476 | "C\n" 477 | ] 478 | } 479 | ], 480 | "prompt_number": 35 481 | } 482 | ], 483 | "metadata": {} 484 | } 485 | ] 486 | } --------------------------------------------------------------------------------