├── .gitignore ├── README.md ├── assignment1 ├── Assignment1.docx ├── Assignment1.pdf ├── assignment1.ipynb ├── assignment1_clean.ipynb └── interest.png ├── assignment2 ├── Assignment2.docx ├── Assignment2.pdf ├── datasplitting.pptx ├── exploratory.ipynb ├── feature_importance.png ├── samsung_data_prediction.ipynb ├── samsung_data_prediction_submitted.ipynb └── sitstand_pca.png ├── custom.css ├── data ├── camera.xls ├── camera.xlsx ├── cameras.csv ├── camerasModified.csv ├── face.rda ├── gaData.csv ├── gaData.rda ├── galton.csv ├── loansData.csv ├── movies.txt ├── ravensData.csv ├── ravensData.rda ├── samsungData.csv ├── samsungData.rda ├── ss06pid.csv └── warpbreaks.csv ├── week1 ├── representing_data.ipynb ├── simulation.ipynb └── wk1_quiz.ipynb ├── week2 ├── data_munging_basics.ipynb ├── getting_data.ipynb ├── structure_of_a_data_analysis.ipynb ├── summarizing_data.ipynb └── wk2_quiz.ipynb ├── week3 ├── exploratory_graphs.ipynb ├── expository_graphs.ipynb ├── hierarchical_clustering.ipynb ├── k-means_clustering.ipynb ├── svd_pca.ipynb ├── twoPanel.pdf ├── twoPanel.png └── wk3_quiz.ipynb ├── week4 ├── basic_least_squares.ipynb ├── clustering_example.ipynb ├── inference_basics.ipynb ├── multiple_variable_regression.ipynb ├── regression_in_real_world.ipynb ├── regression_with_factor_vars.ipynb └── wk4_quiz.ipynb ├── week5 ├── anova_with_multiple_factors.ipynb ├── binary_outcomes.ipynb ├── count_outcomes.ipynb └── model_checking_and_selection.ipynb ├── week6 ├── predicting_with_regression.ipynb ├── predicting_with_trees.ipynb ├── quiz.ipynb ├── tree.png ├── tree2.png └── tree3.png ├── week7 ├── bootstrap.ipynb ├── random_forest.ipynb └── smoothing.ipynb └── week8 ├── multiple_testing.ipynb └── simulation_for_model_checking.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | *.c 6 | 7 | # Packages 8 | *.egg 9 | *.egg-info 10 | dist 11 | build 12 | eggs 13 | parts 14 | 
bin 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | __pycache__ 22 | 23 | # Installer logs 24 | pip-log.txt 25 | 26 | # Unit test / coverage reports 27 | .coverage 28 | .tox 29 | nosetests.xml 30 | 31 | # Translations 32 | *.mo 33 | 34 | # Mr Developer 35 | .mr.developer.cfg 36 | .project 37 | .pydevproject 38 | 39 | # Xcode 40 | *.xcodeproj 41 | 42 | # OS generated files 43 | .DS_Store 44 | .DS_Store? 45 | # ._* 46 | # .Spotlight-V100 47 | # .Trashes 48 | # Icon? 49 | # ehthumbs.db 50 | # Thumbs.db 51 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Coursera Data Analysis, The Python Version 2 | ========================================== 3 | 4 | This is a collection of IPython notebooks that I created while following Coursera's Data Analysis course by Jeff Leek, assistant professor in the Biostatistics Department of the Johns Hopkins Bloomberg School of Public Health. 5 | 6 | The course itself uses R to perform data analysis. But since my priority and future objective is to use Python as a general data analysis framework, I decided to follow the course as much as possible using Python. This proved to be a very effective strategy to master data analysis in Python, and more importantly, to know what the limitations are. 7 | 8 | Most of the data analysis tasks in the course (that are done in R) can be done using the following Python libraries: 9 | * numpy and scipy, 10 | * pandas, 11 | * statsmodels, 12 | * patsy, 13 | * scikit-learn, 14 | * and matplotlib for plotting. 15 | 16 | And IPython, of course, what else. 
17 | 18 | However, I also discovered that in some cases R is the only way to go, for example (non-exhaustive list): 19 | * smoothscatter plot 20 | * loess smoothing 21 | * splines regression 22 | * (To Be Completed) 23 | 24 | The IPython notebooks are created assuming that they are read/executed while watching or following the course video lectures. So it's very likely that you'll find some parts that don't really make much sense if you just read them as they are without the videos. As of now (March 2013) I haven't put much effort into explaining what is being done in some steps, or what the objectives of some code snippets are. I'm still working on writing more explanations; this is still a work in progress, so stay tuned. 25 | 26 | 27 | Week 1: 28 | * Representing data 29 | * Simulation 30 | 31 | 32 | Week 2: 33 | * Structure of a data analysis 34 | * Getting data 35 | * Summarising data 36 | * Data munging basics 37 | 38 | 39 | Week 3: 40 | * Exploratory graphs 41 | * Expository graphs 42 | * Hierarchical clustering 43 | * K-Means clustering 44 | * Dimension reduction (SVD and PCA) 45 | 46 | 47 | Week 4: 48 | * Clustering example 49 | * Basic least squares 50 | * Inference basics 51 | * Regression with factor variables 52 | * Multiple variable regression 53 | * Regression in the real world 54 | 55 | 56 | Week 5: 57 | * ANOVA with multiple factors 58 | * Binary outcomes 59 | * Count outcomes 60 | * Model checking and selection 61 | 62 | 63 | Week 6: 64 | * Predicting with regression 65 | * Predicting with trees 66 | 67 | 68 | Week 7: 69 | * Smoothing 70 | * Bootstrap 71 | * Random Forest 72 | 73 | 74 | Week 8: 75 | * Multiple testing 76 | * Simulation for model checking 77 | 78 | 79 | Assignments: 80 | * Assignment 1 81 | * Assignment 2 82 | 83 | -------------------------------------------------------------------------------- /assignment1/Assignment1.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/assignment1/Assignment1.docx -------------------------------------------------------------------------------- /assignment1/Assignment1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/assignment1/Assignment1.pdf -------------------------------------------------------------------------------- /assignment1/interest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/assignment1/interest.png -------------------------------------------------------------------------------- /assignment2/Assignment2.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/assignment2/Assignment2.docx -------------------------------------------------------------------------------- /assignment2/Assignment2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/assignment2/Assignment2.pdf -------------------------------------------------------------------------------- /assignment2/datasplitting.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/assignment2/datasplitting.pptx -------------------------------------------------------------------------------- /assignment2/feature_importance.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/assignment2/feature_importance.png -------------------------------------------------------------------------------- /assignment2/sitstand_pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/assignment2/sitstand_pca.png -------------------------------------------------------------------------------- /custom.css: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/camera.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/data/camera.xls -------------------------------------------------------------------------------- /data/camera.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/data/camera.xlsx -------------------------------------------------------------------------------- /data/cameras.csv: -------------------------------------------------------------------------------- 1 | address,direction,street,crossStreet,intersection,Location 1 2 | S CATON AVE & BENSON AVE,N/B,Caton Ave,Benson Ave,Caton Ave & Benson Ave,"(39.2693779962, -76.6688185297)" 3 | S CATON AVE & BENSON AVE,S/B,Caton Ave,Benson Ave,Caton Ave & Benson Ave,"(39.2693157898, -76.6689698176)" 4 | WILKENS AVE & PINE HEIGHTS AVE,E/B,Wilkens Ave,Pine Heights,Wilkens Ave & Pine Heights,"(39.2720252302, -76.676960806)" 5 | THE ALAMEDA & E 33RD ST,S/B,The Alameda,33rd St,The Alameda & 33rd St,"(39.3285013141, -76.5953545714)" 6 | E 33RD ST & THE ALAMEDA,E/B,E 33rd,The Alameda,E 33rd & The Alameda,"(39.3283410623, 
-76.5953594625)" 7 | ERDMAN AVE & N MACON ST,E/B,Erdman,Macon St,Erdman & Macon St,"(39.3068045671, -76.5593167803)" 8 | ERDMAN AVE & N MACON ST,W/B,Erdman,Macon St,Erdman & Macon St,"(39.306966535, -76.5593122365)" 9 | N CHARLES ST & E LAKE AVE,S/B,Charles,Lake Ave,Charles & Lake Ave,"(39.3690535299, -76.625826716)" 10 | E MADISON ST & N CAROLINE ST,W/B,Madison,Caroline St,Madison & Caroline St,"(39.2993257666, -76.5976760827)" 11 | ORLEANS ST & N LINWOOD AVE,E/B,Orleans,Linwood Ave,Orleans & Linwood Ave,"(39.2958661981, -76.5764270078)" 12 | EASTERN AVE & KANE ST,E/B,Eastern,Kane St,Eastern & Kane St,"(39.2877626582, -76.5371017795)" 13 | EDMONDSON AVE & COOKS LN,E/B,Edmonson,Cooks Lane,Edmonson & Cooks Lane,"(39.2923680595, -76.7017056326)" 14 | W FRANKLIN ST & N PULASKI ST,W/B,Franklin,Pulaski St,Franklin & Pulaski St,"(39.2937082594, -76.6503837515)" 15 | ORLEANS ST & N GAY ST,E/B,Orleans,Gay St,Orleans & Gay St,"(39.2947203114, -76.606128007)" 16 | S MARTIN LUTHER KING JR BLVD & WASHINGTON BLVD,N/B,MLK Jr. Blvd.,Washington Blvd,MLK Jr. Blvd. & Washington Blvd,"(39.2834598231, -76.6261138807)" 17 | HILLEN RD & ARGONNE DR,S/B,Hillen Rd,Argonne Drive,Hillen Rd & Argonne Drive,"(39.3399907644, -76.588021025)" 18 | W NORTH AVE & N HOWARD ST,W/B,North Ave,Howard St,North Ave & Howard St,"(39.3110873669, -76.6193071428)" 19 | E PATAPSCO AVE & 4TH ST,W/B,Patapsco ,4th St,"Patapsco 20 | & 4th St","(39.2372692804, -76.6054039252)" 21 | REISTERSTOWN RD & FALLSTAFF RD,S/B,Reisterstown,Fallstaff Road,Reisterstown & Fallstaff Road,"(39.3621351031, -76.7102427408)" 22 | PARK HEIGHTS AVE & HAYWARD AVE,N/B,Park Heights,Hayward Ave,Park Heights & Hayward Ave,"(39.3499204055, -76.6788706721)" 23 | PARK HEIGHTS AVE & HAYWARD AVE,S/B,Park Heights,Hayward Ave,Park Heights & Hayward Ave,"(39.3499204055, -76.6788706721)" 24 | S MARTIN LUTHER KING JR BLVD & W PRATT ST,S/B,MLK Jr. Blvd ,Pratt St,"MLK Jr. 
Blvd 25 | & Pratt St","(39.2860268994, -76.6278460704)" 26 | W NORTHERN PKWY & GREENSPRING AVE,E/B,Northern Pkwy,Greenspring Ave,Northern Pkwy & Greenspring Ave,"(39.3550243172, -76.6604587972)" 27 | W NORTHERN PKWY & GREENSPRING AVE,W/B,Northern Pkwy,Greenspring Ave,Northern Pkwy & Greenspring Ave,"(39.3551612114, -76.6605058823)" 28 | EDMONDSON AVE & N ATHOL AVE,E/B,Edmonson,Woodbridge Ave,"Edmonson 29 | & Woodbridge Ave","(39.2934525382, -76.6893905792)" 30 | EDMONDSON AVE & N ATHOL AVE,W/B,Edmonson ,Woodbridge Ave,"Edmonson 31 | & Woodbridge Ave","(39.2935925831, -76.689399241)" 32 | FREDERICK AVE & S CATHERINE ST,E/B,Fredrick Ave,Catherine Ave,"Fredrick Ave 33 | & Catherine Ave","(39.2833935865, -76.6559289926)" 34 | PARK HEIGHTS AVE & VIOLET AVE,N/B,Park Heights,Voilet Ave,Park Heights & Voilet Ave,"(39.3307133383, -76.6597502076)" 35 | SINCLAIR LN & MORAVIA RD,W/B,Sinclair,Moravia Road,Sinclair & Moravia Road,"(39.3220360862, -76.5484525966)" 36 | WILKENS AVE & DESOTO RD,E/B,Wilkens,DeSoto,Wilkens & DeSoto,"(39.2749037576, -76.6681630903)" 37 | E NORTHERN PKWY & WAVERLY WAY,W/B,Northern Pkwy,Waverly St,Northern Pkwy & Waverly St,"(39.367051509, -76.5810739873)" 38 | E COLD SPRING LN & HILLEN RD,W/B,Cold Spring,Hillen Road,Cold Spring & Hillen Road,"(39.3459074717, -76.5859273904)" 39 | W COLD SPRING LN & ROLAND AVE,E/B,Cold Spring,Roland Ave,"Cold Spring 40 | & Roland Ave","(39.34390606, -76.6354262185)" 41 | E COLD SPRING LN & LOCH RAVEN BLVD,W/B,Cold Spring,Loch Raven Blvd,"Cold Spring 42 | & Loch Raven Blvd","(39.3460452397, -76.5920075335)" 43 | TAMARIND RD & W COLD SPRING LN,E/B,Tamarind,Coldspring Lane,"Tamarind 44 | & Coldspring Lane","(39.343899313, -76.6519407916)" 45 | HARFORD RD & THE ALAMEDA,N/B,Harford ,The Alameda,"Harford 46 | & The Alameda","(39.3212074758, -76.5907705888)" 47 | HARFORD RD & ROSALIE AVE,N/B,Harford,Rosalie Ave,"Harford 48 | & Rosalie Ave","(39.3680654859, -76.5478183886)" 49 | HARFORD RD & CHRISTOPHER AVE,N/B,Harford 
,Christopher Ave,"Harford 50 | & Christopher Ave","(39.3583326463, -76.5562471252)" 51 | SINCLAIR LN & SHANNON DR,E/B,Sinclair,Shannon Drive,"Sinclair 52 | & Shannon Drive","(39.3175028074, -76.5556737923)" 53 | SINCLAIR LN & SHANNON DR,W/B,Sinclair ,Shannon Drive,"Sinclair 54 | & Shannon Drive","(39.3175028074, -76.5556737923)" 55 | LIBERTY HTS & HILLSDALE RD,E/B,Liberty Hghts,Hillsdale Ave,"Liberty Hghts 56 | & Hillsdale Ave","(39.3304453818, -76.6945100293)" 57 | LIBERTY HTS & HILLSDALE RD,W/B,Liberty Hghts,Hillsdale Ave,"Liberty Hghts 58 | & Hillsdale Ave","(39.3304453818, -76.6945100293)" 59 | E NORTHERN PKWY & SPRINGLAKE WAY,W/B,Northern Pkwy,Springlake Way,"Northern Pkwy 60 | & Springlake Way","(39.3643110382, -76.6176988692)" 61 | HARFORD RD & WALTHER AVE,S/B,Harford,Walther Ave,Harford & Walther Ave,"(39.3353084656, -76.5751846354)" 62 | W NORTHERN PKWY & FALLS RD,W/B,Northern Pkwy,Falls Road,Northern Pkwy & Falls Road,"(39.3614127238, -76.6462199216)" 63 | EDMONDSON AVE & N HILTON ST,E/B,Edmonson,Hilton St,Edmonson & Hilton St,"(39.294061339, -76.6727600072)" 64 | N PRESIDENT ST & E FAYETTE ST,S/B,President,Fayette St,President & Fayette St,"(39.2906240776, -76.6066648277)" 65 | RUSSELL ST & W HAMBURG ST,N/B,Russell ,Hamburg St,"Russell 66 | & Hamburg St","(39.2797863216, -76.6237544477)" 67 | RUSSELL ST & W HAMBURG ST,S/B,Russell,Hamburg St,"Russell 68 | & Hamburg St","(39.2798187398, -76.6239105956)" 69 | LIGHT ST & E PRATT ST,S/B,Light SB ,Pratt St,"Light SB 70 | & Pratt St","(39.2865415105, -76.6135493898)" 71 | E LOMBARD ST & S GAY ST,W/B,Lombard ,Gay St,"Lombard 72 | & Gay St","(39.2877660778, -76.6087475019)" 73 | HARFORD RD & E NORTH AVE,N/B,Harford Rd,North Ave,"Harford Rd 74 | & North Ave","(39.3119363297, -76.5993579666)" 75 | FORT SMALLWOOD RD & FORT ARMISTEAD RD,S/B,Ft Smallwood,Fort Armstead,"Ft Smallwood 76 | & Fort Armstead","(39.1999130165, -76.5559766825)" 77 | GARRISON BLVD & WABASH AVE,E/B,Garrison ,Wabash Ave,"Garrison 78 | & Wabash 
Ave","(39.3412090597, -76.6831167251)" 79 | WALTHER AVE & GLENMORE AVE,N/B,Walther ,Glenmore,"Walther 80 | & Glenmore","(39.3535402213, -76.5424942905)" 81 | W FRANKLIN ST & CATHEDRAL ST,W/B,Franklin ,Cathedral,"Franklin 82 | & Cathedral","(39.2950659131, -76.616872047)" 83 | PERRING PKWY & E BELVEDERE AVE,S/B,Perring Pkwy,Belvedere Ave,"Perring Pkwy 84 | & Belvedere Ave","(39.3549627467, -76.575725921)" 85 | GWYNNS FLS & GARRISON BLVD,W/B,Gwynns Falls ,Garrison Blvd,"Gwynns Falls 86 | & Garrison Blvd","(39.3135792902, -76.6762250182)" 87 | REISTERSTOWN RD & DRUID PARK DR,S/B,Reistertown Rd,Druid Lake Drive,"Reistertown Rd 88 | & Druid Lake Drive","(39.3252867997, -76.6577109834)" 89 | POTEE ST & TALBOTT ST,S/B,Potee,Talbot,"Potee 90 | & Talbot","(39.2364856246, -76.6122106478)" 91 | YORK RD & GITTINGS AVE,S/B,York Rd ,Gitting Ave,"York Rd 92 | & Gitting Ave","(39.3704929583, -76.6098121277)" 93 | WABASH AVE & W BELVEDERE AVE,E/B,Wabash ,Belvedere Ave,"Wabash 94 | & Belvedere Ave","(39.3416713553, -76.685042508)" 95 | E NORTHERN PKWY & YORK RD,W/B,Northern Pkwy,York Road,"Northern Pkwy 96 | & York Road","(39.3651462024, -76.6099220341)" 97 | REISTERSTOWN RD & PATTERSON AVE,E/B,Reistertown ,Patterson Ave,"Reistertown 98 | & Patterson Ave","(39.3561820401, -76.7025209585)" 99 | PULASKI HWY & E MONUMENT ST,E/B,Pulaski Hwy ,Monument St,"Pulaski Hwy 100 | & Monument St","(39.2997984532, -76.5542506552)" 101 | W FRANKLIN ST & N FRANKLINTOWN RD,E/B,Franklin ,Franklintown Road,"Franklin 102 | & Franklintown Road","(39.2930857459, -76.6623624871)" 103 | S HANOVER ST & E CROMWELL ST,S/B,Hanover ,Cromwell St, &,"(39.2620595495, -76.6145872791)" 104 | REISTERSTOWN RD & MENLO DR,N/B,Reisterstown ,Menlo Drive,"Reisterstown 105 | & Menlo Drive","(39.3519849983, -76.6963758159)" 106 | RUSSELL ST & BAYARD ST,S/B,Russell ,Bayard St,"Russell 107 | & Bayard St","(39.2734675536, -76.6287385068)" 108 | LIBERTY HEIGHTS AVE & N DUKELAND ST,E/B,Liberty Hghts,Dukeland St,"Liberty Hghts 109 
| & Dukeland St","(39.3226393684, -76.6666335903)" 110 | S HANOVER & REEDBIRD AVE,N/B,Hanover ,Reedbird Ave,"Hanover 111 | & Reedbird Ave","(39.2498797679, -76.6137494417)" 112 | W FAYETTE ST & N LIBERTY ST,W/B,Fayette,Liberty Heights Ave,"Fayette 113 | & Liberty Heights Ave","(39.2905038859, -76.617661176)" 114 | GWYNNS FLS & GARRISON BLVD,E/B,Gwynns Falls ,Garrison Blvd,"Gwynns Falls 115 | & Garrison Blvd","(39.3135792902, -76.6762250182)" 116 | LOCH RAVEN BLVD & WALKER AVE,N/B,Loch Raven,Walker Ave,"Loch Raven 117 | & Walker Ave","(39.3700602939, -76.5831798122)" 118 | PULASKI HWY & MORAVIA PARK DR,W/B,Pulaski Hwy ,Moravia Park Drive,"Pulaski Hwy 119 | & Moravia Park Drive","(39.3086437141, -76.5323658645)" 120 | HILLEN ST & FORREST ST,W/B,Hillen ,Forrest St,"Hillen 121 | & Forrest St","(39.2968601609, -76.6055317698)" 122 | PULASKI HWY & NORTH POINT RD,E/B,Pulaski ,North Point Blvd,"Pulaski 123 | & North Point Blvd","(39.3043275894, -76.5395800163)" 124 | N MONROE ST & W LAFAYETTE AVE,S/B,Monroe,Lafayette,"Monroe 125 | & Lafayette","(39.2987426878, -76.647517496)" 126 | W MOUNT ROYAL AVE & W NORTH AVE,N/B,Mt Royal,North,"Mt Royal 127 | & North","(39.3107672842, -76.6247302963)" 128 | MOUNT ROYAL TER & W NORTH AVE,S/B,Mt Royal,North,"Mt Royal 129 | & North","(39.3108706445, -76.6251255783)" 130 | -------------------------------------------------------------------------------- /data/camerasModified.csv: -------------------------------------------------------------------------------- 1 | street,crossStreet,intersection,Location 1 2 | Caton Ave,Benson Ave,Caton Ave & Benson Ave,"(39.2693779962, -76.6688185297)" 3 | Caton Ave,Benson Ave,Caton Ave & Benson Ave,"(39.2693157898, -76.6689698176)" 4 | Wilkens Ave,Pine Heights,Wilkens Ave & Pine Heights,"(39.2720252302, -76.676960806)" 5 | The Alameda,33rd St,The Alameda & 33rd St,"(39.3285013141, -76.5953545714)" 6 | E 33rd,The Alameda,E 33rd & The Alameda,"(39.3283410623, -76.5953594625)" 7 | Erdman,Macon St,Erdman & 
Macon St,"(39.3068045671, -76.5593167803)" 8 | Erdman,Macon St,Erdman & Macon St,"(39.306966535, -76.5593122365)" 9 | Charles,Lake Ave,Charles & Lake Ave,"(39.3690535299, -76.625826716)" 10 | Madison,Caroline St,Madison & Caroline St,"(39.2993257666, -76.5976760827)" 11 | Orleans,Linwood Ave,Orleans & Linwood Ave,"(39.2958661981, -76.5764270078)" 12 | Eastern,Kane St,Eastern & Kane St,"(39.2877626582, -76.5371017795)" 13 | Edmonson,Cooks Lane,Edmonson & Cooks Lane,"(39.2923680595, -76.7017056326)" 14 | Franklin,Pulaski St,Franklin & Pulaski St,"(39.2937082594, -76.6503837515)" 15 | Orleans,Gay St,Orleans & Gay St,"(39.2947203114, -76.606128007)" 16 | MLK Jr. Blvd.,Washington Blvd,MLK Jr. Blvd. & Washington Blvd,"(39.2834598231, -76.6261138807)" 17 | Hillen Rd,Argonne Drive,Hillen Rd & Argonne Drive,"(39.3399907644, -76.588021025)" 18 | North Ave,Howard St,North Ave & Howard St,"(39.3110873669, -76.6193071428)" 19 | Patapsco ,4th St,"Patapsco 20 | & 4th St","(39.2372692804, -76.6054039252)" 21 | Reisterstown,Fallstaff Road,Reisterstown & Fallstaff Road,"(39.3621351031, -76.7102427408)" 22 | Park Heights,Hayward Ave,Park Heights & Hayward Ave,"(39.3499204055, -76.6788706721)" 23 | Park Heights,Hayward Ave,Park Heights & Hayward Ave,"(39.3499204055, -76.6788706721)" 24 | MLK Jr. Blvd ,Pratt St,"MLK Jr. 
Blvd 25 | & Pratt St","(39.2860268994, -76.6278460704)" 26 | Northern Pkwy,Greenspring Ave,Northern Pkwy & Greenspring Ave,"(39.3550243172, -76.6604587972)" 27 | Northern Pkwy,Greenspring Ave,Northern Pkwy & Greenspring Ave,"(39.3551612114, -76.6605058823)" 28 | Edmonson,Woodbridge Ave,"Edmonson 29 | & Woodbridge Ave","(39.2934525382, -76.6893905792)" 30 | Edmonson ,Woodbridge Ave,"Edmonson 31 | & Woodbridge Ave","(39.2935925831, -76.689399241)" 32 | Fredrick Ave,Catherine Ave,"Fredrick Ave 33 | & Catherine Ave","(39.2833935865, -76.6559289926)" 34 | Park Heights,Voilet Ave,Park Heights & Voilet Ave,"(39.3307133383, -76.6597502076)" 35 | Sinclair,Moravia Road,Sinclair & Moravia Road,"(39.3220360862, -76.5484525966)" 36 | Wilkens,DeSoto,Wilkens & DeSoto,"(39.2749037576, -76.6681630903)" 37 | Northern Pkwy,Waverly St,Northern Pkwy & Waverly St,"(39.367051509, -76.5810739873)" 38 | Cold Spring,Hillen Road,Cold Spring & Hillen Road,"(39.3459074717, -76.5859273904)" 39 | Cold Spring,Roland Ave,"Cold Spring 40 | & Roland Ave","(39.34390606, -76.6354262185)" 41 | Cold Spring,Loch Raven Blvd,"Cold Spring 42 | & Loch Raven Blvd","(39.3460452397, -76.5920075335)" 43 | Tamarind,Coldspring Lane,"Tamarind 44 | & Coldspring Lane","(39.343899313, -76.6519407916)" 45 | Harford ,The Alameda,"Harford 46 | & The Alameda","(39.3212074758, -76.5907705888)" 47 | Harford,Rosalie Ave,"Harford 48 | & Rosalie Ave","(39.3680654859, -76.5478183886)" 49 | Harford ,Christopher Ave,"Harford 50 | & Christopher Ave","(39.3583326463, -76.5562471252)" 51 | Sinclair,Shannon Drive,"Sinclair 52 | & Shannon Drive","(39.3175028074, -76.5556737923)" 53 | Sinclair ,Shannon Drive,"Sinclair 54 | & Shannon Drive","(39.3175028074, -76.5556737923)" 55 | Liberty Hghts,Hillsdale Ave,"Liberty Hghts 56 | & Hillsdale Ave","(39.3304453818, -76.6945100293)" 57 | Liberty Hghts,Hillsdale Ave,"Liberty Hghts 58 | & Hillsdale Ave","(39.3304453818, -76.6945100293)" 59 | Northern Pkwy,Springlake Way,"Northern Pkwy 60 | & 
Springlake Way","(39.3643110382, -76.6176988692)" 61 | Harford,Walther Ave,Harford & Walther Ave,"(39.3353084656, -76.5751846354)" 62 | Northern Pkwy,Falls Road,Northern Pkwy & Falls Road,"(39.3614127238, -76.6462199216)" 63 | Edmonson,Hilton St,Edmonson & Hilton St,"(39.294061339, -76.6727600072)" 64 | President,Fayette St,President & Fayette St,"(39.2906240776, -76.6066648277)" 65 | Russell ,Hamburg St,"Russell 66 | & Hamburg St","(39.2797863216, -76.6237544477)" 67 | Russell,Hamburg St,"Russell 68 | & Hamburg St","(39.2798187398, -76.6239105956)" 69 | Light SB ,Pratt St,"Light SB 70 | & Pratt St","(39.2865415105, -76.6135493898)" 71 | Lombard ,Gay St,"Lombard 72 | & Gay St","(39.2877660778, -76.6087475019)" 73 | Harford Rd,North Ave,"Harford Rd 74 | & North Ave","(39.3119363297, -76.5993579666)" 75 | Ft Smallwood,Fort Armstead,"Ft Smallwood 76 | & Fort Armstead","(39.1999130165, -76.5559766825)" 77 | Garrison ,Wabash Ave,"Garrison 78 | & Wabash Ave","(39.3412090597, -76.6831167251)" 79 | Walther ,Glenmore,"Walther 80 | & Glenmore","(39.3535402213, -76.5424942905)" 81 | Franklin ,Cathedral,"Franklin 82 | & Cathedral","(39.2950659131, -76.616872047)" 83 | Perring Pkwy,Belvedere Ave,"Perring Pkwy 84 | & Belvedere Ave","(39.3549627467, -76.575725921)" 85 | Gwynns Falls ,Garrison Blvd,"Gwynns Falls 86 | & Garrison Blvd","(39.3135792902, -76.6762250182)" 87 | Reistertown Rd,Druid Lake Drive,"Reistertown Rd 88 | & Druid Lake Drive","(39.3252867997, -76.6577109834)" 89 | Potee,Talbot,"Potee 90 | & Talbot","(39.2364856246, -76.6122106478)" 91 | York Rd ,Gitting Ave,"York Rd 92 | & Gitting Ave","(39.3704929583, -76.6098121277)" 93 | Wabash ,Belvedere Ave,"Wabash 94 | & Belvedere Ave","(39.3416713553, -76.685042508)" 95 | Northern Pkwy,York Road,"Northern Pkwy 96 | & York Road","(39.3651462024, -76.6099220341)" 97 | Reistertown ,Patterson Ave,"Reistertown 98 | & Patterson Ave","(39.3561820401, -76.7025209585)" 99 | Pulaski Hwy ,Monument St,"Pulaski Hwy 100 | & Monument 
St","(39.2997984532, -76.5542506552)" 101 | Franklin ,Franklintown Road,"Franklin 102 | & Franklintown Road","(39.2930857459, -76.6623624871)" 103 | Hanover ,Cromwell St, &,"(39.2620595495, -76.6145872791)" 104 | Reisterstown ,Menlo Drive,"Reisterstown 105 | & Menlo Drive","(39.3519849983, -76.6963758159)" 106 | Russell ,Bayard St,"Russell 107 | & Bayard St","(39.2734675536, -76.6287385068)" 108 | Liberty Hghts,Dukeland St,"Liberty Hghts 109 | & Dukeland St","(39.3226393684, -76.6666335903)" 110 | Hanover ,Reedbird Ave,"Hanover 111 | & Reedbird Ave","(39.2498797679, -76.6137494417)" 112 | Fayette,Liberty Heights Ave,"Fayette 113 | & Liberty Heights Ave","(39.2905038859, -76.617661176)" 114 | Gwynns Falls ,Garrison Blvd,"Gwynns Falls 115 | & Garrison Blvd","(39.3135792902, -76.6762250182)" 116 | Loch Raven,Walker Ave,"Loch Raven 117 | & Walker Ave","(39.3700602939, -76.5831798122)" 118 | Pulaski Hwy ,Moravia Park Drive,"Pulaski Hwy 119 | & Moravia Park Drive","(39.3086437141, -76.5323658645)" 120 | Hillen ,Forrest St,"Hillen 121 | & Forrest St","(39.2968601609, -76.6055317698)" 122 | Pulaski ,North Point Blvd,"Pulaski 123 | & North Point Blvd","(39.3043275894, -76.5395800163)" 124 | Monroe,Lafayette,"Monroe 125 | & Lafayette","(39.2987426878, -76.647517496)" 126 | Mt Royal,North,"Mt Royal 127 | & North","(39.3107672842, -76.6247302963)" 128 | Mt Royal,North,"Mt Royal 129 | & North","(39.3108706445, -76.6251255783)" 130 | -------------------------------------------------------------------------------- /data/face.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/data/face.rda -------------------------------------------------------------------------------- /data/gaData.csv: -------------------------------------------------------------------------------- 1 | "date","visits","simplystats","julian" 2 | 2011-01-01,0,0,14975 3 | 
2011-01-02,0,0,14976 4 | 2011-01-03,0,0,14977 5 | 2011-01-04,0,0,14978 6 | 2011-01-05,0,0,14979 7 | 2011-01-06,0,0,14980 8 | 2011-01-07,0,0,14981 9 | 2011-01-08,0,0,14982 10 | 2011-01-09,0,0,14983 11 | 2011-01-10,0,0,14984 12 | 2011-01-11,0,0,14985 13 | 2011-01-12,0,0,14986 14 | 2011-01-13,0,0,14987 15 | 2011-01-14,0,0,14988 16 | 2011-01-15,0,0,14989 17 | 2011-01-16,0,0,14990 18 | 2011-01-17,0,0,14991 19 | 2011-01-18,0,0,14992 20 | 2011-01-19,0,0,14993 21 | 2011-01-20,0,0,14994 22 | 2011-01-21,5,0,14995 23 | 2011-01-22,3,0,14996 24 | 2011-01-23,4,0,14997 25 | 2011-01-24,15,0,14998 26 | 2011-01-25,7,0,14999 27 | 2011-01-26,6,0,15000 28 | 2011-01-27,5,0,15001 29 | 2011-01-28,9,0,15002 30 | 2011-01-29,3,0,15003 31 | 2011-01-30,4,0,15004 32 | 2011-01-31,10,0,15005 33 | 2011-02-01,11,0,15006 34 | 2011-02-02,11,0,15007 35 | 2011-02-03,9,0,15008 36 | 2011-02-04,14,0,15009 37 | 2011-02-05,6,0,15010 38 | 2011-02-06,3,0,15011 39 | 2011-02-07,12,0,15012 40 | 2011-02-08,12,0,15013 41 | 2011-02-09,11,0,15014 42 | 2011-02-10,10,0,15015 43 | 2011-02-11,1,0,15016 44 | 2011-02-12,5,0,15017 45 | 2011-02-13,5,0,15018 46 | 2011-02-14,23,0,15019 47 | 2011-02-15,7,0,15020 48 | 2011-02-16,15,0,15021 49 | 2011-02-17,13,0,15022 50 | 2011-02-18,8,0,15023 51 | 2011-02-19,0,0,15024 52 | 2011-02-20,3,0,15025 53 | 2011-02-21,8,0,15026 54 | 2011-02-22,14,0,15027 55 | 2011-02-23,10,0,15028 56 | 2011-02-24,6,0,15029 57 | 2011-02-25,7,0,15030 58 | 2011-02-26,8,0,15031 59 | 2011-02-27,4,0,15032 60 | 2011-02-28,11,0,15033 61 | 2011-03-01,8,0,15034 62 | 2011-03-02,20,0,15035 63 | 2011-03-03,10,0,15036 64 | 2011-03-04,12,0,15037 65 | 2011-03-05,4,0,15038 66 | 2011-03-06,3,0,15039 67 | 2011-03-07,11,0,15040 68 | 2011-03-08,8,0,15041 69 | 2011-03-09,11,0,15042 70 | 2011-03-10,12,0,15043 71 | 2011-03-11,11,0,15044 72 | 2011-03-12,5,0,15045 73 | 2011-03-13,2,0,15046 74 | 2011-03-14,9,0,15047 75 | 2011-03-15,11,0,15048 76 | 2011-03-16,7,0,15049 77 | 2011-03-17,7,0,15050 78 | 2011-03-18,4,0,15051 79 | 
2011-03-19,1,0,15052 80 | 2011-03-20,2,0,15053 81 | 2011-03-21,6,0,15054 82 | 2011-03-22,7,0,15055 83 | 2011-03-23,6,0,15056 84 | 2011-03-24,7,0,15057 85 | 2011-03-25,7,0,15058 86 | 2011-03-26,5,0,15059 87 | 2011-03-27,3,0,15060 88 | 2011-03-28,21,0,15061 89 | 2011-03-29,25,0,15062 90 | 2011-03-30,13,0,15063 91 | 2011-03-31,20,0,15064 92 | 2011-04-01,15,0,15065 93 | 2011-04-02,4,0,15066 94 | 2011-04-03,5,0,15067 95 | 2011-04-04,21,0,15068 96 | 2011-04-05,12,0,15069 97 | 2011-04-06,7,0,15070 98 | 2011-04-07,12,0,15071 99 | 2011-04-08,11,0,15072 100 | 2011-04-09,1,0,15073 101 | 2011-04-10,6,0,15074 102 | 2011-04-11,7,0,15075 103 | 2011-04-12,5,0,15076 104 | 2011-04-13,4,0,15077 105 | 2011-04-14,13,0,15078 106 | 2011-04-15,9,0,15079 107 | 2011-04-16,3,0,15080 108 | 2011-04-17,11,0,15081 109 | 2011-04-18,7,0,15082 110 | 2011-04-19,9,0,15083 111 | 2011-04-20,6,0,15084 112 | 2011-04-21,4,0,15085 113 | 2011-04-22,5,0,15086 114 | 2011-04-23,3,0,15087 115 | 2011-04-24,1,0,15088 116 | 2011-04-25,7,0,15089 117 | 2011-04-26,9,0,15090 118 | 2011-04-27,9,0,15091 119 | 2011-04-28,12,0,15092 120 | 2011-04-29,6,0,15093 121 | 2011-04-30,2,0,15094 122 | 2011-05-01,0,0,15095 123 | 2011-05-02,6,0,15096 124 | 2011-05-03,8,0,15097 125 | 2011-05-04,6,0,15098 126 | 2011-05-05,7,0,15099 127 | 2011-05-06,7,0,15100 128 | 2011-05-07,0,0,15101 129 | 2011-05-08,5,0,15102 130 | 2011-05-09,7,0,15103 131 | 2011-05-10,6,0,15104 132 | 2011-05-11,5,0,15105 133 | 2011-05-12,14,0,15106 134 | 2011-05-13,5,0,15107 135 | 2011-05-14,6,0,15108 136 | 2011-05-15,4,0,15109 137 | 2011-05-16,10,0,15110 138 | 2011-05-17,20,0,15111 139 | 2011-05-18,11,0,15112 140 | 2011-05-19,9,0,15113 141 | 2011-05-20,11,0,15114 142 | 2011-05-21,11,0,15115 143 | 2011-05-22,8,0,15116 144 | 2011-05-23,5,0,15117 145 | 2011-05-24,7,0,15118 146 | 2011-05-25,3,0,15119 147 | 2011-05-26,7,0,15120 148 | 2011-05-27,6,0,15121 149 | 2011-05-28,1,0,15122 150 | 2011-05-29,3,0,15123 151 | 2011-05-30,9,0,15124 152 | 2011-05-31,13,0,15125 153 | 
2011-06-01,12,0,15126 154 | 2011-06-02,10,0,15127 155 | 2011-06-03,8,0,15128 156 | 2011-06-04,3,0,15129 157 | 2011-06-05,0,0,15130 158 | 2011-06-06,9,0,15131 159 | 2011-06-07,7,0,15132 160 | 2011-06-08,8,0,15133 161 | 2011-06-09,8,0,15134 162 | 2011-06-10,16,0,15135 163 | 2011-06-11,2,0,15136 164 | 2011-06-12,5,0,15137 165 | 2011-06-13,5,0,15138 166 | 2011-06-14,9,0,15139 167 | 2011-06-15,7,0,15140 168 | 2011-06-16,11,0,15141 169 | 2011-06-17,9,0,15142 170 | 2011-06-18,3,0,15143 171 | 2011-06-19,6,0,15144 172 | 2011-06-20,5,0,15145 173 | 2011-06-21,7,0,15146 174 | 2011-06-22,5,0,15147 175 | 2011-06-23,5,0,15148 176 | 2011-06-24,5,0,15149 177 | 2011-06-25,0,0,15150 178 | 2011-06-26,4,0,15151 179 | 2011-06-27,4,0,15152 180 | 2011-06-28,9,0,15153 181 | 2011-06-29,5,0,15154 182 | 2011-06-30,3,0,15155 183 | 2011-07-01,8,0,15156 184 | 2011-07-02,6,0,15157 185 | 2011-07-03,0,0,15158 186 | 2011-07-04,2,0,15159 187 | 2011-07-05,9,0,15160 188 | 2011-07-06,7,0,15161 189 | 2011-07-07,5,0,15162 190 | 2011-07-08,6,0,15163 191 | 2011-07-09,1,0,15164 192 | 2011-07-10,6,0,15165 193 | 2011-07-11,10,0,15166 194 | 2011-07-12,6,0,15167 195 | 2011-07-13,9,0,15168 196 | 2011-07-14,3,0,15169 197 | 2011-07-15,7,0,15170 198 | 2011-07-16,2,0,15171 199 | 2011-07-17,1,0,15172 200 | 2011-07-18,9,0,15173 201 | 2011-07-19,5,0,15174 202 | 2011-07-20,5,0,15175 203 | 2011-07-21,5,0,15176 204 | 2011-07-22,5,0,15177 205 | 2011-07-23,2,0,15178 206 | 2011-07-24,2,0,15179 207 | 2011-07-25,10,0,15180 208 | 2011-07-26,6,0,15181 209 | 2011-07-27,6,0,15182 210 | 2011-07-28,9,0,15183 211 | 2011-07-29,10,0,15184 212 | 2011-07-30,2,0,15185 213 | 2011-07-31,2,0,15186 214 | 2011-08-01,6,0,15187 215 | 2011-08-02,7,0,15188 216 | 2011-08-03,6,0,15189 217 | 2011-08-04,14,0,15190 218 | 2011-08-05,11,0,15191 219 | 2011-08-06,6,0,15192 220 | 2011-08-07,5,0,15193 221 | 2011-08-08,6,0,15194 222 | 2011-08-09,10,0,15195 223 | 2011-08-10,5,0,15196 224 | 2011-08-11,11,0,15197 225 | 2011-08-12,9,0,15198 226 | 
2011-08-13,2,0,15199 227 | 2011-08-14,2,0,15200 228 | 2011-08-15,3,0,15201 229 | 2011-08-16,14,0,15202 230 | 2011-08-17,14,0,15203 231 | 2011-08-18,17,0,15204 232 | 2011-08-19,9,0,15205 233 | 2011-08-20,6,0,15206 234 | 2011-08-21,8,0,15207 235 | 2011-08-22,6,0,15208 236 | 2011-08-23,5,0,15209 237 | 2011-08-24,10,0,15210 238 | 2011-08-25,4,0,15211 239 | 2011-08-26,6,0,15212 240 | 2011-08-27,3,0,15213 241 | 2011-08-28,5,0,15214 242 | 2011-08-29,6,0,15215 243 | 2011-08-30,9,0,15216 244 | 2011-08-31,5,0,15217 245 | 2011-09-01,6,0,15218 246 | 2011-09-02,8,0,15219 247 | 2011-09-03,2,0,15220 248 | 2011-09-04,2,0,15221 249 | 2011-09-05,4,0,15222 250 | 2011-09-06,10,0,15223 251 | 2011-09-07,12,0,15224 252 | 2011-09-08,8,0,15225 253 | 2011-09-09,6,0,15226 254 | 2011-09-10,5,0,15227 255 | 2011-09-11,3,0,15228 256 | 2011-09-12,30,0,15229 257 | 2011-09-13,27,0,15230 258 | 2011-09-14,13,0,15231 259 | 2011-09-15,7,0,15232 260 | 2011-09-16,11,0,15233 261 | 2011-09-17,6,0,15234 262 | 2011-09-18,7,0,15235 263 | 2011-09-19,6,0,15236 264 | 2011-09-20,8,0,15237 265 | 2011-09-21,3,0,15238 266 | 2011-09-22,7,0,15239 267 | 2011-09-23,16,0,15240 268 | 2011-09-24,7,0,15241 269 | 2011-09-25,6,0,15242 270 | 2011-09-26,14,0,15243 271 | 2011-09-27,15,0,15244 272 | 2011-09-28,10,0,15245 273 | 2011-09-29,17,0,15246 274 | 2011-09-30,9,0,15247 275 | 2011-10-01,7,0,15248 276 | 2011-10-02,4,0,15249 277 | 2011-10-03,11,0,15250 278 | 2011-10-04,15,0,15251 279 | 2011-10-05,12,0,15252 280 | 2011-10-06,9,0,15253 281 | 2011-10-07,10,0,15254 282 | 2011-10-08,8,0,15255 283 | 2011-10-09,4,0,15256 284 | 2011-10-10,11,0,15257 285 | 2011-10-11,6,0,15258 286 | 2011-10-12,9,0,15259 287 | 2011-10-13,2,0,15260 288 | 2011-10-14,4,0,15261 289 | 2011-10-15,4,0,15262 290 | 2011-10-16,1,0,15263 291 | 2011-10-17,16,0,15264 292 | 2011-10-18,11,0,15265 293 | 2011-10-19,13,0,15266 294 | 2011-10-20,8,0,15267 295 | 2011-10-21,6,0,15268 296 | 2011-10-22,2,0,15269 297 | 2011-10-23,3,0,15270 298 | 2011-10-24,14,0,15271 299 | 
2011-10-25,4,0,15272 300 | 2011-10-26,6,0,15273 301 | 2011-10-27,11,0,15274 302 | 2011-10-28,8,0,15275 303 | 2011-10-29,8,0,15276 304 | 2011-10-30,10,0,15277 305 | 2011-10-31,16,0,15278 306 | 2011-11-01,20,0,15279 307 | 2011-11-02,16,0,15280 308 | 2011-11-03,14,0,15281 309 | 2011-11-04,4,0,15282 310 | 2011-11-05,5,0,15283 311 | 2011-11-06,4,0,15284 312 | 2011-11-07,13,0,15285 313 | 2011-11-08,10,0,15286 314 | 2011-11-09,22,0,15287 315 | 2011-11-10,19,0,15288 316 | 2011-11-11,13,0,15289 317 | 2011-11-12,5,0,15290 318 | 2011-11-13,8,0,15291 319 | 2011-11-14,11,0,15292 320 | 2011-11-15,20,0,15293 321 | 2011-11-16,15,0,15294 322 | 2011-11-17,21,0,15295 323 | 2011-11-18,15,0,15296 324 | 2011-11-19,6,0,15297 325 | 2011-11-20,3,0,15298 326 | 2011-11-21,15,0,15299 327 | 2011-11-22,9,0,15300 328 | 2011-11-23,13,0,15301 329 | 2011-11-24,3,0,15302 330 | 2011-11-25,5,0,15303 331 | 2011-11-26,8,0,15304 332 | 2011-11-27,6,0,15305 333 | 2011-11-28,14,0,15306 334 | 2011-11-29,20,0,15307 335 | 2011-11-30,18,0,15308 336 | 2011-12-01,11,0,15309 337 | 2011-12-02,8,0,15310 338 | 2011-12-03,5,0,15311 339 | 2011-12-04,8,0,15312 340 | 2011-12-05,8,0,15313 341 | 2011-12-06,14,0,15314 342 | 2011-12-07,6,0,15315 343 | 2011-12-08,14,0,15316 344 | 2011-12-09,7,0,15317 345 | 2011-12-10,4,0,15318 346 | 2011-12-11,6,0,15319 347 | 2011-12-12,10,0,15320 348 | 2011-12-13,8,0,15321 349 | 2011-12-14,6,0,15322 350 | 2011-12-15,11,0,15323 351 | 2011-12-16,13,0,15324 352 | 2011-12-17,7,0,15325 353 | 2011-12-18,10,0,15326 354 | 2011-12-19,9,0,15327 355 | 2011-12-20,19,0,15328 356 | 2011-12-21,15,0,15329 357 | 2011-12-22,10,0,15330 358 | 2011-12-23,3,0,15331 359 | 2011-12-24,2,0,15332 360 | 2011-12-25,1,0,15333 361 | 2011-12-26,3,0,15334 362 | 2011-12-27,4,0,15335 363 | 2011-12-28,9,0,15336 364 | 2011-12-29,7,0,15337 365 | 2011-12-30,4,0,15338 366 | 2011-12-31,5,0,15339 367 | 2012-01-01,11,0,15340 368 | 2012-01-02,4,0,15341 369 | 2012-01-03,10,0,15342 370 | 2012-01-04,18,0,15343 371 | 2012-01-05,33,0,15344 
372 | 2012-01-06,17,0,15345 373 | 2012-01-07,6,0,15346 374 | 2012-01-08,6,0,15347 375 | 2012-01-09,18,0,15348 376 | 2012-01-10,12,0,15349 377 | 2012-01-11,10,0,15350 378 | 2012-01-12,26,0,15351 379 | 2012-01-13,10,0,15352 380 | 2012-01-14,7,0,15353 381 | 2012-01-15,6,0,15354 382 | 2012-01-16,21,0,15355 383 | 2012-01-17,11,0,15356 384 | 2012-01-18,12,0,15357 385 | 2012-01-19,12,0,15358 386 | 2012-01-20,13,0,15359 387 | 2012-01-21,2,0,15360 388 | 2012-01-22,3,0,15361 389 | 2012-01-23,10,0,15362 390 | 2012-01-24,12,0,15363 391 | 2012-01-25,10,0,15364 392 | 2012-01-26,17,0,15365 393 | 2012-01-27,14,0,15366 394 | 2012-01-28,6,0,15367 395 | 2012-01-29,10,0,15368 396 | 2012-01-30,22,0,15369 397 | 2012-01-31,25,0,15370 398 | 2012-02-01,11,0,15371 399 | 2012-02-02,12,0,15372 400 | 2012-02-03,9,0,15373 401 | 2012-02-04,7,0,15374 402 | 2012-02-05,2,0,15375 403 | 2012-02-06,9,0,15376 404 | 2012-02-07,13,0,15377 405 | 2012-02-08,10,0,15378 406 | 2012-02-09,19,0,15379 407 | 2012-02-10,14,0,15380 408 | 2012-02-11,10,0,15381 409 | 2012-02-12,10,0,15382 410 | 2012-02-13,17,0,15383 411 | 2012-02-14,16,0,15384 412 | 2012-02-15,13,0,15385 413 | 2012-02-16,18,0,15386 414 | 2012-02-17,12,0,15387 415 | 2012-02-18,7,0,15388 416 | 2012-02-19,11,0,15389 417 | 2012-02-20,13,0,15390 418 | 2012-02-21,13,0,15391 419 | 2012-02-22,10,0,15392 420 | 2012-02-23,19,0,15393 421 | 2012-02-24,14,0,15394 422 | 2012-02-25,4,0,15395 423 | 2012-02-26,6,0,15396 424 | 2012-02-27,16,0,15397 425 | 2012-02-28,14,0,15398 426 | 2012-02-29,17,0,15399 427 | 2012-03-01,12,0,15400 428 | 2012-03-02,12,0,15401 429 | 2012-03-03,7,0,15402 430 | 2012-03-04,8,0,15403 431 | 2012-03-05,16,0,15404 432 | 2012-03-06,9,0,15405 433 | 2012-03-07,14,0,15406 434 | 2012-03-08,12,0,15407 435 | 2012-03-09,16,0,15408 436 | 2012-03-10,2,0,15409 437 | 2012-03-11,7,0,15410 438 | 2012-03-12,9,0,15411 439 | 2012-03-13,9,0,15412 440 | 2012-03-14,12,0,15413 441 | 2012-03-15,5,0,15414 442 | 2012-03-16,7,0,15415 443 | 2012-03-17,1,0,15416 444 | 
2012-03-18,7,0,15417 445 | 2012-03-19,12,0,15418 446 | 2012-03-20,22,0,15419 447 | 2012-03-21,19,0,15420 448 | 2012-03-22,17,0,15421 449 | 2012-03-23,16,0,15422 450 | 2012-03-24,6,0,15423 451 | 2012-03-25,2,0,15424 452 | 2012-03-26,20,0,15425 453 | 2012-03-27,27,0,15426 454 | 2012-03-28,16,0,15427 455 | 2012-03-29,19,0,15428 456 | 2012-03-30,10,0,15429 457 | 2012-03-31,6,0,15430 458 | 2012-04-01,4,0,15431 459 | 2012-04-02,9,0,15432 460 | 2012-04-03,14,0,15433 461 | 2012-04-04,22,0,15434 462 | 2012-04-05,20,0,15435 463 | 2012-04-06,19,0,15436 464 | 2012-04-07,7,0,15437 465 | 2012-04-08,5,0,15438 466 | 2012-04-09,15,0,15439 467 | 2012-04-10,17,0,15440 468 | 2012-04-11,15,0,15441 469 | 2012-04-12,30,0,15442 470 | 2012-04-13,6,0,15443 471 | 2012-04-14,12,0,15444 472 | 2012-04-15,6,0,15445 473 | 2012-04-16,10,0,15446 474 | 2012-04-17,23,0,15447 475 | 2012-04-18,17,0,15448 476 | 2012-04-19,19,0,15449 477 | 2012-04-20,19,0,15450 478 | 2012-04-21,8,0,15451 479 | 2012-04-22,10,0,15452 480 | 2012-04-23,14,0,15453 481 | 2012-04-24,17,0,15454 482 | 2012-04-25,8,0,15455 483 | 2012-04-26,20,0,15456 484 | 2012-04-27,10,0,15457 485 | 2012-04-28,8,0,15458 486 | 2012-04-29,11,0,15459 487 | 2012-04-30,14,0,15460 488 | 2012-05-01,24,0,15461 489 | 2012-05-02,21,0,15462 490 | 2012-05-03,12,0,15463 491 | 2012-05-04,16,0,15464 492 | 2012-05-05,2,0,15465 493 | 2012-05-06,9,0,15466 494 | 2012-05-07,15,0,15467 495 | 2012-05-08,16,0,15468 496 | 2012-05-09,13,0,15469 497 | 2012-05-10,15,0,15470 498 | 2012-05-11,6,0,15471 499 | 2012-05-12,10,0,15472 500 | 2012-05-13,8,0,15473 501 | 2012-05-14,18,0,15474 502 | 2012-05-15,20,0,15475 503 | 2012-05-16,12,0,15476 504 | 2012-05-17,9,0,15477 505 | 2012-05-18,10,0,15478 506 | 2012-05-19,6,0,15479 507 | 2012-05-20,7,0,15480 508 | 2012-05-21,17,0,15481 509 | 2012-05-22,18,0,15482 510 | 2012-05-23,16,0,15483 511 | 2012-05-24,17,0,15484 512 | 2012-05-25,8,0,15485 513 | 2012-05-26,3,0,15486 514 | 2012-05-27,3,0,15487 515 | 2012-05-28,12,0,15488 516 | 
2012-05-29,17,0,15489 517 | 2012-05-30,11,0,15490 518 | 2012-05-31,11,0,15491 519 | 2012-06-01,10,0,15492 520 | 2012-06-02,7,0,15493 521 | 2012-06-03,9,0,15494 522 | 2012-06-04,25,0,15495 523 | 2012-06-05,13,0,15496 524 | 2012-06-06,20,0,15497 525 | 2012-06-07,19,0,15498 526 | 2012-06-08,14,0,15499 527 | 2012-06-09,3,0,15500 528 | 2012-06-10,6,0,15501 529 | 2012-06-11,10,0,15502 530 | 2012-06-12,7,0,15503 531 | 2012-06-13,16,0,15504 532 | 2012-06-14,11,0,15505 533 | 2012-06-15,8,0,15506 534 | 2012-06-16,3,0,15507 535 | 2012-06-17,5,0,15508 536 | 2012-06-18,20,0,15509 537 | 2012-06-19,12,0,15510 538 | 2012-06-20,8,0,15511 539 | 2012-06-21,15,0,15512 540 | 2012-06-22,12,4,15513 541 | 2012-06-23,13,1,15514 542 | 2012-06-24,11,7,15515 543 | 2012-06-25,9,6,15516 544 | 2012-06-26,15,6,15517 545 | 2012-06-27,17,3,15518 546 | 2012-06-28,18,3,15519 547 | 2012-06-29,13,5,15520 548 | 2012-06-30,5,2,15521 549 | 2012-07-01,2,0,15522 550 | 2012-07-02,17,3,15523 551 | 2012-07-03,12,3,15524 552 | 2012-07-04,8,3,15525 553 | 2012-07-05,8,2,15526 554 | 2012-07-06,14,7,15527 555 | 2012-07-07,5,2,15528 556 | 2012-07-08,6,2,15529 557 | 2012-07-09,9,2,15530 558 | 2012-07-10,9,1,15531 559 | 2012-07-11,30,10,15532 560 | 2012-07-12,16,4,15533 561 | 2012-07-13,15,3,15534 562 | 2012-07-14,9,2,15535 563 | 2012-07-15,2,1,15536 564 | 2012-07-16,12,2,15537 565 | 2012-07-17,50,8,15538 566 | 2012-07-18,57,13,15539 567 | 2012-07-19,51,22,15540 568 | 2012-07-20,33,14,15541 569 | 2012-07-21,10,4,15542 570 | 2012-07-22,16,4,15543 571 | 2012-07-23,29,10,15544 572 | 2012-07-24,33,9,15545 573 | 2012-07-25,26,6,15546 574 | 2012-07-26,22,8,15547 575 | 2012-07-27,15,5,15548 576 | 2012-07-28,10,1,15549 577 | 2012-07-29,14,2,15550 578 | 2012-07-30,69,37,15551 579 | 2012-07-31,47,18,15552 580 | 2012-08-01,36,9,15553 581 | 2012-08-02,17,6,15554 582 | 2012-08-03,24,5,15555 583 | 2012-08-04,19,4,15556 584 | 2012-08-05,12,5,15557 585 | 2012-08-06,25,8,15558 586 | 2012-08-07,17,3,15559 587 | 2012-08-08,26,9,15560 
588 | 2012-08-09,79,40,15561 589 | 2012-08-10,68,38,15562 590 | 2012-08-11,33,24,15563 591 | 2012-08-12,24,13,15564 592 | 2012-08-13,26,9,15565 593 | 2012-08-14,52,29,15566 594 | 2012-08-15,33,14,15567 595 | 2012-08-16,28,10,15568 596 | 2012-08-17,24,12,15569 597 | 2012-08-18,10,5,15570 598 | 2012-08-19,9,4,15571 599 | 2012-08-20,22,4,15572 600 | 2012-08-21,33,18,15573 601 | 2012-08-22,19,6,15574 602 | 2012-08-23,14,3,15575 603 | 2012-08-24,25,13,15576 604 | 2012-08-25,8,6,15577 605 | 2012-08-26,19,7,15578 606 | 2012-08-27,23,7,15579 607 | 2012-08-28,30,9,15580 608 | 2012-08-29,20,5,15581 609 | 2012-08-30,27,7,15582 610 | 2012-08-31,22,4,15583 611 | 2012-09-01,9,2,15584 612 | 2012-09-02,17,5,15585 613 | 2012-09-03,23,5,15586 614 | 2012-09-04,23,4,15587 615 | 2012-09-05,22,1,15588 616 | 2012-09-06,23,8,15589 617 | 2012-09-07,20,7,15590 618 | 2012-09-08,14,4,15591 619 | 2012-09-09,20,1,15592 620 | 2012-09-10,26,5,15593 621 | 2012-09-11,32,5,15594 622 | 2012-09-12,21,4,15595 623 | 2012-09-13,27,9,15596 624 | 2012-09-14,28,9,15597 625 | 2012-09-15,12,2,15598 626 | 2012-09-16,14,4,15599 627 | 2012-09-17,23,6,15600 628 | 2012-09-18,30,2,15601 629 | 2012-09-19,41,4,15602 630 | 2012-09-20,33,9,15603 631 | 2012-09-21,22,8,15604 632 | 2012-09-22,20,5,15605 633 | 2012-09-23,16,6,15606 634 | 2012-09-24,36,19,15607 635 | 2012-09-25,28,10,15608 636 | 2012-09-26,41,7,15609 637 | 2012-09-27,32,7,15610 638 | 2012-09-28,36,11,15611 639 | 2012-09-29,10,4,15612 640 | 2012-09-30,23,4,15613 641 | 2012-10-01,31,11,15614 642 | 2012-10-02,19,4,15615 643 | 2012-10-03,18,3,15616 644 | 2012-10-04,23,5,15617 645 | 2012-10-05,32,6,15618 646 | 2012-10-06,18,3,15619 647 | 2012-10-07,18,1,15620 648 | 2012-10-08,23,4,15621 649 | 2012-10-09,23,6,15622 650 | 2012-10-10,20,4,15623 651 | 2012-10-11,20,2,15624 652 | 2012-10-12,19,4,15625 653 | 2012-10-13,8,0,15626 654 | 2012-10-14,12,1,15627 655 | 2012-10-15,22,5,15628 656 | 2012-10-16,17,2,15629 657 | 2012-10-17,25,3,15630 658 | 2012-10-18,21,5,15631 
659 | 2012-10-19,32,7,15632 660 | 2012-10-20,12,1,15633 661 | 2012-10-21,15,5,15634 662 | 2012-10-22,18,3,15635 663 | 2012-10-23,24,6,15636 664 | 2012-10-24,24,4,15637 665 | 2012-10-25,24,6,15638 666 | 2012-10-26,21,6,15639 667 | 2012-10-27,14,3,15640 668 | 2012-10-28,21,2,15641 669 | 2012-10-29,27,6,15642 670 | 2012-10-30,35,14,15643 671 | 2012-10-31,27,16,15644 672 | 2012-11-01,20,8,15645 673 | 2012-11-02,22,6,15646 674 | 2012-11-03,9,4,15647 675 | 2012-11-04,20,4,15648 676 | 2012-11-05,26,7,15649 677 | 2012-11-06,31,9,15650 678 | 2012-11-07,33,21,15651 679 | 2012-11-08,25,13,15652 680 | 2012-11-09,21,6,15653 681 | 2012-11-10,6,1,15654 682 | 2012-11-11,11,4,15655 683 | 2012-11-12,32,10,15656 684 | 2012-11-13,19,6,15657 685 | 2012-11-14,27,9,15658 686 | 2012-11-15,31,5,15659 687 | 2012-11-16,18,5,15660 688 | 2012-11-17,11,6,15661 689 | 2012-11-18,15,4,15662 690 | 2012-11-19,28,9,15663 691 | 2012-11-20,30,9,15664 692 | 2012-11-21,22,3,15665 693 | 2012-11-22,4,0,15666 694 | 2012-11-23,13,2,15667 695 | 2012-11-24,19,5,15668 696 | 2012-11-25,11,5,15669 697 | 2012-11-26,19,9,15670 698 | 2012-11-27,40,8,15671 699 | 2012-11-28,21,8,15672 700 | 2012-11-29,32,8,15673 701 | 2012-11-30,17,6,15674 702 | 2012-12-01,13,3,15675 703 | 2012-12-02,17,0,15676 704 | 2012-12-03,16,3,15677 705 | 2012-12-04,94,64,15678 706 | 2012-12-05,34,18,15679 707 | 2012-12-06,62,10,15680 708 | 2012-12-07,32,7,15681 709 | 2012-12-08,18,8,15682 710 | 2012-12-09,8,2,15683 711 | 2012-12-10,29,10,15684 712 | 2012-12-11,23,4,15685 713 | 2012-12-12,32,6,15686 714 | 2012-12-13,25,7,15687 715 | 2012-12-14,17,1,15688 716 | 2012-12-15,15,4,15689 717 | 2012-12-16,12,1,15690 718 | 2012-12-17,27,3,15691 719 | 2012-12-18,27,5,15692 720 | 2012-12-19,35,5,15693 721 | 2012-12-20,34,4,15694 722 | 2012-12-21,29,5,15695 723 | 2012-12-22,19,2,15696 724 | 2012-12-23,12,1,15697 725 | 2012-12-24,17,1,15698 726 | 2012-12-25,9,3,15699 727 | 2012-12-26,17,3,15700 728 | 2012-12-27,17,3,15701 729 | 2012-12-28,21,1,15702 730 | 
2012-12-29,14,3,15703 731 | 2012-12-30,11,3,15704 732 | 2012-12-31,25,2,15705 733 | -------------------------------------------------------------------------------- /data/gaData.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/data/gaData.rda -------------------------------------------------------------------------------- /data/galton.csv: -------------------------------------------------------------------------------- 1 | "","child","parent" 2 | "1",61.7,70.5 3 | "2",61.7,68.5 4 | "3",61.7,65.5 5 | "4",61.7,64.5 6 | "5",61.7,64 7 | "6",62.2,67.5 8 | "7",62.2,67.5 9 | "8",62.2,67.5 10 | "9",62.2,66.5 11 | "10",62.2,66.5 12 | "11",62.2,66.5 13 | "12",62.2,64.5 14 | "13",63.2,70.5 15 | "14",63.2,69.5 16 | "15",63.2,68.5 17 | "16",63.2,68.5 18 | "17",63.2,68.5 19 | "18",63.2,68.5 20 | "19",63.2,68.5 21 | "20",63.2,68.5 22 | "21",63.2,68.5 23 | "22",63.2,67.5 24 | "23",63.2,67.5 25 | "24",63.2,67.5 26 | "25",63.2,67.5 27 | "26",63.2,67.5 28 | "27",63.2,66.5 29 | "28",63.2,66.5 30 | "29",63.2,66.5 31 | "30",63.2,65.5 32 | "31",63.2,65.5 33 | "32",63.2,65.5 34 | "33",63.2,65.5 35 | "34",63.2,65.5 36 | "35",63.2,65.5 37 | "36",63.2,65.5 38 | "37",63.2,65.5 39 | "38",63.2,65.5 40 | "39",63.2,64.5 41 | "40",63.2,64.5 42 | "41",63.2,64.5 43 | "42",63.2,64.5 44 | "43",63.2,64 45 | "44",63.2,64 46 | "45",64.2,69.5 47 | "46",64.2,69.5 48 | "47",64.2,69.5 49 | "48",64.2,69.5 50 | "49",64.2,69.5 51 | "50",64.2,69.5 52 | "51",64.2,69.5 53 | "52",64.2,69.5 54 | "53",64.2,69.5 55 | "54",64.2,69.5 56 | "55",64.2,69.5 57 | "56",64.2,69.5 58 | "57",64.2,69.5 59 | "58",64.2,69.5 60 | "59",64.2,69.5 61 | "60",64.2,69.5 62 | "61",64.2,68.5 63 | "62",64.2,68.5 64 | "63",64.2,68.5 65 | "64",64.2,68.5 66 | "65",64.2,68.5 67 | "66",64.2,68.5 68 | "67",64.2,68.5 69 | "68",64.2,68.5 70 | "69",64.2,68.5 71 | "70",64.2,68.5 72 | "71",64.2,68.5 73 | "72",64.2,67.5 74 
| "73",64.2,67.5 75 | "74",64.2,67.5 76 | "75",64.2,67.5 77 | "76",64.2,67.5 78 | "77",64.2,67.5 79 | "78",64.2,67.5 80 | "79",64.2,67.5 81 | "80",64.2,67.5 82 | "81",64.2,67.5 83 | "82",64.2,67.5 84 | "83",64.2,67.5 85 | "84",64.2,67.5 86 | "85",64.2,67.5 87 | "86",64.2,66.5 88 | "87",64.2,66.5 89 | "88",64.2,66.5 90 | "89",64.2,66.5 91 | "90",64.2,66.5 92 | "91",64.2,65.5 93 | "92",64.2,65.5 94 | "93",64.2,65.5 95 | "94",64.2,65.5 96 | "95",64.2,65.5 97 | "96",64.2,64.5 98 | "97",64.2,64.5 99 | "98",64.2,64.5 100 | "99",64.2,64.5 101 | "100",64.2,64 102 | "101",64.2,64 103 | "102",64.2,64 104 | "103",64.2,64 105 | "104",65.2,71.5 106 | "105",65.2,70.5 107 | "106",65.2,69.5 108 | "107",65.2,69.5 109 | "108",65.2,69.5 110 | "109",65.2,69.5 111 | "110",65.2,68.5 112 | "111",65.2,68.5 113 | "112",65.2,68.5 114 | "113",65.2,68.5 115 | "114",65.2,68.5 116 | "115",65.2,68.5 117 | "116",65.2,68.5 118 | "117",65.2,68.5 119 | "118",65.2,68.5 120 | "119",65.2,68.5 121 | "120",65.2,68.5 122 | "121",65.2,68.5 123 | "122",65.2,68.5 124 | "123",65.2,68.5 125 | "124",65.2,68.5 126 | "125",65.2,68.5 127 | "126",65.2,67.5 128 | "127",65.2,67.5 129 | "128",65.2,67.5 130 | "129",65.2,67.5 131 | "130",65.2,67.5 132 | "131",65.2,67.5 133 | "132",65.2,67.5 134 | "133",65.2,67.5 135 | "134",65.2,67.5 136 | "135",65.2,67.5 137 | "136",65.2,67.5 138 | "137",65.2,67.5 139 | "138",65.2,67.5 140 | "139",65.2,67.5 141 | "140",65.2,67.5 142 | "141",65.2,66.5 143 | "142",65.2,66.5 144 | "143",65.2,65.5 145 | "144",65.2,65.5 146 | "145",65.2,65.5 147 | "146",65.2,65.5 148 | "147",65.2,65.5 149 | "148",65.2,65.5 150 | "149",65.2,65.5 151 | "150",65.2,64.5 152 | "151",65.2,64 153 | "152",66.2,71.5 154 | "153",66.2,71.5 155 | "154",66.2,71.5 156 | "155",66.2,70.5 157 | "156",66.2,69.5 158 | "157",66.2,69.5 159 | "158",66.2,69.5 160 | "159",66.2,69.5 161 | "160",66.2,69.5 162 | "161",66.2,69.5 163 | "162",66.2,69.5 164 | "163",66.2,69.5 165 | "164",66.2,69.5 166 | "165",66.2,69.5 167 | 
"166",66.2,69.5 168 | "167",66.2,69.5 169 | "168",66.2,69.5 170 | "169",66.2,69.5 171 | "170",66.2,69.5 172 | "171",66.2,69.5 173 | "172",66.2,69.5 174 | "173",66.2,68.5 175 | "174",66.2,68.5 176 | "175",66.2,68.5 177 | "176",66.2,68.5 178 | "177",66.2,68.5 179 | "178",66.2,68.5 180 | "179",66.2,68.5 181 | "180",66.2,68.5 182 | "181",66.2,68.5 183 | "182",66.2,68.5 184 | "183",66.2,68.5 185 | "184",66.2,68.5 186 | "185",66.2,68.5 187 | "186",66.2,68.5 188 | "187",66.2,68.5 189 | "188",66.2,68.5 190 | "189",66.2,68.5 191 | "190",66.2,68.5 192 | "191",66.2,68.5 193 | "192",66.2,68.5 194 | "193",66.2,68.5 195 | "194",66.2,68.5 196 | "195",66.2,68.5 197 | "196",66.2,68.5 198 | "197",66.2,68.5 199 | "198",66.2,67.5 200 | "199",66.2,67.5 201 | "200",66.2,67.5 202 | "201",66.2,67.5 203 | "202",66.2,67.5 204 | "203",66.2,67.5 205 | "204",66.2,67.5 206 | "205",66.2,67.5 207 | "206",66.2,67.5 208 | "207",66.2,67.5 209 | "208",66.2,67.5 210 | "209",66.2,67.5 211 | "210",66.2,67.5 212 | "211",66.2,67.5 213 | "212",66.2,67.5 214 | "213",66.2,67.5 215 | "214",66.2,67.5 216 | "215",66.2,67.5 217 | "216",66.2,67.5 218 | "217",66.2,67.5 219 | "218",66.2,67.5 220 | "219",66.2,67.5 221 | "220",66.2,67.5 222 | "221",66.2,67.5 223 | "222",66.2,67.5 224 | "223",66.2,67.5 225 | "224",66.2,67.5 226 | "225",66.2,67.5 227 | "226",66.2,67.5 228 | "227",66.2,67.5 229 | "228",66.2,67.5 230 | "229",66.2,67.5 231 | "230",66.2,67.5 232 | "231",66.2,67.5 233 | "232",66.2,67.5 234 | "233",66.2,67.5 235 | "234",66.2,66.5 236 | "235",66.2,66.5 237 | "236",66.2,66.5 238 | "237",66.2,66.5 239 | "238",66.2,66.5 240 | "239",66.2,66.5 241 | "240",66.2,66.5 242 | "241",66.2,66.5 243 | "242",66.2,66.5 244 | "243",66.2,66.5 245 | "244",66.2,66.5 246 | "245",66.2,66.5 247 | "246",66.2,66.5 248 | "247",66.2,66.5 249 | "248",66.2,66.5 250 | "249",66.2,66.5 251 | "250",66.2,66.5 252 | "251",66.2,65.5 253 | "252",66.2,65.5 254 | "253",66.2,65.5 255 | "254",66.2,65.5 256 | "255",66.2,65.5 257 | "256",66.2,65.5 258 
| "257",66.2,65.5 259 | "258",66.2,65.5 260 | "259",66.2,65.5 261 | "260",66.2,65.5 262 | "261",66.2,65.5 263 | "262",66.2,64.5 264 | "263",66.2,64.5 265 | "264",66.2,64.5 266 | "265",66.2,64.5 267 | "266",66.2,64.5 268 | "267",66.2,64 269 | "268",66.2,64 270 | "269",67.2,71.5 271 | "270",67.2,71.5 272 | "271",67.2,71.5 273 | "272",67.2,71.5 274 | "273",67.2,70.5 275 | "274",67.2,70.5 276 | "275",67.2,70.5 277 | "276",67.2,69.5 278 | "277",67.2,69.5 279 | "278",67.2,69.5 280 | "279",67.2,69.5 281 | "280",67.2,69.5 282 | "281",67.2,69.5 283 | "282",67.2,69.5 284 | "283",67.2,69.5 285 | "284",67.2,69.5 286 | "285",67.2,69.5 287 | "286",67.2,69.5 288 | "287",67.2,69.5 289 | "288",67.2,69.5 290 | "289",67.2,69.5 291 | "290",67.2,69.5 292 | "291",67.2,69.5 293 | "292",67.2,69.5 294 | "293",67.2,69.5 295 | "294",67.2,69.5 296 | "295",67.2,69.5 297 | "296",67.2,69.5 298 | "297",67.2,69.5 299 | "298",67.2,69.5 300 | "299",67.2,69.5 301 | "300",67.2,69.5 302 | "301",67.2,69.5 303 | "302",67.2,69.5 304 | "303",67.2,68.5 305 | "304",67.2,68.5 306 | "305",67.2,68.5 307 | "306",67.2,68.5 308 | "307",67.2,68.5 309 | "308",67.2,68.5 310 | "309",67.2,68.5 311 | "310",67.2,68.5 312 | "311",67.2,68.5 313 | "312",67.2,68.5 314 | "313",67.2,68.5 315 | "314",67.2,68.5 316 | "315",67.2,68.5 317 | "316",67.2,68.5 318 | "317",67.2,68.5 319 | "318",67.2,68.5 320 | "319",67.2,68.5 321 | "320",67.2,68.5 322 | "321",67.2,68.5 323 | "322",67.2,68.5 324 | "323",67.2,68.5 325 | "324",67.2,68.5 326 | "325",67.2,68.5 327 | "326",67.2,68.5 328 | "327",67.2,68.5 329 | "328",67.2,68.5 330 | "329",67.2,68.5 331 | "330",67.2,68.5 332 | "331",67.2,68.5 333 | "332",67.2,68.5 334 | "333",67.2,68.5 335 | "334",67.2,67.5 336 | "335",67.2,67.5 337 | "336",67.2,67.5 338 | "337",67.2,67.5 339 | "338",67.2,67.5 340 | "339",67.2,67.5 341 | "340",67.2,67.5 342 | "341",67.2,67.5 343 | "342",67.2,67.5 344 | "343",67.2,67.5 345 | "344",67.2,67.5 346 | "345",67.2,67.5 347 | "346",67.2,67.5 348 | "347",67.2,67.5 349 | 
"348",67.2,67.5 350 | "349",67.2,67.5 351 | "350",67.2,67.5 352 | "351",67.2,67.5 353 | "352",67.2,67.5 354 | "353",67.2,67.5 355 | "354",67.2,67.5 356 | "355",67.2,67.5 357 | "356",67.2,67.5 358 | "357",67.2,67.5 359 | "358",67.2,67.5 360 | "359",67.2,67.5 361 | "360",67.2,67.5 362 | "361",67.2,67.5 363 | "362",67.2,67.5 364 | "363",67.2,67.5 365 | "364",67.2,67.5 366 | "365",67.2,67.5 367 | "366",67.2,67.5 368 | "367",67.2,67.5 369 | "368",67.2,67.5 370 | "369",67.2,67.5 371 | "370",67.2,67.5 372 | "371",67.2,67.5 373 | "372",67.2,66.5 374 | "373",67.2,66.5 375 | "374",67.2,66.5 376 | "375",67.2,66.5 377 | "376",67.2,66.5 378 | "377",67.2,66.5 379 | "378",67.2,66.5 380 | "379",67.2,66.5 381 | "380",67.2,66.5 382 | "381",67.2,66.5 383 | "382",67.2,66.5 384 | "383",67.2,66.5 385 | "384",67.2,66.5 386 | "385",67.2,66.5 387 | "386",67.2,66.5 388 | "387",67.2,66.5 389 | "388",67.2,66.5 390 | "389",67.2,65.5 391 | "390",67.2,65.5 392 | "391",67.2,65.5 393 | "392",67.2,65.5 394 | "393",67.2,65.5 395 | "394",67.2,65.5 396 | "395",67.2,65.5 397 | "396",67.2,65.5 398 | "397",67.2,65.5 399 | "398",67.2,65.5 400 | "399",67.2,65.5 401 | "400",67.2,64.5 402 | "401",67.2,64.5 403 | "402",67.2,64.5 404 | "403",67.2,64.5 405 | "404",67.2,64.5 406 | "405",67.2,64 407 | "406",67.2,64 408 | "407",68.2,72.5 409 | "408",68.2,71.5 410 | "409",68.2,71.5 411 | "410",68.2,71.5 412 | "411",68.2,70.5 413 | "412",68.2,70.5 414 | "413",68.2,70.5 415 | "414",68.2,70.5 416 | "415",68.2,70.5 417 | "416",68.2,70.5 418 | "417",68.2,70.5 419 | "418",68.2,70.5 420 | "419",68.2,70.5 421 | "420",68.2,70.5 422 | "421",68.2,70.5 423 | "422",68.2,70.5 424 | "423",68.2,69.5 425 | "424",68.2,69.5 426 | "425",68.2,69.5 427 | "426",68.2,69.5 428 | "427",68.2,69.5 429 | "428",68.2,69.5 430 | "429",68.2,69.5 431 | "430",68.2,69.5 432 | "431",68.2,69.5 433 | "432",68.2,69.5 434 | "433",68.2,69.5 435 | "434",68.2,69.5 436 | "435",68.2,69.5 437 | "436",68.2,69.5 438 | "437",68.2,69.5 439 | "438",68.2,69.5 440 | 
"439",68.2,69.5 441 | "440",68.2,69.5 442 | "441",68.2,69.5 443 | "442",68.2,69.5 444 | "443",68.2,68.5 445 | "444",68.2,68.5 446 | "445",68.2,68.5 447 | "446",68.2,68.5 448 | "447",68.2,68.5 449 | "448",68.2,68.5 450 | "449",68.2,68.5 451 | "450",68.2,68.5 452 | "451",68.2,68.5 453 | "452",68.2,68.5 454 | "453",68.2,68.5 455 | "454",68.2,68.5 456 | "455",68.2,68.5 457 | "456",68.2,68.5 458 | "457",68.2,68.5 459 | "458",68.2,68.5 460 | "459",68.2,68.5 461 | "460",68.2,68.5 462 | "461",68.2,68.5 463 | "462",68.2,68.5 464 | "463",68.2,68.5 465 | "464",68.2,68.5 466 | "465",68.2,68.5 467 | "466",68.2,68.5 468 | "467",68.2,68.5 469 | "468",68.2,68.5 470 | "469",68.2,68.5 471 | "470",68.2,68.5 472 | "471",68.2,68.5 473 | "472",68.2,68.5 474 | "473",68.2,68.5 475 | "474",68.2,68.5 476 | "475",68.2,68.5 477 | "476",68.2,68.5 478 | "477",68.2,67.5 479 | "478",68.2,67.5 480 | "479",68.2,67.5 481 | "480",68.2,67.5 482 | "481",68.2,67.5 483 | "482",68.2,67.5 484 | "483",68.2,67.5 485 | "484",68.2,67.5 486 | "485",68.2,67.5 487 | "486",68.2,67.5 488 | "487",68.2,67.5 489 | "488",68.2,67.5 490 | "489",68.2,67.5 491 | "490",68.2,67.5 492 | "491",68.2,67.5 493 | "492",68.2,67.5 494 | "493",68.2,67.5 495 | "494",68.2,67.5 496 | "495",68.2,67.5 497 | "496",68.2,67.5 498 | "497",68.2,67.5 499 | "498",68.2,67.5 500 | "499",68.2,67.5 501 | "500",68.2,67.5 502 | "501",68.2,67.5 503 | "502",68.2,67.5 504 | "503",68.2,67.5 505 | "504",68.2,67.5 506 | "505",68.2,66.5 507 | "506",68.2,66.5 508 | "507",68.2,66.5 509 | "508",68.2,66.5 510 | "509",68.2,66.5 511 | "510",68.2,66.5 512 | "511",68.2,66.5 513 | "512",68.2,66.5 514 | "513",68.2,66.5 515 | "514",68.2,66.5 516 | "515",68.2,66.5 517 | "516",68.2,66.5 518 | "517",68.2,66.5 519 | "518",68.2,66.5 520 | "519",68.2,65.5 521 | "520",68.2,65.5 522 | "521",68.2,65.5 523 | "522",68.2,65.5 524 | "523",68.2,65.5 525 | "524",68.2,65.5 526 | "525",68.2,65.5 527 | "526",68.2,64 528 | "527",69.2,72.5 529 | "528",69.2,72.5 530 | "529",69.2,71.5 531 | 
"530",69.2,71.5 532 | "531",69.2,71.5 533 | "532",69.2,71.5 534 | "533",69.2,71.5 535 | "534",69.2,70.5 536 | "535",69.2,70.5 537 | "536",69.2,70.5 538 | "537",69.2,70.5 539 | "538",69.2,70.5 540 | "539",69.2,70.5 541 | "540",69.2,70.5 542 | "541",69.2,70.5 543 | "542",69.2,70.5 544 | "543",69.2,70.5 545 | "544",69.2,70.5 546 | "545",69.2,70.5 547 | "546",69.2,70.5 548 | "547",69.2,70.5 549 | "548",69.2,70.5 550 | "549",69.2,70.5 551 | "550",69.2,70.5 552 | "551",69.2,70.5 553 | "552",69.2,69.5 554 | "553",69.2,69.5 555 | "554",69.2,69.5 556 | "555",69.2,69.5 557 | "556",69.2,69.5 558 | "557",69.2,69.5 559 | "558",69.2,69.5 560 | "559",69.2,69.5 561 | "560",69.2,69.5 562 | "561",69.2,69.5 563 | "562",69.2,69.5 564 | "563",69.2,69.5 565 | "564",69.2,69.5 566 | "565",69.2,69.5 567 | "566",69.2,69.5 568 | "567",69.2,69.5 569 | "568",69.2,69.5 570 | "569",69.2,69.5 571 | "570",69.2,69.5 572 | "571",69.2,69.5 573 | "572",69.2,69.5 574 | "573",69.2,69.5 575 | "574",69.2,69.5 576 | "575",69.2,69.5 577 | "576",69.2,69.5 578 | "577",69.2,69.5 579 | "578",69.2,69.5 580 | "579",69.2,69.5 581 | "580",69.2,69.5 582 | "581",69.2,69.5 583 | "582",69.2,69.5 584 | "583",69.2,69.5 585 | "584",69.2,69.5 586 | "585",69.2,68.5 587 | "586",69.2,68.5 588 | "587",69.2,68.5 589 | "588",69.2,68.5 590 | "589",69.2,68.5 591 | "590",69.2,68.5 592 | "591",69.2,68.5 593 | "592",69.2,68.5 594 | "593",69.2,68.5 595 | "594",69.2,68.5 596 | "595",69.2,68.5 597 | "596",69.2,68.5 598 | "597",69.2,68.5 599 | "598",69.2,68.5 600 | "599",69.2,68.5 601 | "600",69.2,68.5 602 | "601",69.2,68.5 603 | "602",69.2,68.5 604 | "603",69.2,68.5 605 | "604",69.2,68.5 606 | "605",69.2,68.5 607 | "606",69.2,68.5 608 | "607",69.2,68.5 609 | "608",69.2,68.5 610 | "609",69.2,68.5 611 | "610",69.2,68.5 612 | "611",69.2,68.5 613 | "612",69.2,68.5 614 | "613",69.2,68.5 615 | "614",69.2,68.5 616 | "615",69.2,68.5 617 | "616",69.2,68.5 618 | "617",69.2,68.5 619 | "618",69.2,68.5 620 | "619",69.2,68.5 621 | "620",69.2,68.5 622 
| "621",69.2,68.5 623 | "622",69.2,68.5 624 | "623",69.2,68.5 625 | "624",69.2,68.5 626 | "625",69.2,68.5 627 | "626",69.2,68.5 628 | "627",69.2,68.5 629 | "628",69.2,68.5 630 | "629",69.2,68.5 631 | "630",69.2,68.5 632 | "631",69.2,68.5 633 | "632",69.2,68.5 634 | "633",69.2,67.5 635 | "634",69.2,67.5 636 | "635",69.2,67.5 637 | "636",69.2,67.5 638 | "637",69.2,67.5 639 | "638",69.2,67.5 640 | "639",69.2,67.5 641 | "640",69.2,67.5 642 | "641",69.2,67.5 643 | "642",69.2,67.5 644 | "643",69.2,67.5 645 | "644",69.2,67.5 646 | "645",69.2,67.5 647 | "646",69.2,67.5 648 | "647",69.2,67.5 649 | "648",69.2,67.5 650 | "649",69.2,67.5 651 | "650",69.2,67.5 652 | "651",69.2,67.5 653 | "652",69.2,67.5 654 | "653",69.2,67.5 655 | "654",69.2,67.5 656 | "655",69.2,67.5 657 | "656",69.2,67.5 658 | "657",69.2,67.5 659 | "658",69.2,67.5 660 | "659",69.2,67.5 661 | "660",69.2,67.5 662 | "661",69.2,67.5 663 | "662",69.2,67.5 664 | "663",69.2,67.5 665 | "664",69.2,67.5 666 | "665",69.2,67.5 667 | "666",69.2,67.5 668 | "667",69.2,67.5 669 | "668",69.2,67.5 670 | "669",69.2,67.5 671 | "670",69.2,67.5 672 | "671",69.2,66.5 673 | "672",69.2,66.5 674 | "673",69.2,66.5 675 | "674",69.2,66.5 676 | "675",69.2,66.5 677 | "676",69.2,66.5 678 | "677",69.2,66.5 679 | "678",69.2,66.5 680 | "679",69.2,66.5 681 | "680",69.2,66.5 682 | "681",69.2,66.5 683 | "682",69.2,66.5 684 | "683",69.2,66.5 685 | "684",69.2,65.5 686 | "685",69.2,65.5 687 | "686",69.2,65.5 688 | "687",69.2,65.5 689 | "688",69.2,65.5 690 | "689",69.2,65.5 691 | "690",69.2,65.5 692 | "691",69.2,64.5 693 | "692",69.2,64.5 694 | "693",69.2,64 695 | "694",70.2,72.5 696 | "695",70.2,71.5 697 | "696",70.2,71.5 698 | "697",70.2,71.5 699 | "698",70.2,71.5 700 | "699",70.2,71.5 701 | "700",70.2,71.5 702 | "701",70.2,71.5 703 | "702",70.2,71.5 704 | "703",70.2,71.5 705 | "704",70.2,71.5 706 | "705",70.2,70.5 707 | "706",70.2,70.5 708 | "707",70.2,70.5 709 | "708",70.2,70.5 710 | "709",70.2,70.5 711 | "710",70.2,70.5 712 | "711",70.2,70.5 713 
| "712",70.2,70.5 714 | "713",70.2,70.5 715 | "714",70.2,70.5 716 | "715",70.2,70.5 717 | "716",70.2,70.5 718 | "717",70.2,70.5 719 | "718",70.2,70.5 720 | "719",70.2,69.5 721 | "720",70.2,69.5 722 | "721",70.2,69.5 723 | "722",70.2,69.5 724 | "723",70.2,69.5 725 | "724",70.2,69.5 726 | "725",70.2,69.5 727 | "726",70.2,69.5 728 | "727",70.2,69.5 729 | "728",70.2,69.5 730 | "729",70.2,69.5 731 | "730",70.2,69.5 732 | "731",70.2,69.5 733 | "732",70.2,69.5 734 | "733",70.2,69.5 735 | "734",70.2,69.5 736 | "735",70.2,69.5 737 | "736",70.2,69.5 738 | "737",70.2,69.5 739 | "738",70.2,69.5 740 | "739",70.2,69.5 741 | "740",70.2,69.5 742 | "741",70.2,69.5 743 | "742",70.2,69.5 744 | "743",70.2,69.5 745 | "744",70.2,68.5 746 | "745",70.2,68.5 747 | "746",70.2,68.5 748 | "747",70.2,68.5 749 | "748",70.2,68.5 750 | "749",70.2,68.5 751 | "750",70.2,68.5 752 | "751",70.2,68.5 753 | "752",70.2,68.5 754 | "753",70.2,68.5 755 | "754",70.2,68.5 756 | "755",70.2,68.5 757 | "756",70.2,68.5 758 | "757",70.2,68.5 759 | "758",70.2,68.5 760 | "759",70.2,68.5 761 | "760",70.2,68.5 762 | "761",70.2,68.5 763 | "762",70.2,68.5 764 | "763",70.2,68.5 765 | "764",70.2,68.5 766 | "765",70.2,67.5 767 | "766",70.2,67.5 768 | "767",70.2,67.5 769 | "768",70.2,67.5 770 | "769",70.2,67.5 771 | "770",70.2,67.5 772 | "771",70.2,67.5 773 | "772",70.2,67.5 774 | "773",70.2,67.5 775 | "774",70.2,67.5 776 | "775",70.2,67.5 777 | "776",70.2,67.5 778 | "777",70.2,67.5 779 | "778",70.2,67.5 780 | "779",70.2,67.5 781 | "780",70.2,67.5 782 | "781",70.2,67.5 783 | "782",70.2,67.5 784 | "783",70.2,67.5 785 | "784",70.2,66.5 786 | "785",70.2,66.5 787 | "786",70.2,66.5 788 | "787",70.2,66.5 789 | "788",70.2,65.5 790 | "789",70.2,65.5 791 | "790",70.2,65.5 792 | "791",70.2,65.5 793 | "792",70.2,65.5 794 | "793",71.2,72.5 795 | "794",71.2,72.5 796 | "795",71.2,71.5 797 | "796",71.2,71.5 798 | "797",71.2,71.5 799 | "798",71.2,71.5 800 | "799",71.2,70.5 801 | "800",71.2,70.5 802 | "801",71.2,70.5 803 | "802",71.2,70.5 
804 | "803",71.2,70.5 805 | "804",71.2,70.5 806 | "805",71.2,70.5 807 | "806",71.2,69.5 808 | "807",71.2,69.5 809 | "808",71.2,69.5 810 | "809",71.2,69.5 811 | "810",71.2,69.5 812 | "811",71.2,69.5 813 | "812",71.2,69.5 814 | "813",71.2,69.5 815 | "814",71.2,69.5 816 | "815",71.2,69.5 817 | "816",71.2,69.5 818 | "817",71.2,69.5 819 | "818",71.2,69.5 820 | "819",71.2,69.5 821 | "820",71.2,69.5 822 | "821",71.2,69.5 823 | "822",71.2,69.5 824 | "823",71.2,69.5 825 | "824",71.2,69.5 826 | "825",71.2,69.5 827 | "826",71.2,68.5 828 | "827",71.2,68.5 829 | "828",71.2,68.5 830 | "829",71.2,68.5 831 | "830",71.2,68.5 832 | "831",71.2,68.5 833 | "832",71.2,68.5 834 | "833",71.2,68.5 835 | "834",71.2,68.5 836 | "835",71.2,68.5 837 | "836",71.2,68.5 838 | "837",71.2,68.5 839 | "838",71.2,68.5 840 | "839",71.2,68.5 841 | "840",71.2,68.5 842 | "841",71.2,68.5 843 | "842",71.2,68.5 844 | "843",71.2,68.5 845 | "844",71.2,67.5 846 | "845",71.2,67.5 847 | "846",71.2,67.5 848 | "847",71.2,67.5 849 | "848",71.2,67.5 850 | "849",71.2,67.5 851 | "850",71.2,67.5 852 | "851",71.2,67.5 853 | "852",71.2,67.5 854 | "853",71.2,67.5 855 | "854",71.2,67.5 856 | "855",71.2,65.5 857 | "856",71.2,65.5 858 | "857",72.2,73 859 | "858",72.2,72.5 860 | "859",72.2,72.5 861 | "860",72.2,72.5 862 | "861",72.2,72.5 863 | "862",72.2,72.5 864 | "863",72.2,72.5 865 | "864",72.2,72.5 866 | "865",72.2,71.5 867 | "866",72.2,71.5 868 | "867",72.2,71.5 869 | "868",72.2,71.5 870 | "869",72.2,71.5 871 | "870",72.2,71.5 872 | "871",72.2,71.5 873 | "872",72.2,71.5 874 | "873",72.2,71.5 875 | "874",72.2,70.5 876 | "875",72.2,70.5 877 | "876",72.2,70.5 878 | "877",72.2,70.5 879 | "878",72.2,69.5 880 | "879",72.2,69.5 881 | "880",72.2,69.5 882 | "881",72.2,69.5 883 | "882",72.2,69.5 884 | "883",72.2,69.5 885 | "884",72.2,69.5 886 | "885",72.2,69.5 887 | "886",72.2,69.5 888 | "887",72.2,69.5 889 | "888",72.2,69.5 890 | "889",72.2,68.5 891 | "890",72.2,68.5 892 | "891",72.2,68.5 893 | "892",72.2,68.5 894 | "893",72.2,67.5 
895 | "894",72.2,67.5 896 | "895",72.2,67.5 897 | "896",72.2,67.5 898 | "897",72.2,65.5 899 | "898",73.2,73 900 | "899",73.2,73 901 | "900",73.2,73 902 | "901",73.2,72.5 903 | "902",73.2,72.5 904 | "903",73.2,71.5 905 | "904",73.2,71.5 906 | "905",73.2,70.5 907 | "906",73.2,70.5 908 | "907",73.2,70.5 909 | "908",73.2,69.5 910 | "909",73.2,69.5 911 | "910",73.2,69.5 912 | "911",73.2,69.5 913 | "912",73.2,68.5 914 | "913",73.2,68.5 915 | "914",73.2,68.5 916 | "915",73.7,72.5 917 | "916",73.7,72.5 918 | "917",73.7,72.5 919 | "918",73.7,72.5 920 | "919",73.7,71.5 921 | "920",73.7,71.5 922 | "921",73.7,70.5 923 | "922",73.7,70.5 924 | "923",73.7,70.5 925 | "924",73.7,69.5 926 | "925",73.7,69.5 927 | "926",73.7,69.5 928 | "927",73.7,69.5 929 | "928",73.7,69.5 930 | -------------------------------------------------------------------------------- /data/movies.txt: -------------------------------------------------------------------------------- 1 | score rating genre box office running time 2 | 2 Fast 2 Furious 48.9 PG-13 action/adventure 127.146 107 3 | 28 Days Later 78.2 R horror 45.065 113 4 | A Guy Thing 39.5 PG-13 rom comedy 15.545 101 5 | A Man Apart 42.9 R action/adventure 26.248 110 6 | A Mighty Wind 79.9 PG-13 comedy 17.781 91 7 | Agent Cody Banks 57.9 PG action/adventure 47.811 102 8 | Alex & Emma 35.1 PG-13 rom comedy 14.219 96 9 | American Wedding 50.7 R comedy 104.441 96 10 | Anger Management 62.6 PG-13 comedy 134.404 106 11 | Anything Else 63.3 R rom comedy 3.212 108 12 | Bad Boys II 38.1 R action/adventure 138.397 147 13 | Bad Santa 75.8 R comedy 59.523 91 14 | Basic 43.6 R suspense 26.536 98 15 | Beyond Borders 48.8 R drama 4.430 127 16 | big fish 78.0 PG-13 drama 65.151 125 17 | Biker Boyz 44.8 PG-13 action/adventure 21.731 110 18 | Boat Trip 29.2 R comedy 8.600 94 19 | Bringing Down the House 57.7 PG-13 comedy 132.582 105 20 | Brother Bear 60.6 G animated 84.874 84 21 | Bruce Almighty 63.3 PG-13 comedy 242.705 101 22 | Bulletproof Monk 47.3 PG-13 
action/adventure 23.359 104 23 | Cabin Fever 54.3 R horror 21.158 93 24 | Calendar Girls 67.3 PG-13 comedy 29.591 108 25 | Charlie's Angles: Full Throttle 54.8 PG-13 action/adventure 100.785 106 26 | Cheaper by the Dozen 52.9 PG comedy 135.445 98 27 | Cold Creek Manor 50.6 R suspense 21.386 118 28 | Cold Mountain 78.0 R drama 90.712 152 29 | Confidence 62.2 R drama 12.218 97 30 | Cradle 2 the Grave 45.8 R action/adventure 34.613 101 31 | Daddy Day Care 40.5 PG comedy 104.297 92 32 | Daredevil 52.1 PG-13 action/adventure 102.544 103 33 | Dark Blue 58.7 R drama 9.250 118 34 | Darkness Falls 31.1 PG-13 horror 32.131 86 35 | Deliver Us From Eva 61.8 R rom comedy 17.366 105 36 | Dickie Roberts, Former Child Star 52.6 PG-13 comedy 22.739 98 37 | Down With Love 63.7 PG-13 rom comedy 20.305 101 38 | Dr. Seuss' The Cat in the Hat 37.8 PG fantasy 99.880 82 39 | Dreamcatcher 42.5 R sci-fi 33.700 136 40 | Dumb and Dumberer 29.5 PG-13 comedy 26.166 85 41 | Duplex 51.3 PG-13 comedy 9.671 89 42 | Dysfunktional Family 57.2 R documentary 2.235 89 43 | Elf 70.3 PG comedy 173.306 95 44 | Final Destination 2 46.1 R horror 46.492 90 45 | Finding Nemo 91.2 G animated 339.715 100 46 | Freaky Friday 74.9 PG comedy 110.222 97 47 | Freddy vs. Jason 47.3 R horror 82.217 97 48 | From Justin to Kelly 34.8 PG musical 4.929 81 49 | Gigli 29.3 R rom comedy 6.088 121 50 | Gods and Generals 42.0 PG-13 drama 12.875 231 51 | Good Boy! 
57.9 PG comedy 37.655 87 52 | Gothika 48.1 R horror 59.454 98 53 | Grind 35.0 PG-13 comedy 5.124 105 54 | Head of State 56.7 PG-13 comedy 37.845 95 55 | Holes 71.9 PG drama 67.365 117 56 | Hollywood Homicide 47.9 PG-13 action/adventure 30.941 116 57 | Honey 49.9 PG-13 drama 30.223 94 58 | House of 1000 Corpses 36.3 R horror 12.599 105 59 | How to Deal 53.6 PG-13 drama 14.144 101 60 | How to Lose a Guy in 10 Days 50.3 PG-13 rom comedy 106.094 116 61 | Identity 60.0 R suspense 52.131 90 62 | In the Cut 53.6 R suspense 4.717 119 63 | Intolerable Cruelty 67.1 PG-13 rom comedy 35.189 100 64 | It Runs in the Family 55.6 PG-13 comedy 7.492 109 65 | Jeepers Creepers 2 44.2 R horror 35.667 104 66 | Johnny English 56.4 PG comedy 28.082 87 67 | Just Married 41.5 PG-13 rom comedy 56.127 95 68 | Kangaroo Jack 30.4 PG comedy 66.746 89 69 | Kill Bill - Vol 1 84.1 R action/adventure 69.869 111 70 | Lara Croft, Tomb Raider 48.8 PG-13 action/adventure 65.660 117 71 | Le Divorce 59.6 PG-13 comedy 9.081 117 72 | Legally Blonde 2 50.8 PG-13 comedy 89.921 95 73 | Looney Tunes: Back in Action 59.3 PG comedy 20.808 90 74 | Lost in Translation 94.6 R drama 43.217 102 75 | Love Actually 70.5 R rom comedy 59.365 135 76 | Love Don't Cost a Thing 45.4 PG-13 rom comedy 21.803 100 77 | Malibu's Most Wanted 44.8 PG-13 comedy 34.340 86 78 | Marci X 36.2 R comedy 1.649 84 79 | Master and Commander 87.6 PG-13 action/adventure 92.076 138 80 | Matchstick Men 68.9 PG-13 comedy 36.886 116 81 | Mona Lisa Smile 55.8 PG-13 drama 63.696 117 82 | Monster 64.9 R drama 23.802 109 83 | My Boss's Daughter 33.2 PG-13 rom comedy 15.551 90 84 | Mystic River 89.9 R drama 79.207 137 85 | National Security 37.9 PG-13 action/adventure 35.765 88 86 | Old School 54.7 R comedy 74.663 92 87 | Once Upon A Time in Mexico 64.6 R action/adventure 55.846 102 88 | Open Range 71.6 R western 58.331 139 89 | Out of Time 65.8 PG-13 suspense 41.077 105 90 | Paycheck 48.3 PG-13 sci-fi 53.428 119 91 | Peter Pan 67.5 PG fantasy 47.581 
113 92 | Phone Booth 62.1 R suspense 46.566 81 93 | Piglet's Big Movie 63.6 G animated 23.103 75 94 | Pirates of the Caribbean 67.8 PG-13 action/adventure 305.414 143 95 | Radio 54.2 PG drama 51.987 109 96 | Rugrats Go Wild 55.6 PG animated 39.403 84 97 | Runaway Jury 65.4 PG-13 suspense 49.441 127 98 | S.W.A.T. 55.4 PG-13 action/adventure 116.643 117 99 | Scary Movie 3 50.2 PG-13 comedy 110.000 84 100 | Seabiscuit 81.3 PG-13 drama 120.171 141 101 | Secondhand Lions 57.9 PG drama 41.521 111 102 | Shanghai Knights 62.1 PG-13 action/adventure 60.477 114 103 | Sinbad: Legend of the Seven Seas 55.3 PG animated 26.309 86 104 | Something's Gotta Give 75.5 PG-13 rom comedy 121.418 128 105 | Spy Kids 3-D: Game Over 65.0 PG action/adventure 111.761 84 106 | Stuck on You 65.6 PG-13 comedy 33.762 118 107 | Tears of the Sun 53.1 R action/adventure 43.427 121 108 | Terminator 3:Rise of the Machines 71.3 R action/adventure 150.358 109 109 | The Core 53.1 PG-13 sci-fi 31.187 135 110 | The Fighting Temptations 63.3 PG-13 comedy 30.251 123 111 | The Haunted Mansion 45.3 PG comedy 74.320 99 112 | The Hulk 61.0 PG-13 action/adventure 132.176 138 113 | The Hunted 54.0 R action/adventure 34.316 94 114 | The In-Laws 44.8 PG-13 comedy 20.453 95 115 | The Italian Job 66.5 PG-13 action/adventure 106.129 111 116 | The Jungle Book 2 55.2 G animated 47.902 72 117 | The Last Samurai 67.8 R drama 110.069 154 118 | The League of Extraordinary Gentlemen 43.2 PG-13 action/adventure 66.465 110 119 | The Life of David Gale 46.9 R suspense 19.956 130 120 | The Lizze McGuire Movie 57.1 PG comedy 42.718 94 121 | The Lord of the Rings III 92.2 PG-13 fantasy 361.119 201 122 | The Matrix Reloaded 70.1 R action/adventure 281.519 138 123 | The Matrix Revolutions 49.7 R action/adventure 139.260 129 124 | The Medallion 46.2 PG-13 action/adventure 22.219 88 125 | The Missing 67.0 R western 26.900 137 126 | The Order 29.8 R suspense 7.661 102 127 | The Real Cancun 40.8 R documentary 3.779 96 128 | The Recruit 
62.6 PG-13 suspense 52.802 115 129 | The Rundown 60.4 PG-13 action 47.611 104 130 | The School of Rock 86.4 PG-13 comedy 81.239 108 131 | The Texas Chainsaw Massacre 42.2 PG-13 horror 80.168 98 132 | Timeline 42.9 R action 19.481 116 133 | Tupac: Resurrection 69.5 R documentary 7.719 90 134 | Under the Tuscan Sun 63.1 PG-13 comedy 43.502 113 135 | Underworld 46.2 R action 51.484 121 136 | Uptown Girls 38.5 PG-13 comedy 37.182 92 137 | View from the Top 43.7 PG-13 comedy 15.597 87 138 | What a Girl Wants 53.3 PG comedy 36.017 105 139 | Willard 60.3 PG-13 horror 6.852 100 140 | Wrong Turn 32.6 R horror 15.419 84 141 | X2: X-Men United 72.5 PG-13 action 214.950 133 142 | -------------------------------------------------------------------------------- /data/ravensData.csv: -------------------------------------------------------------------------------- 1 | "ravenWinNum","ravenWin","ravenScore","opponentScore" 2 | 1,"W",24,9 3 | 1,"W",38,35 4 | 1,"W",28,13 5 | 1,"W",34,31 6 | 1,"W",44,13 7 | 0,"L",23,24 8 | 1,"W",31,30 9 | 1,"W",23,16 10 | 1,"W",9,6 11 | 1,"W",31,29 12 | 0,"L",13,43 13 | 1,"W",25,15 14 | 1,"W",55,20 15 | 1,"W",13,10 16 | 1,"W",16,13 17 | 0,"L",20,23 18 | 0,"L",28,31 19 | 0,"L",17,34 20 | 1,"W",33,14 21 | 0,"L",17,23 22 | -------------------------------------------------------------------------------- /data/ravensData.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/data/ravensData.rda -------------------------------------------------------------------------------- /data/samsungData.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/data/samsungData.rda -------------------------------------------------------------------------------- /data/warpbreaks.csv: 
-------------------------------------------------------------------------------- 1 | "","breaks","wool","tension" 2 | "1",26,"A","L" 3 | "2",30,"A","L" 4 | "3",54,"A","L" 5 | "4",25,"A","L" 6 | "5",70,"A","L" 7 | "6",52,"A","L" 8 | "7",51,"A","L" 9 | "8",26,"A","L" 10 | "9",67,"A","L" 11 | "10",18,"A","M" 12 | "11",21,"A","M" 13 | "12",29,"A","M" 14 | "13",17,"A","M" 15 | "14",12,"A","M" 16 | "15",18,"A","M" 17 | "16",35,"A","M" 18 | "17",30,"A","M" 19 | "18",36,"A","M" 20 | "19",36,"A","H" 21 | "20",21,"A","H" 22 | "21",24,"A","H" 23 | "22",18,"A","H" 24 | "23",10,"A","H" 25 | "24",43,"A","H" 26 | "25",28,"A","H" 27 | "26",15,"A","H" 28 | "27",26,"A","H" 29 | "28",27,"B","L" 30 | "29",14,"B","L" 31 | "30",29,"B","L" 32 | "31",19,"B","L" 33 | "32",29,"B","L" 34 | "33",31,"B","L" 35 | "34",41,"B","L" 36 | "35",20,"B","L" 37 | "36",44,"B","L" 38 | "37",42,"B","M" 39 | "38",26,"B","M" 40 | "39",19,"B","M" 41 | "40",16,"B","M" 42 | "41",39,"B","M" 43 | "42",28,"B","M" 44 | "43",21,"B","M" 45 | "44",39,"B","M" 46 | "45",29,"B","M" 47 | "46",20,"B","H" 48 | "47",21,"B","H" 49 | "48",24,"B","H" 50 | "49",17,"B","H" 51 | "50",13,"B","H" 52 | "51",15,"B","H" 53 | "52",15,"B","H" 54 | "53",16,"B","H" 55 | "54",28,"B","H" 56 | -------------------------------------------------------------------------------- /week1/representing_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "representing_data" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "heading", 12 | "level": 2, 13 | "metadata": {}, 14 | "source": [ 15 | "Representing Data in R -- Python equivalent" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "collapsed": true, 21 | "input": [ 22 | "import pandas as pd\n", 23 | "import numpy as np" 24 | ], 25 | "language": "python", 26 | "metadata": {}, 27 | "outputs": [], 28 | "prompt_number": 1 29 | }, 30 | { 31 | "cell_type": 
"code", 32 | "collapsed": false, 33 | "input": [ 34 | "# 'characters' is equivalent to string\n", 35 | "firstName = 'jeff'\n", 36 | "print type(firstName), firstName" 37 | ], 38 | "language": "python", 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "stream": "stdout", 44 | "text": [ 45 | " jeff\n" 46 | ] 47 | } 48 | ], 49 | "prompt_number": 2 50 | }, 51 | { 52 | "cell_type": "code", 53 | "collapsed": false, 54 | "input": [ 55 | "# 'numeric' is equivalent to float\n", 56 | "heightCM = 188.2\n", 57 | "print type(heightCM), heightCM" 58 | ], 59 | "language": "python", 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "output_type": "stream", 64 | "stream": "stdout", 65 | "text": [ 66 | " 188.2\n" 67 | ] 68 | } 69 | ], 70 | "prompt_number": 3 71 | }, 72 | { 73 | "cell_type": "code", 74 | "collapsed": false, 75 | "input": [ 76 | "# integer is equivalent to integer\n", 77 | "numberSons = 1\n", 78 | "print type(numberSons), numberSons" 79 | ], 80 | "language": "python", 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "output_type": "stream", 85 | "stream": "stdout", 86 | "text": [ 87 | " 1\n" 88 | ] 89 | } 90 | ], 91 | "prompt_number": 4 92 | }, 93 | { 94 | "cell_type": "code", 95 | "collapsed": false, 96 | "input": [ 97 | "# 'logical' is equivalent to Boolean\n", 98 | "teachingCoursera = True\n", 99 | "print type(teachingCoursera), teachingCoursera" 100 | ], 101 | "language": "python", 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "output_type": "stream", 106 | "stream": "stdout", 107 | "text": [ 108 | " True\n" 109 | ] 110 | } 111 | ], 112 | "prompt_number": 5 113 | }, 114 | { 115 | "cell_type": "code", 116 | "collapsed": false, 117 | "input": [ 118 | "# 'vectors' is equivalent to numpy array or Python list (I will use array everywhere for consistency)\n", 119 | "heights = np.array([188.2, 181.3, 193.4])\n", 120 | "print heights\n", 121 | "\n", 122 | "firstNames = np.array(['jeff', 'roger', 'andrew', 'brian'])\n", 123 | 
"print firstNames" 124 | ], 125 | "language": "python", 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "output_type": "stream", 130 | "stream": "stdout", 131 | "text": [ 132 | "[ 188.2 181.3 193.4]\n", 133 | "['jeff' 'roger' 'andrew' 'brian']\n" 134 | ] 135 | } 136 | ], 137 | "prompt_number": 6 138 | }, 139 | { 140 | "cell_type": "code", 141 | "collapsed": false, 142 | "input": [ 143 | "# 'list' is equivalent to dictionary in Python\n", 144 | "vector1 = np.array([188.2, 181.3, 193.4])\n", 145 | "vector2 = np.array(['jeff', 'roger', 'andrew', 'brian'])\n", 146 | "myList = dict(heights = vector1, firstNames = vector2)\n", 147 | "print myList\n", 148 | "\n", 149 | "print myList['heights']\n", 150 | "print myList['firstNames']" 151 | ], 152 | "language": "python", 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "output_type": "stream", 157 | "stream": "stdout", 158 | "text": [ 159 | "{'firstNames': array(['jeff', 'roger', 'andrew', 'brian'], \n", 160 | " dtype='|S6'), 'heights': array([ 188.2, 181.3, 193.4])}\n", 161 | "[ 188.2 181.3 193.4]\n", 162 | "['jeff' 'roger' 'andrew' 'brian']\n" 163 | ] 164 | } 165 | ], 166 | "prompt_number": 7 167 | }, 168 | { 169 | "cell_type": "code", 170 | "collapsed": false, 171 | "input": [ 172 | "# 'matrices' is equivalent to two-dimensional numpy array\n", 173 | "myMatrix = np.array([[1, 2], [3, 4]])\n", 174 | "print myMatrix" 175 | ], 176 | "language": "python", 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "output_type": "stream", 181 | "stream": "stdout", 182 | "text": [ 183 | "[[1 2]\n", 184 | " [3 4]]\n" 185 | ] 186 | } 187 | ], 188 | "prompt_number": 8 189 | }, 190 | { 191 | "cell_type": "code", 192 | "collapsed": false, 193 | "input": [ 194 | "# data frame is equivalent to Pandas DataFrame\n", 195 | "# this example doesn't work because the input array lengths are not the same\n", 196 | "vector1 = np.array([188.2, 181.3, 193.4])\n", 197 | "vector2 = np.array(['jeff', 'roger', 'andrew', 'brian'])\n", 198 | 
"\n", 199 | "# ValueError: arrays must all be same length\n", 200 | "# \n", 201 | "myDataFrame = pd.DataFrame(dict(heights = vector1, firstNames = vector2))" 202 | ], 203 | "language": "python", 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "ename": "ValueError", 208 | "evalue": "arrays must all be same length", 209 | "output_type": "pyerr", 210 | "traceback": [ 211 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 212 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# ValueError: arrays must all be same length\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;31m#\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mmyDataFrame\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mheights\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvector1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfirstNames\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvector2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 213 | "\u001b[0;32m/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[0mmgr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_init_mgr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 385\u001b[0;31m \u001b[0mmgr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_init_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 386\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMaskedArray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[0mmask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetmaskarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 214 | "\u001b[0;32m/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_init_dict\u001b[0;34m(self, data, index, columns, dtype)\u001b[0m\n\u001b[1;32m 515\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 516\u001b[0m return _arrays_to_mgr(arrays, data_names, index, columns,\n\u001b[0;32m--> 517\u001b[0;31m dtype=dtype)\n\u001b[0m\u001b[1;32m 518\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 519\u001b[0m def _init_ndarray(self, values, index, columns, dtype=None,\n", 215 | 
"\u001b[0;32m/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36m_arrays_to_mgr\u001b[0;34m(arrays, arr_names, index, columns, dtype)\u001b[0m\n\u001b[1;32m 5343\u001b[0m \u001b[0;31m# figure out the index, if necessary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5344\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5345\u001b[0;31m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mextract_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marrays\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5346\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5347\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_ensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 216 | "\u001b[0;32m/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36mextract_index\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m 5395\u001b[0m \u001b[0mlengths\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mraw_lengths\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5396\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlengths\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5397\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'arrays must all be same 
length'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5398\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5399\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhave_dicts\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 217 | "\u001b[0;31mValueError\u001b[0m: arrays must all be same length" 218 | ] 219 | } 220 | ], 221 | "prompt_number": 10 222 | }, 223 | { 224 | "cell_type": "code", 225 | "collapsed": false, 226 | "input": [ 227 | "# data frame -- fixed\n", 228 | "vector1 = np.array([188.2, 181.3, 193.4, 192.3])\n", 229 | "vector2 = np.array(['jeff', 'roger', 'andrew', 'brian'])\n", 230 | "\n", 231 | "myDataFrame = pd.DataFrame(dict(heights = vector1, firstNames = vector2))\n", 232 | "myDataFrame" 233 | ], 234 | "language": "python", 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "html": [ 239 | "
\n", 240 | "\n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | "
firstNamesheights
0 jeff 188.2
1 roger 181.3
2 andrew 193.4
3 brian 192.3
\n", 271 | "
" 272 | ], 273 | "output_type": "pyout", 274 | "prompt_number": 11, 275 | "text": [ 276 | " firstNames heights\n", 277 | "0 jeff 188.2\n", 278 | "1 roger 181.3\n", 279 | "2 andrew 193.4\n", 280 | "3 brian 192.3" 281 | ] 282 | } 283 | ], 284 | "prompt_number": 11 285 | }, 286 | { 287 | "cell_type": "code", 288 | "collapsed": false, 289 | "input": [ 290 | "# factors is equivalent to pandas Categorical\n", 291 | "smoker = np.array(['yes', 'no', 'yes', 'yes'])\n", 292 | "smokerFactor = pd.Categorical.from_array(smoker)\n", 293 | "smokerFactor" 294 | ], 295 | "language": "python", 296 | "metadata": {}, 297 | "outputs": [ 298 | { 299 | "output_type": "pyout", 300 | "prompt_number": 12, 301 | "text": [ 302 | "Categorical: \n", 303 | "array(['yes', 'no', 'yes', 'yes'], dtype=object)\n", 304 | "Levels (2): Index(['no', 'yes'], dtype=object)" 305 | ] 306 | } 307 | ], 308 | "prompt_number": 12 309 | }, 310 | { 311 | "cell_type": "code", 312 | "collapsed": false, 313 | "input": [ 314 | "# R's NA missing values is equivalent to NaN\n", 315 | "vector1 = np.array([188.2, 181.3, 193.4, NaN])\n", 316 | "print vector1\n", 317 | "print isnan(vector1)" 318 | ], 319 | "language": "python", 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "output_type": "stream", 324 | "stream": "stdout", 325 | "text": [ 326 | "[ 188.2 181.3 193.4 nan]\n", 327 | "[False False False True]\n" 328 | ] 329 | } 330 | ], 331 | "prompt_number": 13 332 | }, 333 | { 334 | "cell_type": "code", 335 | "collapsed": false, 336 | "input": [ 337 | "# subsetting\n", 338 | "vector1 = np.array([188.2, 181.3, 193.4, 192.3])\n", 339 | "vector2 = np.array(['jeff', 'roger', 'andrew', 'brian'])\n", 340 | "\n", 341 | "myDataFrame = pd.DataFrame(dict(heights = vector1, firstNames = vector2))\n", 342 | "\n", 343 | "print '------------------'\n", 344 | "print vector1[0]\n", 345 | "print '------------------'\n", 346 | "print vector1[[0, 1, 3]]\n", 347 | "print '------------------'\n", 348 | "print myDataFrame.ix[0, 0:2] # 
appears transposed as compared to R\n", 349 | "print '------------------'\n", 350 | "print myDataFrame['firstNames'] # there's no 'Levels' as in R\n", 351 | "print '------------------'\n", 352 | "print myDataFrame[myDataFrame['firstNames'] == 'jeff']\n", 353 | "print '------------------'\n", 354 | "print myDataFrame[myDataFrame['heights'] < 190]" 355 | ], 356 | "language": "python", 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "output_type": "stream", 361 | "stream": "stdout", 362 | "text": [ 363 | "------------------\n", 364 | "188.2\n", 365 | "------------------\n", 366 | "[ 188.2 181.3 192.3]\n", 367 | "------------------\n", 368 | "firstNames jeff\n", 369 | "heights 188.2\n", 370 | "Name: 0\n", 371 | "------------------\n", 372 | "0 jeff\n", 373 | "1 roger\n", 374 | "2 andrew\n", 375 | "3 brian\n", 376 | "Name: firstNames\n", 377 | "------------------\n", 378 | " firstNames heights\n", 379 | "0 jeff 188.2\n", 380 | "------------------\n", 381 | " firstNames heights\n", 382 | "0 jeff 188.2\n", 383 | "1 roger 181.3\n" 384 | ] 385 | } 386 | ], 387 | "prompt_number": 14 388 | }, 389 | { 390 | "cell_type": "code", 391 | "collapsed": true, 392 | "input": [], 393 | "language": "python", 394 | "metadata": {}, 395 | "outputs": [] 396 | } 397 | ], 398 | "metadata": {} 399 | } 400 | ] 401 | } -------------------------------------------------------------------------------- /week1/simulation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "simulation" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "heading", 12 | "level": 2, 13 | "metadata": {}, 14 | "source": [ 15 | "Simulation Basics -- equivalent in Python" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "collapsed": false, 21 | "input": [ 22 | "import numpy as np\n", 23 | "import scipy.stats as s" 24 | ], 25 | "language": "python", 26 | "metadata": {}, 27 | 
"outputs": [], 28 | "prompt_number": 1 29 | }, 30 | { 31 | "cell_type": "code", 32 | "collapsed": false, 33 | "input": [ 34 | "# normal distribution\n", 35 | "# normal(mean, stdev, size)\n", 36 | "heights = np.random.normal(188, 3, 10)\n", 37 | "print heights" 38 | ], 39 | "language": "python", 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "output_type": "stream", 44 | "stream": "stdout", 45 | "text": [ 46 | "[ 182.67139438 192.17804988 190.09008635 186.57278872 194.03622068\n", 47 | " 191.88272193 183.61975607 186.97670622 187.82621993 189.9658577 ]\n" 48 | ] 49 | } 50 | ], 51 | "prompt_number": 2 52 | }, 53 | { 54 | "cell_type": "code", 55 | "collapsed": false, 56 | "input": [ 57 | "# binomial distribution\n", 58 | "# binomial(n, p, size)\n", 59 | "coinFlips = np.random.binomial(10, 0.5, 10)\n", 60 | "print coinFlips" 61 | ], 62 | "language": "python", 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "output_type": "stream", 67 | "stream": "stdout", 68 | "text": [ 69 | "[4 6 5 6 7 3 4 6 5 6]\n" 70 | ] 71 | } 72 | ], 73 | "prompt_number": 3 74 | }, 75 | { 76 | "cell_type": "code", 77 | "collapsed": false, 78 | "input": [ 79 | "# normal density\n", 80 | "x = np.linspace(-5, 5, num=10)\n", 81 | "normalDensity = s.norm.pdf(x, 0, 1)\n", 82 | "print np.around(normalDensity, decimals=2)" 83 | ], 84 | "language": "python", 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "output_type": "stream", 89 | "stream": "stdout", 90 | "text": [ 91 | "[ 0. 0. 0.01 0.1 0.34 0.34 0.1 0.01 0. 0. 
]\n" 92 | ] 93 | } 94 | ], 95 | "prompt_number": 4 96 | }, 97 | { 98 | "cell_type": "code", 99 | "collapsed": false, 100 | "input": [ 101 | "# binomial density\n", 102 | "x = np.arange(0, 11, 1) # note that it's (0, 11, 1) instead of (0, 10, 1)\n", 103 | "binomialDensity = s.binom.pmf(x, 10, 0.5)\n", 104 | "print np.around(binomialDensity, decimals=2)" 105 | ], 106 | "language": "python", 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "output_type": "stream", 111 | "stream": "stdout", 112 | "text": [ 113 | "[ 0. 0.01 0.04 0.12 0.21 0.25 0.21 0.12 0.04 0.01 0. ]\n" 114 | ] 115 | } 116 | ], 117 | "prompt_number": 5 118 | }, 119 | { 120 | "cell_type": "code", 121 | "collapsed": false, 122 | "input": [ 123 | "# 'sample' draws a random sample with and without replacement --> numpy.random.choice in numpy 1.7\n", 124 | "heights = np.random.normal(188, 3, 10)\n", 125 | "\n", 126 | "# with replacement\n", 127 | "print 'random sampling with replacement:'\n", 128 | "print np.random.choice(heights, size=10, replace=True, p=None)\n", 129 | "\n", 130 | "# without replacement\n", 131 | "print '\\nrandom sampling without replacement'\n", 132 | "print np.random.choice(heights, size=10, replace=False, p=None)\n", 133 | "\n", 134 | "# sample according to a set of probability\n", 135 | "probs = [0.4, 0.3, 0.2, 0.1, 0, 0, 0, 0, 0, 0]\n", 136 | "print '\\nrandom sampling with replacement according to a set of probability:'\n", 137 | "print probs\n", 138 | "print '\\nsum of probabilities:'\n", 139 | "print sum(probs)\n", 140 | "print '\\nsampled:'\n", 141 | "print np.random.choice(heights, size=10, replace=True, p=probs)" 142 | ], 143 | "language": "python", 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "output_type": "stream", 148 | "stream": "stdout", 149 | "text": [ 150 | "random sampling with replacement:\n", 151 | "[ 188.51730245 187.57465384 187.7094007 187.34057076 189.44858637\n", 152 | " 187.57465384 188.51730245 184.62068782 187.57465384 189.4947812 ]\n", 153 | 
"\n", 154 | "random sampling without replacement\n", 155 | "[ 187.57465384 187.7094007 189.92494462 186.40851384 188.8397573\n", 156 | " 189.44858637 188.51730245 187.34057076 189.4947812 184.62068782]\n", 157 | "\n", 158 | "random sampling with replacement according to a set of probability:\n", 159 | "[0.4, 0.3, 0.2, 0.1, 0, 0, 0, 0, 0, 0]\n", 160 | "\n", 161 | "sum of probabilities:\n", 162 | "1.0\n", 163 | "\n", 164 | "sampled:\n", 165 | "[ 189.44858637 189.44858637 184.62068782 189.44858637 189.44858637\n", 166 | " 184.62068782 189.44858637 186.40851384 189.44858637 189.44858637]\n" 167 | ] 168 | } 169 | ], 170 | "prompt_number": 8 171 | }, 172 | { 173 | "cell_type": "code", 174 | "collapsed": false, 175 | "input": [ 176 | "# setting a seed\n", 177 | "np.random.seed(12345)\n", 178 | "print np.random.normal(0, 1, 5)\n", 179 | "print '-------------------------------------------------------------'\n", 180 | "\n", 181 | "np.random.seed(12345)\n", 182 | "print np.random.normal(0, 1, 5)" 183 | ], 184 | "language": "python", 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "output_type": "stream", 189 | "stream": "stdout", 190 | "text": [ 191 | "[-0.20470766 0.47894334 -0.51943872 -0.5557303 1.96578057]\n", 192 | "-------------------------------------------------------------\n", 193 | "[-0.20470766 0.47894334 -0.51943872 -0.5557303 1.96578057]\n" 194 | ] 195 | } 196 | ], 197 | "prompt_number": 9 198 | }, 199 | { 200 | "cell_type": "code", 201 | "collapsed": false, 202 | "input": [], 203 | "language": "python", 204 | "metadata": {}, 205 | "outputs": [] 206 | } 207 | ], 208 | "metadata": {} 209 | } 210 | ] 211 | } -------------------------------------------------------------------------------- /week1/wk1_quiz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "wk1_quiz" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": 
"heading", 12 | "level": 2, 13 | "metadata": {}, 14 | "source": [ 15 | "Week 1 quiz" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "collapsed": false, 21 | "input": [ 22 | "import pandas as pd" 23 | ], 24 | "language": "python", 25 | "metadata": {}, 26 | "outputs": [], 27 | "prompt_number": 2 28 | }, 29 | { 30 | "cell_type": "code", 31 | "collapsed": false, 32 | "input": [ 33 | "%load_ext rmagic" 34 | ], 35 | "language": "python", 36 | "metadata": {}, 37 | "outputs": [], 38 | "prompt_number": 3 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "Question 3. Here we'll use R just to generate the same data as required in the course quiz." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "collapsed": false, 50 | "input": [ 51 | "%%R -o dataFrame\n", 52 | "set.seed(31);\n", 53 | "heightsCM = rnorm(30,mean=188, sd=5);\n", 54 | "weightsK = rnorm(30,mean=84,sd=3); \n", 55 | "hasDaughter = sample(c(TRUE,FALSE),size=30,replace=T); \n", 56 | "dataFrame = data.frame(heightsCM,weightsK,hasDaughter);" 57 | ], 58 | "language": "python", 59 | "metadata": {}, 60 | "outputs": [], 61 | "prompt_number": 4 62 | }, 63 | { 64 | "cell_type": "code", 65 | "collapsed": false, 66 | "input": [ 67 | "p = pd.DataFrame(dataFrame.T, columns=['heightsCM', 'weightsK', 'hasDaughter'])" 68 | ], 69 | "language": "python", 70 | "metadata": {}, 71 | "outputs": [], 72 | "prompt_number": 5 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "Question 4. Here we'll use R just to generate the same data as required in the course quiz." 
79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "collapsed": false, 84 | "input": [ 85 | "%%R -o s\n", 86 | "set.seed(41);\n", 87 | "cauchyValues = rcauchy(100)\n", 88 | "\n", 89 | "set.seed(415);\n", 90 | "s = sample(cauchyValues,size=10,replace=T)" 91 | ], 92 | "language": "python", 93 | "metadata": {}, 94 | "outputs": [], 95 | "prompt_number": 7 96 | }, 97 | { 98 | "cell_type": "code", 99 | "collapsed": false, 100 | "input": [ 101 | "s" 102 | ], 103 | "language": "python", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "output_type": "pyout", 108 | "prompt_number": 8, 109 | "text": [ 110 | "array([ 0.80847186, -1.11228626, 0.37166707, -1.16507204, -2.97017695,\n", 111 | " -1.02310311, 0.31152624, 0.28154888, 0.2414012 , -1.11228626])" 112 | ] 113 | } 114 | ], 115 | "prompt_number": 8 116 | }, 117 | { 118 | "cell_type": "code", 119 | "collapsed": false, 120 | "input": [], 121 | "language": "python", 122 | "metadata": {}, 123 | "outputs": [] 124 | } 125 | ], 126 | "metadata": {} 127 | } 128 | ] 129 | } -------------------------------------------------------------------------------- /week2/getting_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "getting_data" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "heading", 12 | "level": 2, 13 | "metadata": {}, 14 | "source": [ 15 | "Getting data" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "Getting and setting directory, equivalent to R getwd() and setwd() commands\n", 23 | "\n", 24 | "This is the more general Python way. 
In IPython we can use system commands directly prefixed by an exclamation mark (!)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "collapsed": false, 30 | "input": [ 31 | "import os\n", 32 | "\n", 33 | "os.getcwd()" 34 | ], 35 | "language": "python", 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "output_type": "pyout", 40 | "prompt_number": 2, 41 | "text": [ 42 | "'/Users/erriza/dataanalysis/week2'" 43 | ] 44 | } 45 | ], 46 | "prompt_number": 2 47 | }, 48 | { 49 | "cell_type": "code", 50 | "collapsed": false, 51 | "input": [ 52 | "os.chdir('..')\n", 53 | "os.getcwd()" 54 | ], 55 | "language": "python", 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "output_type": "pyout", 60 | "prompt_number": 3, 61 | "text": [ 62 | "'/Users/erriza/dataanalysis'" 63 | ] 64 | } 65 | ], 66 | "prompt_number": 3 67 | }, 68 | { 69 | "cell_type": "code", 70 | "collapsed": false, 71 | "input": [ 72 | "os.chdir('./week2/')\n", 73 | "os.getcwd()" 74 | ], 75 | "language": "python", 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "output_type": "pyout", 80 | "prompt_number": 4, 81 | "text": [ 82 | "'/Users/erriza/dataanalysis/week2'" 83 | ] 84 | } 85 | ], 86 | "prompt_number": 4 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Load CSV data. We'll use mostly `pandas`." 
93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "collapsed": false, 98 | "input": [ 99 | "import pandas as pd\n", 100 | "\n", 101 | "fileUrl = 'https://data.baltimorecity.gov/api/views/dz54-2aru/rows.csv?accessType=DOWNLOAD'\n", 102 | "\n", 103 | "# we can directly use read_csv to download the file\n", 104 | "# this is equivalent to R's combined download.file() and read.table() or read.csv() commands\n", 105 | "cameraData = pd.read_csv(fileUrl)\n", 106 | "\n", 107 | "# save data locally\n", 108 | "cameraData.to_csv('../data/cameras.csv', index=False)\n", 109 | "\n", 110 | "# for simplicity I'll use IPython tricks to list folder contents\n", 111 | "!ls ../data\n", 112 | "\n", 113 | "\n", 114 | "# get current date and time\n", 115 | "# this is equivalent to R date() command\n", 116 | "# note that I use IPython ! prefix to run my system's command\n", 117 | "dateDownloaded = !date\n", 118 | "print '\\nDate downloaded: ' + str(dateDownloaded)\n", 119 | "\n", 120 | "cameraData.head()" 121 | ], 122 | "language": "python", 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "output_type": "stream", 127 | "stream": "stdout", 128 | "text": [ 129 | "camera.xls face.rda loansData.csv samsungData.csv\r\n", 130 | "camera.xlsx gaData.csv movies.txt samsungData.rda\r\n", 131 | "cameras.csv gaData.rda ravensData.csv ss06pid.csv\r\n", 132 | "camerasModified.csv galton.csv ravensData.rda warpbreaks.csv\r\n" 133 | ] 134 | }, 135 | { 136 | "output_type": "stream", 137 | "stream": "stdout", 138 | "text": [ 139 | "\n", 140 | "Date downloaded: ['Mon Mar 18 21:29:11 CET 2013']\n" 141 | ] 142 | }, 143 | { 144 | "html": [ 145 | "
\n", 146 | "\n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | "
addressdirectionstreetcrossStreetintersectionLocation 1
0 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
1 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
2 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights Wilkens Ave & Pine Heights (39.2720252302, -76.676960806)
3 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St The Alameda & 33rd St (39.3285013141, -76.5953545714)
4 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda E 33rd & The Alameda (39.3283410623, -76.5953594625)
\n", 206 | "
" 207 | ], 208 | "output_type": "pyout", 209 | "prompt_number": 7, 210 | "text": [ 211 | " address direction street crossStreet \\\n", 212 | "0 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave \n", 213 | "1 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave \n", 214 | "2 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights \n", 215 | "3 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St \n", 216 | "4 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda \n", 217 | "\n", 218 | " intersection Location 1 \n", 219 | "0 Caton Ave & Benson Ave (39.2693779962, -76.6688185297) \n", 220 | "1 Caton Ave & Benson Ave (39.2693157898, -76.6689698176) \n", 221 | "2 Wilkens Ave & Pine Heights (39.2720252302, -76.676960806) \n", 222 | "3 The Alameda & 33rd St (39.3285013141, -76.5953545714) \n", 223 | "4 E 33rd & The Alameda (39.3283410623, -76.5953594625) " 224 | ] 225 | } 226 | ], 227 | "prompt_number": 7 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "Read Excel file\n", 234 | "\n", 235 | "This is equivalent to R `read.xlsx()` and `read.xlsx2()` commands.\n", 236 | "\n", 237 | "We need openpyxl 1.5.8 (don't use the latest version due to a bug) and xlrd packages. Install with: \n", 238 | "\n", 239 | "
\n",
240 |       "sudo pip install openpyxl==1.5.8\n",
241 |       "sudo pip install xlrd\n",
242 |       "
\n", 243 | "\n", 244 | "Pandas `ExcelFile()` can't download and read at once (in contrast to `read_csv()`), so we need to resort to the basic Python way.\n", 245 | "Also note that I'm using .xls; .xlsx doesn't work on my computer." 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "collapsed": false, 251 | "input": [ 252 | "import urllib2\n", 253 | "\n", 254 | "# download the file and save it as camera.xls in the ../data folder\n", 255 | "fileUrl = 'https://data.baltimorecity.gov/api/views/dz54-2aru/rows.xls?accessType=DOWNLOAD'\n", 256 | "f = urllib2.urlopen(fileUrl)\n", 257 | "data = f.read()\n", 258 | "with open('../data/camera.xls', 'wb') as w:\n", 259 | " w.write(data)\n", 260 | "\n", 261 | "# load the Excel file as a pandas DataFrame\n", 262 | "cameraData = pd.ExcelFile('../data/camera.xls')\n", 263 | "cameraData = cameraData.parse('Baltimore Fixed Speed Cameras', index_col=None, na_values=['NA'])\n", 264 | "cameraData.head()" 265 | ], 266 | "language": "python", 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "html": [ 271 | "
\n", 272 | "\n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | "
addressdirectionstreetcrossStreetintersectionLocation 1
0 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
1 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
2 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights Wilkens Ave & Pine Heights (39.2720252302, -76.676960806)
3 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St The Alameda & 33rd St (39.3285013141, -76.5953545714)
4 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda E 33rd & The Alameda (39.3283410623, -76.5953594625)
\n", 332 | "
" 333 | ], 334 | "output_type": "pyout", 335 | "prompt_number": 8, 336 | "text": [ 337 | " address direction street crossStreet \\\n", 338 | "0 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave \n", 339 | "1 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave \n", 340 | "2 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights \n", 341 | "3 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St \n", 342 | "4 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda \n", 343 | "\n", 344 | " intersection Location 1 \n", 345 | "0 Caton Ave & Benson Ave (39.2693779962, -76.6688185297) \n", 346 | "1 Caton Ave & Benson Ave (39.2693157898, -76.6689698176) \n", 347 | "2 Wilkens Ave & Pine Heights (39.2720252302, -76.676960806) \n", 348 | "3 The Alameda & 33rd St (39.3285013141, -76.5953545714) \n", 349 | "4 E 33rd & The Alameda (39.3283410623, -76.5953594625) " 350 | ] 351 | } 352 | ], 353 | "prompt_number": 8 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": {}, 358 | "source": [ 359 | "The course video describes R's `readLines()` for reading a text file, which is similar to standard Python file access, so I'm not going to detail it here.\n", 360 | "\n", 361 | "Similarly, using R's `readLines()` to read data from a website is similar to using Python's urllib2 package, as in the xls example above." 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "
Read JSON file\n", 369 | "\n", 370 | "This is equivalent to R's `fromJSON()` command." 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "collapsed": false, 376 | "input": [ 377 | "import json\n", 378 | "\n", 379 | "# first we get the json file from the website\n", 380 | "fileUrl = 'https://data.baltimorecity.gov/api/views/dz54-2aru/rows.json?accessType=DOWNLOAD'\n", 381 | "req = urllib2.Request(fileUrl)\n", 382 | "opener = urllib2.build_opener()\n", 383 | "f = opener.open(req)\n", 384 | "\n", 385 | "# then we read it into a data structure\n", 386 | "jsonCamera = json.loads(f.read())\n", 387 | "\n", 388 | "# json is loaded as a dictionary\n", 389 | "print jsonCamera['meta']['view']['id']\n", 390 | "print jsonCamera['meta']['view']['name']\n", 391 | "print jsonCamera['meta']['view']['attribution']" 392 | ], 393 | "language": "python", 394 | "metadata": {}, 395 | "outputs": [ 396 | { 397 | "output_type": "stream", 398 | "stream": "stdout", 399 | "text": [ 400 | "dz54-2aru\n", 401 | "Baltimore Fixed Speed Cameras\n", 402 | "Department of Transportation\n" 403 | ] 404 | } 405 | ], 406 | "prompt_number": 9 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "
Writing data" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "collapsed": false, 418 | "input": [ 419 | "# first read the csv file\n", 420 | "cameraData = pd.read_csv('../data/cameras.csv')\n", 421 | "\n", 422 | "# take a subset of the columns\n", 423 | "tmpData = cameraData.ix[:,2:]\n", 424 | "\n", 425 | "# then save it to a different csv file\n", 426 | "# this is equivalent to R's write.table() command\n", 427 | "tmpData.to_csv('../data/camerasModified.csv', sep=',', index=False)\n", 428 | "\n", 429 | "cameraData2 = pd.read_csv('../data/camerasModified.csv')\n", 430 | "cameraData2.head()" 431 | ], 432 | "language": "python", 433 | "metadata": {}, 434 | "outputs": [ 435 | { 436 | "html": [ 437 | "
\n", 438 | "\n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | "
streetcrossStreetintersectionLocation 1
0 Caton Ave Benson Ave Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
1 Caton Ave Benson Ave Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
2 Wilkens Ave Pine Heights Wilkens Ave & Pine Heights (39.2720252302, -76.676960806)
3 The Alameda 33rd St The Alameda & 33rd St (39.3285013141, -76.5953545714)
4 E 33rd The Alameda E 33rd & The Alameda (39.3283410623, -76.5953594625)
\n", 486 | "
" 487 | ], 488 | "output_type": "pyout", 489 | "prompt_number": 10, 490 | "text": [ 491 | " street crossStreet intersection \\\n", 492 | "0 Caton Ave Benson Ave Caton Ave & Benson Ave \n", 493 | "1 Caton Ave Benson Ave Caton Ave & Benson Ave \n", 494 | "2 Wilkens Ave Pine Heights Wilkens Ave & Pine Heights \n", 495 | "3 The Alameda 33rd St The Alameda & 33rd St \n", 496 | "4 E 33rd The Alameda E 33rd & The Alameda \n", 497 | "\n", 498 | " Location 1 \n", 499 | "0 (39.2693779962, -76.6688185297) \n", 500 | "1 (39.2693157898, -76.6689698176) \n", 501 | "2 (39.2720252302, -76.676960806) \n", 502 | "3 (39.3285013141, -76.5953545714) \n", 503 | "4 (39.3283410623, -76.5953594625) " 504 | ] 505 | } 506 | ], 507 | "prompt_number": 10 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": {}, 512 | "source": [ 513 | "The course video explains R commands to save and load the workspace. I don't think we have the equivalent for that." 514 | ] 515 | }, 516 | { 517 | "cell_type": "markdown", 518 | "metadata": {}, 519 | "source": [ 520 | "
The course video explains R's `paste()` and `paste0()` commands, which look like standard Python's string manipulations:" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "collapsed": false, 526 | "input": [ 527 | "print ['../data' + str(i) + '.csv' for i in range(1, 6)]" 528 | ], 529 | "language": "python", 530 | "metadata": {}, 531 | "outputs": [ 532 | { 533 | "output_type": "stream", 534 | "stream": "stdout", 535 | "text": [ 536 | "['../data1.csv', '../data2.csv', '../data3.csv', '../data4.csv', '../data5.csv']\n" 537 | ] 538 | } 539 | ], 540 | "prompt_number": 11 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "metadata": {}, 545 | "source": [ 546 | "
Getting data off webpages" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "collapsed": false, 552 | "input": [ 553 | "from lxml.html import parse\n", 554 | "\n", 555 | "url = 'http://scholar.google.com/citations?user=HI-I6C0AAAAJ&hl=en'\n", 556 | "\n", 557 | "# this is equivalent to the combined R's opening/reading/closing connection and htmlTreeParse() commands\n", 558 | "html3 = parse(url).getroot()\n", 559 | "\n", 560 | "# get the title text using xpath expression\n", 561 | "# this is equivalent to R xpathSApply() command\n", 562 | "title = html3.xpath('//title')\n", 563 | "print [x.text_content() for x in title]\n", 564 | "\n", 565 | "# get the texts of col-citedby elements using xpath expression\n", 566 | "citedby = html3.xpath(\"//td[@id='col-citedby']\")\n", 567 | "print [x.text_content() for x in citedby]" 568 | ], 569 | "language": "python", 570 | "metadata": {}, 571 | "outputs": [ 572 | { 573 | "output_type": "stream", 574 | "stream": "stdout", 575 | "text": [ 576 | "['Jeff Leek - Google Scholar Citations']\n", 577 | "['Cited by', '344', '183', '147', '143', '111', '96', '87', '80', '59', '18', '11', '10', '10', '8', '8', '8', '7', '6', '5', '3']\n" 578 | ] 579 | } 580 | ], 581 | "prompt_number": 12 582 | }, 583 | { 584 | "cell_type": "code", 585 | "collapsed": false, 586 | "input": [], 587 | "language": "python", 588 | "metadata": {}, 589 | "outputs": [] 590 | } 591 | ], 592 | "metadata": {} 593 | } 594 | ] 595 | } -------------------------------------------------------------------------------- /week2/wk2_quiz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "wk2_quiz" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "code", 12 | "collapsed": false, 13 | "input": [ 14 | "import pandas as pd\n", 15 | "import numpy as np" 16 | ], 17 | "language": "python", 18 | "metadata": {}, 19 | "outputs": [], 20 | 
"prompt_number": 1 21 | }, 22 | { 23 | "cell_type": "code", 24 | "collapsed": false, 25 | "input": [ 26 | "import urllib2\n", 27 | "\n", 28 | "url = 'http://simplystatistics.tumblr.com/'\n", 29 | "f = urllib2.urlopen(url)\n", 30 | "simplyStats = f.read()\n", 31 | "with open('../data/simplystats', 'wb') as w:\n", 32 | " w.write(simplyStats)" 33 | ], 34 | "language": "python", 35 | "metadata": {}, 36 | "outputs": [], 37 | "prompt_number": 16 38 | }, 39 | { 40 | "cell_type": "code", 41 | "collapsed": false, 42 | "input": [ 43 | "with open('simplystats', 'r') as w:\n", 44 | " l = w.readlines()" 45 | ], 46 | "language": "python", 47 | "metadata": {}, 48 | "outputs": [], 49 | "prompt_number": 17 50 | }, 51 | { 52 | "cell_type": "code", 53 | "collapsed": false, 54 | "input": [ 55 | "print ','.join([str(len(l[1])), str(len(l[44])), str(len(l[121]))])" 56 | ], 57 | "language": "python", 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "output_type": "stream", 62 | "stream": "stdout", 63 | "text": [ 64 | "920,7,26\n" 65 | ] 66 | } 67 | ], 68 | "prompt_number": 24 69 | }, 70 | { 71 | "cell_type": "code", 72 | "collapsed": false, 73 | "input": [ 74 | "idaho_housing = pd.read_csv('https://dl.dropbox.com/u/7710864/data/csv_hid/ss06hid.csv')\n", 75 | "idaho_housing" 76 | ], 77 | "language": "python", 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "output_type": "pyout", 82 | "prompt_number": 26, 83 | "text": [ 84 | "\n", 85 | "Int64Index: 6496 entries, 0 to 6495\n", 86 | "Columns: 188 entries, RT to wgtp80\n", 87 | "dtypes: float64(97), int64(90), object(1)" 88 | ] 89 | } 90 | ], 91 | "prompt_number": 26 92 | }, 93 | { 94 | "cell_type": "code", 95 | "collapsed": false, 96 | "input": [ 97 | "# property value\n", 98 | "idaho_housing['VAL'].value_counts()\n", 99 | "\n", 100 | "# value > $1,000,000 is category 24" 101 | ], 102 | "language": "python", 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "output_type": "pyout", 107 | "prompt_number": 28, 108 | "text": [ 109 | 
"18 502\n", 110 | "14 495\n", 111 | "16 486\n", 112 | "15 483\n", 113 | "17 357\n", 114 | "20 312\n", 115 | "13 233\n", 116 | "19 232\n", 117 | "12 199\n", 118 | "21 164\n", 119 | "22 159\n", 120 | "11 152\n", 121 | "10 119\n", 122 | "9 99\n", 123 | "1 75\n", 124 | "8 70\n", 125 | "24 53\n", 126 | "23 47\n", 127 | "2 42\n", 128 | "3 33\n", 129 | "4 30\n", 130 | "6 29\n", 131 | "5 26\n", 132 | "7 23" 133 | ] 134 | } 135 | ], 136 | "prompt_number": 28 137 | }, 138 | { 139 | "cell_type": "code", 140 | "collapsed": false, 141 | "input": [ 142 | "idaho_housing['FES']" 143 | ], 144 | "language": "python", 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "output_type": "pyout", 149 | "prompt_number": 29, 150 | "text": [ 151 | "0 2\n", 152 | "1 NaN\n", 153 | "2 7\n", 154 | "3 1\n", 155 | "4 1\n", 156 | "5 2\n", 157 | "6 NaN\n", 158 | "7 NaN\n", 159 | "8 2\n", 160 | "9 NaN\n", 161 | "10 7\n", 162 | "11 2\n", 163 | "12 1\n", 164 | "13 NaN\n", 165 | "14 NaN\n", 166 | "...\n", 167 | "6481 1\n", 168 | "6482 1\n", 169 | "6483 NaN\n", 170 | "6484 4\n", 171 | "6485 NaN\n", 172 | "6486 1\n", 173 | "6487 NaN\n", 174 | "6488 1\n", 175 | "6489 6\n", 176 | "6490 NaN\n", 177 | "6491 2\n", 178 | "6492 4\n", 179 | "6493 3\n", 180 | "6494 1\n", 181 | "6495 5\n", 182 | "Name: FES, Length: 6496" 183 | ] 184 | } 185 | ], 186 | "prompt_number": 29 187 | }, 188 | { 189 | "cell_type": "code", 190 | "collapsed": false, 191 | "input": [ 192 | "# BDS: bedrooms (b, 0, 1, 2, 3, 4, 5); 5 == 5 or more\n", 193 | "# RMS: rooms (b, 1, 2, 3, 4, 5, 6, 7, 8, 9); 9 == 9 or more\n", 194 | "a = idaho_housing['BDS'][(idaho_housing['BDS'] == 3) & (idaho_housing['RMS'] == 4)].count()\n", 195 | "b = idaho_housing['BDS'][(idaho_housing['BDS'] == 2) & (idaho_housing['RMS'] == 5)].count()\n", 196 | "c = idaho_housing['BDS'][(idaho_housing['BDS'] == 2) & (idaho_housing['RMS'] == 7)].count()\n", 197 | "print ','.join([str(a), str(b), str(c)])" 198 | ], 199 | "language": "python", 200 | "metadata": {}, 201 | 
"outputs": [ 202 | { 203 | "output_type": "stream", 204 | "stream": "stdout", 205 | "text": [ 206 | "148,386,49\n" 207 | ] 208 | } 209 | ], 210 | "prompt_number": 37 211 | }, 212 | { 213 | "cell_type": "code", 214 | "collapsed": false, 215 | "input": [ 216 | "'''\n", 217 | "ACR: lot size \n", 218 | "- b: N/A\n", 219 | "- 1: less than one acre\n", 220 | "- 2: one to less than ten acres\n", 221 | "- 3: ten or more acres\n", 222 | "\n", 223 | "AGS: sales of agriculture products\n", 224 | "- b: N/A\n", 225 | "- 1: None\n", 226 | "- 2: $ 1 - $999\n", 227 | "- 3: $ 1000 - $2499\n", 228 | "- 4: $ 2500 - $4999\n", 229 | "- 5: $ 5000 - $9999\n", 230 | "- 6: $10000+\n", 231 | "''' \n", 232 | "idaho_housing[(idaho_housing['ACR'] == 3) & (idaho_housing['AGS'] == 6)].index[:3]" 233 | ], 234 | "language": "python", 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "output_type": "pyout", 239 | "prompt_number": 43, 240 | "text": [ 241 | "Int64Index([124, 237, 261], dtype=int64)" 242 | ] 243 | } 244 | ], 245 | "prompt_number": 43 246 | }, 247 | { 248 | "cell_type": "code", 249 | "collapsed": false, 250 | "input": [ 251 | "indexes = idaho_housing[(idaho_housing['ACR'] == 3) & (idaho_housing['AGS'] == 6)].index\n", 252 | "subset = idaho_housing.ix[indexes]\n", 253 | "subset['MRGX'].isnull().sum()" 254 | ], 255 | "language": "python", 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "output_type": "pyout", 260 | "prompt_number": 47, 261 | "text": [ 262 | "8" 263 | ] 264 | } 265 | ], 266 | "prompt_number": 47 267 | }, 268 | { 269 | "cell_type": "code", 270 | "collapsed": false, 271 | "input": [ 272 | "idaho_housing.columns.map(lambda x: x.split('wgtp'))[122]" 273 | ], 274 | "language": "python", 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "output_type": "pyout", 279 | "prompt_number": 51, 280 | "text": [ 281 | "['', '15']" 282 | ] 283 | } 284 | ], 285 | "prompt_number": 51 286 | }, 287 | { 288 | "cell_type": "code", 289 | "collapsed": false, 290 | "input": [ 291 
| "qs = [0, 1]\n", 292 | "[idaho_housing['YBL'].quantile(q=i) for i in qs]" 293 | ], 294 | "language": "python", 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "output_type": "pyout", 299 | "prompt_number": 58, 300 | "text": [ 301 | "[-1.0, 25.0]" 302 | ] 303 | } 304 | ], 305 | "prompt_number": 58 306 | }, 307 | { 308 | "cell_type": "code", 309 | "collapsed": false, 310 | "input": [ 311 | "pops = pd.read_csv('https://dl.dropbox.com/u/7710864/data/csv_hid/ss06pid.csv')\n", 312 | "pops" 313 | ], 314 | "language": "python", 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "output_type": "pyout", 319 | "prompt_number": 59, 320 | "text": [ 321 | "\n", 322 | "Int64Index: 14931 entries, 0 to 14930\n", 323 | "Columns: 239 entries, RT to pwgtp80\n", 324 | "dtypes: float64(74), int64(162), object(3)" 325 | ] 326 | } 327 | ], 328 | "prompt_number": 59 329 | }, 330 | { 331 | "cell_type": "code", 332 | "collapsed": false, 333 | "input": [ 334 | "merged = pd.merge(idaho_housing, pops, on='SERIALNO', how='outer', sort=True)\n", 335 | "merged" 336 | ], 337 | "language": "python", 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "output_type": "pyout", 342 | "prompt_number": 63, 343 | "text": [ 344 | "\n", 345 | "Int64Index: 15451 entries, 0 to 15450\n", 346 | "Columns: 426 entries, RT_x to pwgtp80\n", 347 | "dtypes: float64(332), int64(90), object(4)" 348 | ] 349 | } 350 | ], 351 | "prompt_number": 63 352 | }, 353 | { 354 | "cell_type": "code", 355 | "collapsed": false, 356 | "input": [ 357 | "merged.ix[:15,:20]" 358 | ], 359 | "language": "python", 360 | "metadata": {}, 361 | "outputs": [ 362 | { 363 | "html": [ 364 | "
\n", 365 | "\n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | 
" \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 
| " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | "
RT_xSERIALNODIVISIONPUMA_xREGIONST_xADJUST_xWGTPNPTYPEACRAGSBDSBLDBUSCONPELEPFSFULPGASP
0 H 186 8 700 4 16 1015675 89 4 1 1NaN 4 2 2NaN 180 0 2 3
1 H 186 8 700 4 16 1015675 89 4 1 1NaN 4 2 2NaN 180 0 2 3
2 H 186 8 700 4 16 1015675 89 4 1 1NaN 4 2 2NaN 180 0 2 3
3 H 186 8 700 4 16 1015675 89 4 1 1NaN 4 2 2NaN 180 0 2 3
4 H 306 8 700 4 16 1015675 310 1 1NaNNaN 1 7NaNNaN 60 0 2 3
5 H 395 8 100 4 16 1015675 106 2 1 1NaN 3 2 2NaN 70 0 2 30
6 H 395 8 100 4 16 1015675 106 2 1 1NaN 3 2 2NaN 70 0 2 30
7 H 506 8 700 4 16 1015675 240 4 1 1NaN 4 2 2NaN 40 0 2 80
8 H 506 8 700 4 16 1015675 240 4 1 1NaN 4 2 2NaN 40 0 2 80
9 H 506 8 700 4 16 1015675 240 4 1 1NaN 4 2 2NaN 40 0 2 80
10 H 506 8 700 4 16 1015675 240 4 1 1NaN 4 2 2NaN 40 0 2 80
11 H 835 8 800 4 16 1015675 118 4 1 2 1 5 2 2NaN 250 0 2 3
12 H 835 8 800 4 16 1015675 118 4 1 2 1 5 2 2NaN 250 0 2 3
13 H 835 8 800 4 16 1015675 118 4 1 2 1 5 2 2NaN 250 0 2 3
14 H 835 8 800 4 16 1015675 118 4 1 2 1 5 2 2NaN 250 0 2 3
15 H 989 8 700 4 16 1015675 115 4 1 1NaN 3 2 2NaN 130 0 2 3
\n", 762 | "
" 763 | ], 764 | "output_type": "pyout", 765 | "prompt_number": 68, 766 | "text": [ 767 | " RT_x SERIALNO DIVISION PUMA_x REGION ST_x ADJUST_x WGTP NP TYPE ACR AGS BDS \\\n", 768 | "0 H 186 8 700 4 16 1015675 89 4 1 1 NaN 4 \n", 769 | "1 H 186 8 700 4 16 1015675 89 4 1 1 NaN 4 \n", 770 | "2 H 186 8 700 4 16 1015675 89 4 1 1 NaN 4 \n", 771 | "3 H 186 8 700 4 16 1015675 89 4 1 1 NaN 4 \n", 772 | "4 H 306 8 700 4 16 1015675 310 1 1 NaN NaN 1 \n", 773 | "5 H 395 8 100 4 16 1015675 106 2 1 1 NaN 3 \n", 774 | "6 H 395 8 100 4 16 1015675 106 2 1 1 NaN 3 \n", 775 | "7 H 506 8 700 4 16 1015675 240 4 1 1 NaN 4 \n", 776 | "8 H 506 8 700 4 16 1015675 240 4 1 1 NaN 4 \n", 777 | "9 H 506 8 700 4 16 1015675 240 4 1 1 NaN 4 \n", 778 | "10 H 506 8 700 4 16 1015675 240 4 1 1 NaN 4 \n", 779 | "11 H 835 8 800 4 16 1015675 118 4 1 2 1 5 \n", 780 | "12 H 835 8 800 4 16 1015675 118 4 1 2 1 5 \n", 781 | "13 H 835 8 800 4 16 1015675 118 4 1 2 1 5 \n", 782 | "14 H 835 8 800 4 16 1015675 118 4 1 2 1 5 \n", 783 | "15 H 989 8 700 4 16 1015675 115 4 1 1 NaN 3 \n", 784 | "\n", 785 | " BLD BUS CONP ELEP FS FULP GASP \n", 786 | "0 2 2 NaN 180 0 2 3 \n", 787 | "1 2 2 NaN 180 0 2 3 \n", 788 | "2 2 2 NaN 180 0 2 3 \n", 789 | "3 2 2 NaN 180 0 2 3 \n", 790 | "4 7 NaN NaN 60 0 2 3 \n", 791 | "5 2 2 NaN 70 0 2 30 \n", 792 | "6 2 2 NaN 70 0 2 30 \n", 793 | "7 2 2 NaN 40 0 2 80 \n", 794 | "8 2 2 NaN 40 0 2 80 \n", 795 | "9 2 2 NaN 40 0 2 80 \n", 796 | "10 2 2 NaN 40 0 2 80 \n", 797 | "11 2 2 NaN 250 0 2 3 \n", 798 | "12 2 2 NaN 250 0 2 3 \n", 799 | "13 2 2 NaN 250 0 2 3 \n", 800 | "14 2 2 NaN 250 0 2 3 \n", 801 | "15 2 2 NaN 130 0 2 3 " 802 | ] 803 | } 804 | ], 805 | "prompt_number": 68 806 | }, 807 | { 808 | "cell_type": "code", 809 | "collapsed": false, 810 | "input": [ 811 | "merged.shape" 812 | ], 813 | "language": "python", 814 | "metadata": {}, 815 | "outputs": [ 816 | { 817 | "output_type": "pyout", 818 | "prompt_number": 69, 819 | "text": [ 820 | "(15451, 426)" 821 | ] 822 | } 823 | ], 824 | 
"prompt_number": 69 825 | }, 826 | { 827 | "cell_type": "code", 828 | "collapsed": false, 829 | "input": [], 830 | "language": "python", 831 | "metadata": {}, 832 | "outputs": [] 833 | } 834 | ], 835 | "metadata": {} 836 | } 837 | ] 838 | } -------------------------------------------------------------------------------- /week3/twoPanel.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/week3/twoPanel.pdf -------------------------------------------------------------------------------- /week3/twoPanel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/week3/twoPanel.png -------------------------------------------------------------------------------- /week5/anova_with_multiple_factors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "anova_with_multiple_factors" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "code", 12 | "collapsed": false, 13 | "input": [ 14 | "import pandas as pd\n", 15 | "import numpy as np" 16 | ], 17 | "language": "python", 18 | "metadata": {}, 19 | "outputs": [], 20 | "prompt_number": 1 21 | }, 22 | { 23 | "cell_type": "code", 24 | "collapsed": false, 25 | "input": [ 26 | "movies = pd.read_csv('http://www.rossmanchance.com/iscam2/data/movies03RT.txt', sep='\\t')\n", 27 | "movies.columns = ['X', 'score', 'rating', 'genre', 'box_office', 'running_time']\n", 28 | "movies.head()" 29 | ], 30 | "language": "python", 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "html": [ 35 | "
\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | "
Xscoreratinggenrebox_officerunning_time
0 2 Fast 2 Furious 48.9 PG-13 action/adventure 127.146 107
1 28 Days Later 78.2 R horror 45.065 113
2 A Guy Thing 39.5 PG-13 rom comedy 15.545 101
3 A Man Apart 42.9 R action/adventure 26.248 110
4 A Mighty Wind 79.9 PG-13 comedy 17.781 91
\n", 96 | "
" 97 | ], 98 | "output_type": "pyout", 99 | "prompt_number": 17, 100 | "text": [ 101 | " X score rating genre box_office running_time\n", 102 | "0 2 Fast 2 Furious 48.9 PG-13 action/adventure 127.146 107\n", 103 | "1 28 Days Later 78.2 R horror 45.065 113\n", 104 | "2 A Guy Thing 39.5 PG-13 rom comedy 15.545 101\n", 105 | "3 A Man Apart 42.9 R action/adventure 26.248 110\n", 106 | "4 A Mighty Wind 79.9 PG-13 comedy 17.781 91" 107 | ] 108 | } 109 | ], 110 | "prompt_number": 17 111 | }, 112 | { 113 | "cell_type": "code", 114 | "collapsed": false, 115 | "input": [ 116 | "from statsmodels.stats.anova import anova_lm\n", 117 | "from statsmodels.formula.api import ols\n", 118 | "\n", 119 | "lm = ols('score ~ rating', movies).fit()\n", 120 | "\n", 121 | "aovObject = anova_lm(lm)\n", 122 | "aovObject" 123 | ], 124 | "language": "python", 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "html": [ 129 | "
\n", 130 | "\n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | "
dfsum_sqmean_sqFPR(>F)
rating 3 570.123813 190.041271 0.918184 0.433975
Residual 136 28148.635044 206.975258 NaN NaN
\n", 160 | "
" 161 | ], 162 | "output_type": "pyout", 163 | "prompt_number": 24, 164 | "text": [ 165 | " df sum_sq mean_sq F PR(>F)\n", 166 | "rating 3 570.123813 190.041271 0.918184 0.433975\n", 167 | "Residual 136 28148.635044 206.975258 NaN NaN" 168 | ] 169 | } 170 | ], 171 | "prompt_number": 24 172 | }, 173 | { 174 | "cell_type": "code", 175 | "collapsed": false, 176 | "input": [ 177 | "lm.params" 178 | ], 179 | "language": "python", 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "output_type": "pyout", 184 | "prompt_number": 25, 185 | "text": [ 186 | "Intercept 67.650000\n", 187 | "rating[T.PG] -12.592857\n", 188 | "rating[T.PG-13] -11.814615\n", 189 | "rating[T.R] -12.020000" 190 | ] 191 | } 192 | ], 193 | "prompt_number": 25 194 | }, 195 | { 196 | "cell_type": "code", 197 | "collapsed": false, 198 | "input": [ 199 | "lm2 = ols('score ~ rating + genre', movies).fit()\n", 200 | "\n", 201 | "aovObject2 = anova_lm(lm2)\n", 202 | "aovObject2" 203 | ], 204 | "language": "python", 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "html": [ 209 | "
\n", 210 | "\n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | "
dfsum_sqmean_sqFPR(>F)
rating 3 570.123813 190.041271 0.973214 0.407720
genre 12 3934.928021 327.910668 1.679252 0.079134
Residual 124 24213.707023 195.271831 NaN NaN
\n", 248 | "
" 249 | ], 250 | "output_type": "pyout", 251 | "prompt_number": 28, 252 | "text": [ 253 | " df sum_sq mean_sq F PR(>F)\n", 254 | "rating 3 570.123813 190.041271 0.973214 0.407720\n", 255 | "genre 12 3934.928021 327.910668 1.679252 0.079134\n", 256 | "Residual 124 24213.707023 195.271831 NaN NaN" 257 | ] 258 | } 259 | ], 260 | "prompt_number": 28 261 | }, 262 | { 263 | "cell_type": "code", 264 | "collapsed": false, 265 | "input": [ 266 | "lm3 = ols('score ~ genre + rating', movies).fit()\n", 267 | "\n", 268 | "aovObject3 = anova_lm(lm3)\n", 269 | "aovObject3" 270 | ], 271 | "language": "python", 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "html": [ 276 | "
\n", 277 | "\n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | "
dfsum_sqmean_sqFPR(>F)
genre 12 4221.505277 351.792106 1.801551 0.054737
rating 3 283.546557 94.515519 0.484020 0.693992
Residual 124 24213.707023 195.271831 NaN NaN
\n", 315 | "
" 316 | ], 317 | "output_type": "pyout", 318 | "prompt_number": 29, 319 | "text": [ 320 | " df sum_sq mean_sq F PR(>F)\n", 321 | "genre 12 4221.505277 351.792106 1.801551 0.054737\n", 322 | "rating 3 283.546557 94.515519 0.484020 0.693992\n", 323 | "Residual 124 24213.707023 195.271831 NaN NaN" 324 | ] 325 | } 326 | ], 327 | "prompt_number": 29 328 | }, 329 | { 330 | "cell_type": "code", 331 | "collapsed": false, 332 | "input": [ 333 | "lm4 = ols('score ~ genre + rating + box_office', movies).fit()\n", 334 | "\n", 335 | "aovObject4 = anova_lm(lm4)\n", 336 | "aovObject4" 337 | ], 338 | "language": "python", 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "html": [ 343 | "
\n", 344 | "\n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | "
dfsum_sqmean_sqFPR(>F)
genre 12 4221.505277 351.792106 2.186135 0.016198
rating 3 283.546557 94.515519 0.587346 0.624421
box_office 1 4420.588612 4420.588612 27.470780 0.000001
Residual 123 19793.118411 160.919662 NaN NaN
\n", 390 | "
" 391 | ], 392 | "output_type": "pyout", 393 | "prompt_number": 30, 394 | "text": [ 395 | " df sum_sq mean_sq F PR(>F)\n", 396 | "genre 12 4221.505277 351.792106 2.186135 0.016198\n", 397 | "rating 3 283.546557 94.515519 0.587346 0.624421\n", 398 | "box_office 1 4420.588612 4420.588612 27.470780 0.000001\n", 399 | "Residual 123 19793.118411 160.919662 NaN NaN" 400 | ] 401 | } 402 | ], 403 | "prompt_number": 30 404 | }, 405 | { 406 | "cell_type": "code", 407 | "collapsed": false, 408 | "input": [], 409 | "language": "python", 410 | "metadata": {}, 411 | "outputs": [] 412 | } 413 | ], 414 | "metadata": {} 415 | } 416 | ] 417 | } -------------------------------------------------------------------------------- /week6/quiz.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "quiz" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "code", 12 | "collapsed": false, 13 | "input": [ 14 | "import pandas.rpy.common as com" 15 | ], 16 | "language": "python", 17 | "metadata": {}, 18 | "outputs": [], 19 | "prompt_number": 1 20 | }, 21 | { 22 | "cell_type": "code", 23 | "collapsed": false, 24 | "input": [ 25 | "SAheart = com.load_data('SAheart', package='ElemStatLearn')\n", 26 | "SAheart.head()" 27 | ], 28 | "language": "python", 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "html": [ 33 | "
\n", 34 | "\n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | "
sbptobaccoldladiposityfamhisttypeaobesityalcoholagechd
1 160 12.00 5.73 23.11 Present 49 25.30 97.20 52 1
2 144 0.01 4.41 28.61 Absent 55 28.87 2.06 63 1
3 118 0.08 3.48 32.28 Present 52 29.14 3.81 46 0
4 170 7.50 6.41 38.03 Present 51 31.99 24.26 58 1
5 134 13.60 3.50 27.78 Present 60 25.99 57.34 49 1
\n", 118 | "
" 119 | ], 120 | "output_type": "pyout", 121 | "prompt_number": 11, 122 | "text": [ 123 | " sbp tobacco ldl adiposity famhist typea obesity alcohol age chd\n", 124 | "1 160 12.00 5.73 23.11 Present 49 25.30 97.20 52 1\n", 125 | "2 144 0.01 4.41 28.61 Absent 55 28.87 2.06 63 1\n", 126 | "3 118 0.08 3.48 32.28 Present 52 29.14 3.81 46 0\n", 127 | "4 170 7.50 6.41 38.03 Present 51 31.99 24.26 58 1\n", 128 | "5 134 13.60 3.50 27.78 Present 60 25.99 57.34 49 1" 129 | ] 130 | } 131 | ], 132 | "prompt_number": 11 133 | }, 134 | { 135 | "cell_type": "code", 136 | "collapsed": false, 137 | "input": [ 138 | "%load_ext rmagic" 139 | ], 140 | "language": "python", 141 | "metadata": {}, 142 | "outputs": [], 143 | "prompt_number": 12 144 | }, 145 | { 146 | "cell_type": "code", 147 | "collapsed": false, 148 | "input": [ 149 | "%%R -o train\n", 150 | "set.seed(8484)\n", 151 | "train = sample(1:dim(SAheart)[1],size=dim(SAheart)[1]/2,replace=F)" 152 | ], 153 | "language": "python", 154 | "metadata": {}, 155 | "outputs": [], 156 | "prompt_number": 13 157 | }, 158 | { 159 | "cell_type": "code", 160 | "collapsed": false, 161 | "input": [ 162 | "trainSA = SAheart.ix[train,:]\n", 163 | "test = filter(lambda x: x not in train, SAheart.index)\n", 164 | "testSA = SAheart.ix[test, :]" 165 | ], 166 | "language": "python", 167 | "metadata": {}, 168 | "outputs": [], 169 | "prompt_number": 37 170 | }, 171 | { 172 | "cell_type": "code", 173 | "collapsed": false, 174 | "input": [ 175 | "from statsmodels.formula.api import glm\n", 176 | "from statsmodels.api import families as f" 177 | ], 178 | "language": "python", 179 | "metadata": {}, 180 | "outputs": [], 181 | "prompt_number": 38 182 | }, 183 | { 184 | "cell_type": "code", 185 | "collapsed": false, 186 | "input": [ 187 | "lm = glm('chd ~ age + alcohol + obesity + tobacco + typea + ldl', trainSA, family=f.Binomial()).fit()" 188 | ], 189 | "language": "python", 190 | "metadata": {}, 191 | "outputs": [], 192 | "prompt_number": 39 193 | }, 194 | { 
195 | "cell_type": "code", 196 | "collapsed": false, 197 | "input": [ 198 | "def missClass(values, prediction):\n", 199 | " return float( sum( ((prediction > 0.5) * 1) != values ) ) / float(len(values))" 200 | ], 201 | "language": "python", 202 | "metadata": {}, 203 | "outputs": [], 204 | "prompt_number": 40 205 | }, 206 | { 207 | "cell_type": "code", 208 | "collapsed": false, 209 | "input": [ 210 | "print 'training set misclassification %.4f' % missClass(trainSA['chd'], lm.fittedvalues)\n", 211 | "print 'test set misclassification %.4f' % missClass(testSA['chd'], lm.predict(testSA.ix[:,:-1]))" 212 | ], 213 | "language": "python", 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "output_type": "stream", 218 | "stream": "stdout", 219 | "text": [ 220 | "training set misclassification 0.2597\n", 221 | "test set misclassification 0.3203\n" 222 | ] 223 | } 224 | ], 225 | "prompt_number": 52 226 | }, 227 | { 228 | "cell_type": "code", 229 | "collapsed": false, 230 | "input": [ 231 | "olive = com.load_data('olive', package='pgmm')\n", 232 | "olive = olive.ix[:,:-1]\n", 233 | "olive.head()" 234 | ], 235 | "language": "python", 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "html": [ 240 | "
\n", 241 | "\n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | "
RegionAreaPalmiticPalmitoleicStearicOleicLinoleicLinolenicArachidic
1 1 1 1075 75 226 7823 672 36 60
2 1 1 1088 73 224 7709 781 31 61
3 1 1 911 54 246 8113 549 31 63
4 1 1 966 57 240 7952 619 50 78
5 1 1 1051 67 259 7771 672 50 80
\n", 319 | "
" 320 | ], 321 | "output_type": "pyout", 322 | "prompt_number": 56, 323 | "text": [ 324 | " Region Area Palmitic Palmitoleic Stearic Oleic Linoleic Linolenic Arachidic\n", 325 | "1 1 1 1075 75 226 7823 672 36 60\n", 326 | "2 1 1 1088 73 224 7709 781 31 61\n", 327 | "3 1 1 911 54 246 8113 549 31 63\n", 328 | "4 1 1 966 57 240 7952 619 50 78\n", 329 | "5 1 1 1051 67 259 7771 672 50 80" 330 | ] 331 | } 332 | ], 333 | "prompt_number": 56 334 | }, 335 | { 336 | "cell_type": "code", 337 | "collapsed": false, 338 | "input": [ 339 | "from sklearn import tree\n", 340 | "import patsy as pt\n", 341 | "import pandas as pd" 342 | ], 343 | "language": "python", 344 | "metadata": {}, 345 | "outputs": [], 346 | "prompt_number": 60 347 | }, 348 | { 349 | "cell_type": "code", 350 | "collapsed": false, 351 | "input": [ 352 | "y, X = pt.dmatrices('Area ~ Region + Palmitic + Palmitoleic + Stearic + \\\n", 353 | " Oleic + Linoleic + Linolenic + Arachidic - 1', olive)\n", 354 | "\n", 355 | "clf = tree.DecisionTreeClassifier().fit(X, y)" 356 | ], 357 | "language": "python", 358 | "metadata": {}, 359 | "outputs": [], 360 | "prompt_number": 96 361 | }, 362 | { 363 | "cell_type": "code", 364 | "collapsed": false, 365 | "input": [ 366 | "import StringIO, pydot\n", 367 | "from IPython.core.display import HTML\n", 368 | "\n", 369 | "dot_data = StringIO.StringIO()\n", 370 | "tree.export_graphviz(clf, out_file=dot_data)\n", 371 | "graph = pydot.graph_from_dot_data(dot_data.getvalue())\n", 372 | "graph.write_png('tree3.png')\n", 373 | "#HTML('')" 374 | ], 375 | "language": "python", 376 | "metadata": {}, 377 | "outputs": [ 378 | { 379 | "output_type": "pyout", 380 | "prompt_number": 97, 381 | "text": [ 382 | "True" 383 | ] 384 | } 385 | ], 386 | "prompt_number": 97 387 | }, 388 | { 389 | "cell_type": "code", 390 | "collapsed": false, 391 | "input": [ 392 | "olive.mean()" 393 | ], 394 | "language": "python", 395 | "metadata": {}, 396 | "outputs": [ 397 | { 398 | "output_type": "pyout", 399 | 
"prompt_number": 104, 400 | "text": [ 401 | "Region 1.699301\n", 402 | "Area 4.599650\n", 403 | "Palmitic 1231.741259\n", 404 | "Palmitoleic 126.094406\n", 405 | "Stearic 228.865385\n", 406 | "Oleic 7311.748252\n", 407 | "Linoleic 980.527972\n", 408 | "Linolenic 31.888112\n", 409 | "Arachidic 58.097902" 410 | ] 411 | } 412 | ], 413 | "prompt_number": 104 414 | }, 415 | { 416 | "cell_type": "code", 417 | "collapsed": false, 418 | "input": [ 419 | "newdata = olive[['Region', 'Palmitic', 'Palmitoleic', 'Stearic', 'Oleic', 'Linoleic', 'Linolenic', 'Arachidic']].mean()" 420 | ], 421 | "language": "python", 422 | "metadata": {}, 423 | "outputs": [], 424 | "prompt_number": 111 425 | }, 426 | { 427 | "cell_type": "code", 428 | "collapsed": false, 429 | "input": [ 430 | "clf.predict_proba(newdata)" 431 | ], 432 | "language": "python", 433 | "metadata": {}, 434 | "outputs": [ 435 | { 436 | "output_type": "pyout", 437 | "prompt_number": 112, 438 | "text": [ 439 | "array([[ 0., 1., 0., 0., 0., 0., 0., 0., 0.]])" 440 | ] 441 | } 442 | ], 443 | "prompt_number": 112 444 | }, 445 | { 446 | "cell_type": "code", 447 | "collapsed": false, 448 | "input": [], 449 | "language": "python", 450 | "metadata": {}, 451 | "outputs": [] 452 | } 453 | ], 454 | "metadata": {} 455 | } 456 | ] 457 | } -------------------------------------------------------------------------------- /week6/tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/week6/tree.png -------------------------------------------------------------------------------- /week6/tree2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/week6/tree2.png -------------------------------------------------------------------------------- /week6/tree3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/herrfz/dataanalysis/fa2d42045f364f1a5b0683b7fb908399d69ff1cb/week6/tree3.png -------------------------------------------------------------------------------- /week8/multiple_testing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "multiple_testing" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "code", 12 | "collapsed": false, 13 | "input": [ 14 | "import pandas as pd\n", 15 | "import numpy as np\n", 16 | "import patsy as pt\n", 17 | "import statsmodels.api as sm\n", 18 | "from statsmodels.sandbox.stats.multicomp import multipletests" 19 | ], 20 | "language": "python", 21 | "metadata": {}, 22 | "outputs": [], 23 | "prompt_number": 1 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "\n", 52 | "\n", 53 | "Test" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "collapsed": false, 59 | "input": [ 60 | "np.random.seed(1010093)\n", 61 | "\n", 62 | "pValues = []\n", 63 | "\n", 64 | "for i in xrange(1000):\n", 65 | " x = np.random.normal(size=20)\n", 66 | " y = np.random.normal(size=20)\n", 67 | " y, x = pt.dmatrices('y ~ x')\n", 68 | "\n", 69 | " pValues.append(sm.OLS(y, x).fit().pvalues[1])\n", 70 | " \n", 71 | "pValues = np.array(pValues)" 72 | ], 73 | "language": "python", 74 | "metadata": {}, 75 | "outputs": [], 76 | "prompt_number": 17 77 | }, 78 | { 79 | "cell_type": "code", 80 | "collapsed": false, 81 | "input": [ 82 | "sum(pValues < .05)" 83 | ], 84 | "language": "python", 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "output_type": "pyout", 89 | "prompt_number": 18, 90 | "text": [ 91 | "54" 92 | ] 93 | } 94 | ], 95 | "prompt_number": 18 96 | }, 97 | { 98 | "cell_type": "code", 99 | "collapsed": false, 100 | "input": [ 101 | "_, p_adjust, _, _ = 
multipletests(pValues, method='bonferroni')\n", 102 | "sum(p_adjust < .05)" 103 | ], 104 | "language": "python", 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "output_type": "pyout", 109 | "prompt_number": 19, 110 | "text": [ 111 | "0" 112 | ] 113 | } 114 | ], 115 | "prompt_number": 19 116 | }, 117 | { 118 | "cell_type": "code", 119 | "collapsed": false, 120 | "input": [ 121 | "_, p_adjust, _, _ = multipletests(pValues, method='fdr_bh')\n", 122 | "sum(p_adjust < .05)" 123 | ], 124 | "language": "python", 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "output_type": "pyout", 129 | "prompt_number": 20, 130 | "text": [ 131 | "0" 132 | ] 133 | } 134 | ], 135 | "prompt_number": 20 136 | }, 137 | { 138 | "cell_type": "code", 139 | "collapsed": false, 140 | "input": [ 141 | "np.random.seed(1010093)\n", 142 | "\n", 143 | "pValues = []\n", 144 | "\n", 145 | "for i in xrange(1000):\n", 146 | " x = np.random.normal(size=20)\n", 147 | " \n", 148 | " # first 500 beta = 0, last 500 beta = 2\n", 149 | " if i < 500:\n", 150 | " y = np.random.normal(size=20)\n", 151 | " else:\n", 152 | " y = np.random.normal(loc=2*x, size=20)\n", 153 | "\n", 154 | " y, x = pt.dmatrices('y ~ x')\n", 155 | " \n", 156 | " pValues.append(sm.OLS(y, x).fit().pvalues[1])\n", 157 | " \n", 158 | "pValues = np.array(pValues)" 159 | ], 160 | "language": "python", 161 | "metadata": {}, 162 | "outputs": [], 163 | "prompt_number": 21 164 | }, 165 | { 166 | "cell_type": "code", 167 | "collapsed": false, 168 | "input": [ 169 | "trueStatus = np.concatenate([np.repeat('zero', 500), np.repeat('not zero', 500)])\n", 170 | "pd.crosstab(pValues < .05, trueStatus)" 171 | ], 172 | "language": "python", 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "html": [ 177 | "
\n", 178 | "\n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | "
col_0not zerozero
row_0
False 0 470
True 500 30
\n", 204 | "
" 205 | ], 206 | "output_type": "pyout", 207 | "prompt_number": 22, 208 | "text": [ 209 | "col_0 not zero zero\n", 210 | "row_0 \n", 211 | "False 0 470\n", 212 | "True 500 30" 213 | ] 214 | } 215 | ], 216 | "prompt_number": 22 217 | }, 218 | { 219 | "cell_type": "code", 220 | "collapsed": false, 221 | "input": [ 222 | "_, p_adjust, _, _ = multipletests(pValues, method='bonferroni')\n", 223 | "pd.crosstab(p_adjust < .05, trueStatus)" 224 | ], 225 | "language": "python", 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "html": [ 230 | "
\n", 231 | "\n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | "
col_0not zerozero
row_0
False 29 500
True 471 0
\n", 257 | "
" 258 | ], 259 | "output_type": "pyout", 260 | "prompt_number": 23, 261 | "text": [ 262 | "col_0 not zero zero\n", 263 | "row_0 \n", 264 | "False 29 500\n", 265 | "True 471 0" 266 | ] 267 | } 268 | ], 269 | "prompt_number": 23 270 | }, 271 | { 272 | "cell_type": "code", 273 | "collapsed": false, 274 | "input": [ 275 | "plot(pValues, p_adjust, 'ok')\n", 276 | "xlim(-.05, 1.05)\n", 277 | "ylim(-.05, 1.05)\n", 278 | "xlabel('pValues')\n", 279 | "ylabel('p_adjust');" 280 | ], 281 | "language": "python", 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "output_type": "display_data", 286 | "png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEICAYAAAC3Y/QeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHPxJREFUeJzt3X9M03f+B/BnoZUyZeAPJtpyUQvhx/glgkg8Tb1Fy2Ry\nibocLvOmU8aZE9gut+TiXU7QbDeX7A+BeWHzx86bMqZngme1t9Ot81ABFRVPjMeInJWbRlSmmxah\nfL5/OPul0pZC+6Hl83k+kib03Xc/fb1xe/bN58f7oxAEQQAREclKkL8LICKikcfwJyKSIYY/EZEM\nMfyJiGSI4U9EJENKfxfgCYVC4e8SiIhGJVcndI6amb8gCMN6bNy4cdjvHa0PjlkeD45Z+g9vx+vO\nqAl/IiLyHYY/EZEMST789Xq9v0sYcRyzPHDM0ifmeBXCYDuGAoBCoRh0/xURETlyl52Sn/kTEZET\ngohWr14tPPfcc0JSUpLLPkVFRUJMTIyQkpIiNDU1Oe0znDI3btwoKBQKAYDbR3BwsDBmzJhB+4nx\nUKlUwrhx44TIyEghKChoWNtwN8bg4OAhbev5558Xxo0bN6T3BQUFCSqVyuX4/PF7HYlH/9+7QqEQ\nQkNDhcjISL/X5e1jzJgxglKp9Oj/naH+vsaMGSOEhIQM6T2hoaFCeHi4Q7tKpRLUarXT96jVamHs\n2LED/ht21ubN4+ltqVQqr7YfHBwshIaGOt3uzJkzhUOHDg05AwfLTlHD//jx40JTU5PL8DcajcKL\nL74oCIIg1NfXC1lZWU77uRuAMxs3bvT7/0R88MEHH756REVFDesLAHCdnaLu9pk3bx7Gjx/v8vWD\nBw/itddeAwBkZWWhq6sLN2/e9PpzKysrvd4GEVGguHHjBioqKny6Tb9e4dvR0YHo6Gj7c61Wi+vX\nr2Py5MkD+paWltp/1uv1bo+C9/b2+rJMIiK/s1qtg/Yxm80wm80ebc/vyzsITx2JdrWUQ//wH4xS\n6fdhERH5lFqtHrTP0xPjsrIyl339eraPRqOBxWKxP79+/To0Go3X212/fr3X2yAiChRRUVEoKiry\n6Tb9Gv55eXnYvXs3AKC+vh4RERFOd/kMVWZmJiIiIjzqGxwcjDFjxnj9mcOhUqkQFhaGyMhIBAUN\n75/C3aJ3wcHBQ9rO888/j7CwsCG9LygoCCqVyulrrtqloP/vXaFQIDQ0FJGRkX6syDfGjBkDlUrl\n88UUFQoFxowZg5CQkCG9JzQ0FOHh4Q7tKpXK5SxYrVZj7NixDv8NKxSKAW3eCg4OdvgdqVQqr7Yf\nHByM0NDQAdtQqVRIT0/H9u3bkZubO+ztOyPq/pEVK1bg66+/RmdnJ6Kjo1FWVoaenh4AQ
GFhIRYv\nXozDhw8jJiYGY8eOxa5du3zyueXl5ejq6hrQbjAYYDKZfPIZRESjmajhX11dPWgfMc7M6e7udtru\nyQETIiI5kOQVvq7+tPTkgAkRkRxIMvyLi4uh0+kc2nQ6nc8PmBARjVaSPCfyyYGRiooKWK1WqNVq\nFBUV+fyACRHRaMVVPYmIJIqrehIRkQOGPxGRDDH8iYhkiOFPRCRDDH8iIhli+BMRyRDDn4hIhhj+\nREQyxPAnIpIhhj8RkQwx/ImIZIjhT0QkQwx/IiIZYvgTEckQw5+ISIYY/kREMsTwJyKSIYY/EZEM\nMfyJiGSI4U9EJEMMfyIiGWL4ExHJEMOfiEiGGP5ERDLE8CcikiGGPxGRDDH8iYhkSPTwN5lMiI+P\nR2xsLLZs2TLg9c7OTuTk5CAtLQ1JSUn45JNPxC6JiEj2FIIgCGJt3GazIS4uDkePHoVGo0FmZiaq\nq6uRkJBg71NaWoru7m786U9/QmdnJ+Li4nDz5k0olcr/L1KhgIhlEhFJkrvsFHXm39jYiJiYGEyb\nNg0qlQr5+fmora116DNlyhTcu3cPAHDv3j1MnDjRIfiJiMj3RE3Zjo4OREdH259rtVo0NDQ49Cko\nKMDPfvYzTJ06Fffv38fnn3/udFulpaX2n/V6PfR6vRglExGNWmazGWaz2aO+ooa/QqEYtM+7776L\ntLQ0mM1mtLW1YeHChbhw4QLCwsIc+vUPfyIiGujpiXFZWZnLvqLu9tFoNLBYLPbnFosFWq3Woc/J\nkyfx8ssvAwB0Oh2mT5+OK1euiFkWEZHsiRr+GRkZaG1tRXt7Ox49eoSamhrk5eU59ImPj8fRo0cB\nADdv3sSVK1cwY8YMrz/baDTCYDBAr9fDYDDAaDR6vU0iIqkQdbePUqlEZWUlDAYDbDYb1qxZg4SE\nBFRVVQEACgsLsWHDBqxevRqpqano6+vD+++/jwkTJnj1uUajESUlJWhra7O3Pfk5NzfXq20TEUmB\nqKd6+spQT/U0GAz44osvnLabTCZflkZEFLD8dqqnv3R3dzttt1qtI1wJEVFgkmT4h4SEOG1Xq9Uj\nXAkRUWCSZPgXFxdDp9M5tOl0OhQVFfmpIiKiwCLJS2mfHNStqKiA1WqFWq1GUVERD/YSEf1Ikgd8\niYhIhgd8iYjIPYY/EZEMMfyJiGSI4U9EJEMMfyIiGWL4ExHJkGTDn6t6EhG5JsmLvLiqJxGRe5Kc\n+ZeXlzsEP/A4/CsqKvxUERFRYJFk+HNVTyIi9yQZ/lzVk4jIPUmGP1f1JCJyT5IHfLmqJxGRe1zV\nk4hIoriqJxEROWD4ExHJEMOfiEiGGP5ERDLE8CcikiGGPxGRDDH8iYhkiOFPRCRDDH8iIhli+BMR\nyRDDn4hIhhj+REQyxPAnIpIh0cPfZDIhPj4esbGx2LJli9M+ZrMZM2fORFJSEvR6vdglERHJnqhL\nOttsNsTFxeHo0aPQaDTIzMxEdXU1EhIS7H26urowd+5c/OMf/4BWq0VnZycmTZrkWCSXdCYiGjK/\nLenc2NiImJgYTJs2DSqVCvn5+aitrXXos3fvXixbtgxarRYABgQ/ERH5nqh38uro6EB0dLT9uVar\nRUNDg0Of1tZW9PT0YMGCBbh//z5KSkqwcuXKAdsqLS21/6zX67l7iIjoKWazGWaz2aO+ooa/QqEY\ntE9PTw+amppw7NgxPHjwANnZ2ZgzZw5iY2Md+vUPfyIiGujpiXFZWZnLvh7t9nnhhRc8anuaRqOB\nxWKxP7dYLPbdO09ER0dj0aJFCA0NxcSJEzF//nxcuHDBk7KIiGiY3Ib/w4cPcfv2bdy6dQt37tyx\nP9rb29HR0THoxjMyMtDa2or29nY8evQINTU1yMvLc+jz85//HHV1dbDZbHjw4AEaGhqQmJjo3aiI\niMgtt7t9qqqqsHXrVvzvf//DrFmz7O1hYWFYv3794
BtXKlFZWQmDwQCbzYY1a9YgISEBVVVVAIDC\nwkLEx8cjJycHKSkpCAoKQkFBAcOfiEhkHp3qWVFRgaKiopGoxyme6klENHRen+o5efJk3L9/HwCw\nefNmLF26FE1NTb6rkIiIRpRH4b9582aEhYWhrq4Ox44dw+uvv45f/epXYtdGREQi8Sj8g4ODAQCH\nDh1CQUEBXnrpJfT09IhaGBERicej8NdoNHjjjTdQU1OD3NxcWK1W9PX1iV0bERGJxKMDvj/88ANM\nJhNSUlIQGxuLb7/9FhcvXsSiRYtGokYe8CUiGgZ32elR+F+7dg2CIAy4YvcnP/mJbyocBMOfiGjo\nvA7/pKQke/BbrVZcvXoVcXFxuHTpkm8rdYHhT0Q0dO6y06O1ff797387PG9qasKHH37ofWVEROQX\nw17PPykpacCXglg48yciGjqvZ/4ffPCB/ee+vj40NTVBo9H4pjoiIhpxHoX//fv37fv8lUolXnrp\nJSxbtkzUwoiISDyi3sbRV7jbh4ho6Ia926ekpARbt27FkiVLnG504sSJKCwsxJw5c3xTKRERjQi3\nM/8zZ84gIyPD5W3Bbt++jT/84Q+4fPmyWPUB4MyfiGg4hj3zz8jIAAC398tVqVTDr4yIiPzC7cw/\nOTnZ9RsVCjQ3N4tSlLPP4syfiGhohj3z//vf/w4A2LZtGwBg5cqVEAQBe/bs8XGJvmc0GlFeXo7u\n7m6EhISguLgYubm5/i6LiCggeHS2T1paGs6fP+/QNnPmTJw7d060wvob6szfaDSipKQEbW1t9jad\nToetW7fyC4CIZMPrO3kJgoC6ujr78xMnTgT0bpjy8nKH4AeAtrY2VFRU+KkiIqLA4tFFXjt37sTq\n1avx3XffAQAiIiKwa9cuUQvzRnd3t9N2q9U6wpUQEQUmj8J/1qxZaG5uRldXFxQKBcLDw8Wuyysh\nISFO29Vq9QhXQkQUmDwKf+DxLRxbWlocZs9//OMfRSnKW8XFxWhraxuwz7+oqMiPVRERBQ6Pwr+w\nsBAPHz7El19+iYKCAuzbtw9ZWVli1zZsTw7qVlRUwGq1Qq1Wo6ioiAd7iYh+5NHZPsnJybh48SJS\nUlLQ3NyM77//Hjk5OQ4HgcXE8/yJiIbO67N9QkNDAQDPPPMMOjo6oFQqcePGDd9VSEREI8qj3T5L\nlizB3bt38fbbb2PWrFkAgIKCAlELIyIi8Qx5SWer1Qqr1YqIiAh72z//+U8sXLjQ58U9wd0+RERD\n5/UN3Acj9tW+DH8ioqHzep8/ERFJC8OfiEiGGP5ERDLk0dk+Dx8+xLZt21BXVweFQoF58+Zh3bp1\n9uUSpk+fLmqRRETkWx7N/H/5y1+ipaUFxcXFWL9+PS5duoSVK1faXz9w4IDL95pMJsTHxyM2NhZb\ntmxx2e/06dNQKpVut0VERL7h0dk+iYmJaGlpGbTtaTabDXFxcTh69Cg0Gg0yMzNRXV2NhISEAf0W\nLlyIZ555BqtXr8ayZcsci+TZPkREQzbsO3k9kZ6ejlOnTiE7OxsAUF9fb7/Yy53GxkbExMRg2rRp\nAID8/HzU1tYOCP+KigosX74cp0+fdrmt0tJS+896vd7tfYWJiOTIbDbDbDZ71Nej8D9z5gzmzp2L\n6OhoKBQKXLt2DXFxcUhOTnZ7L9+Ojg5ER0fbn2u1WjQ0NAzoU1tbiy+//BKnT5+GQqFwuq3+4U9E\nRAM9PTEuKytz2dej8DeZTMMqxFWQ9/fmm2/ivffes/95wt07RETi8yj8n+y2GSqNRgOLxWJ/brFY\noNVqHfqcPXsW+fn5AIDOzk4cOXIEKpUKeXl5w/pMIiIanE+Wd3Clt7cXcXFxOHbsGKZOnYrZs2c7\nPeD7xOrVq7FkyRIsXbrUsUge8CUiGjKvD/gOl1KpRGVlJQwGA2w2G9asWYOEhARUVVUBeHyTGCIi\nGnmizvx9ZTgzf
6PRiPLycnR3dyMkJATFxcW8kxcRyYroq3qKbajhbzQasXbtWocbzkRFRWH79u38\nAiAi2ZBd+KenpztdYjo9PR1nz571ZWlERAFLdks6t7e3O22/evXqyBZCRBSgJBn+RETkniTD39V1\nCVx9lIjoMUmG/+bNmxEVFeXQFhUVhU2bNvmpIiKiwCLJA77A4zN+KioqYLVaoVarUVRUxDN9iEhW\nZHe2DxERyfBsHyIico/hT0QkQ5INf6PRCIPBAL1eD4PBAKPR6O+SiIgChqgLu/mL0WhESUkJ2tra\n7G1PfuZBXyIiic78y8vLHYIfeBz+FRUVfqqIiCiwSDL8u7u7nbZbrdYRroSIKDBJMvxDQkKctqvV\n6hGuhIgoMEky/IuLi6HT6RzadDodioqK/FQREVFgkeQB3ycHdXmFLxGRc7zCl4hIoniFLxEROWD4\nExHJEMOfiEiGGP5ERDLE8CcikiGGPxGRDDH8iYhkiOFPRCRDDH8iIhli+BMRyZBkw7+0tBSTJk1C\nREQEJk2ahNLSUn+XREQUMCS5sFtpaSneeecd9Pb22tveeecd+2tERHInyYXdJk2ahNu3bzttv3Xr\nli9LIyIKWH5d2M1kMiE+Ph6xsbHYsmXLgNf37NmD1NRUpKSkYO7cuWhubvb6M/vP+Pvr6enxettE\nRFIg6m4fm82G9evX4+jRo9BoNMjMzEReXh4SEhLsfWbMmIHjx48jPDwcJpMJb7zxBurr6736XKXS\n+bBUKpVX2yUikgpRZ/6NjY2IiYnBtGnToFKpkJ+fj9raWoc+2dnZCA8PBwBkZWXh+vXrXn/uokWL\nnLYvXLjQ620TEUmBqDP/jo4OREdH259rtVo0NDS47L9jxw4sXrzY6Wv9D9Tq9Xro9XqX23G2vx8A\n7ty5475gIqJRzGw2w2w2e9RX1PBXKBQe9/3qq6+wc+dOnDhxwunrQzlLp6Ojw2m7L/6qICIKVE9P\njMvKylz2FTX8NRoNLBaL/bnFYoFWqx3Qr7m5GQUFBTCZTBg/frzXn3vjxg2n7Tdv3vR620REUiDq\nPv+MjAy0traivb0djx49Qk1NDfLy8hz6XLt2DUuXLsWnn36KmJgYn3zu1KlTnbZPmTLFJ9snIhrt\nRJ35K5VKVFZWwmAwwGazYc2aNUhISEBVVRUAoLCwEJs2bcLdu3exbt06AI/PyGlsbPTqc6dMmYKL\nFy8OaHf1pUBEJDeSvMjLaDRi7dq1Drt/oqKisH37duTm5opRIhFRwPHrRV5ERBR4JBn+5eXlAw76\n3rhxAxUVFX6qiIgosEgy/Lu7u522W63WEa6EiCgwSTL8Q0JCnLar1eoRroSIKDBJMvyzs7MHrO+j\nVCoxZ84cP1VERBRYJBn+p06dGrCyZ29vr9cLxhERSYUkw5/7/ImI3JNk+HOfPxGRe5IM/+LiYuh0\nOoc2nU6HoqIiP1VERBRYJBn+ubm5mD17NpRKJYKDg6FUKjF79mxe3UtE9CNJhn9paSn27duH3t5e\n2Gw29Pb2Yt++fbx5OxHRjyS5tg9v4E5EJMO1fXgDdyIi9yQZ/jabzWl7X1/fCFdCRBSYJBn+zz33\n3JDaiYjkRpLh72rm72p3EBGR3EjygK9KpXIa9CqVCo8ePfJlaUREAUt2B3wVCoW/SyAiCmiSnPm7\nC/9RMFwiIp+Q3cyfiIjcY/gTEckQw5+ISIYY/kREMiTJ8A8Kcj6s4ODgEa6EiCgwSTL8n332Waft\nYWFhI1wJEVFgkmT4u7pdI2/jSET0GM/zJyKSKJ7nT0REDhj+REQyxPAnIpIhhj8RkQwx/ImIZEj0\n8DeZTIiPj0dsbCy2bNnitE9xcTFiY2ORmpqKc+fOiV0SEZHsiRr+NpsN69evh8lkQktLC6qrq3H5\n8mWHPocPH8Y333yD1tZWfPTRR1i3bp2YJREREUQO/8bGRsTExGDatGlQqVTIz89
HbW2tQ5+DBw/i\ntddeAwBkZWWhq6sLN2/eFLMsIiLZU4q58Y6ODkRHR9ufa7VaNDQ0DNrn+vXrmDx5skO/0tJS+896\nvR56vV6UmomIRiuz2Qyz2exRX1HD39PbKT59BZqz9/UPfyIiGujpiXFZWZnLvqLu9tFoNLBYLPbn\nFosFWq3WbZ/r169Do9GIWRYRkeyJGv4ZGRlobW1Fe3s7Hj16hJqaGuTl5Tn0ycvLw+7duwEA9fX1\niIiIGLDLZ6hcrWXBdX2IiB4TdbePUqlEZWUlDAYDbDYb1qxZg4SEBFRVVQEACgsLsXjxYhw+fBgx\nMTEYO3Ysdu3a5ZPPZtATEbkmyVU9iYhI5qt6enrkW0o4ZnngmKVPzPEy/CWIY5YHjln6GP5ERORT\nDH8iIhkaNQd8iYho6FxFvKinevrKKPh+IiIaVbjbh4hIhhj+REQyxPAnIpIhyYS/HO8YNtiY9+zZ\ng9TUVKSkpGDu3Llobm72Q5W+5cm/MwCcPn0aSqUSBw4cGMHqxOHJmM1mM2bOnImkpCRJLHc+2Jg7\nOzuRk5ODtLQ0JCUl4ZNPPhn5In3o9ddfx+TJk5GcnOyyj8/zS5CA3t5eQafTCVevXhUePXokpKam\nCi0tLQ59jEaj8OKLLwqCIAj19fVCVlaWP0r1GU/GfPLkSaGrq0sQBEE4cuSILMb8pN+CBQuE3Nxc\nYf/+/X6o1Hc8GfPdu3eFxMREwWKxCIIgCLdu3fJHqT7jyZg3btwo/O53vxME4fF4J0yYIPT09Pij\nXJ84fvy40NTUJCQlJTl9XYz8ksTMX453DPNkzNnZ2QgPDwfweMzXr1/3R6k+48mYAaCiogLLly9H\nZGSkH6r0LU/GvHfvXixbtsy+XPqkSZP8UarPeDLmKVOm4N69ewCAe/fuYeLEiVAqR8XJi07NmzcP\n48ePd/m6GPklifB3djewjo6OQfuM5jD0ZMz97dixA4sXLx6J0kTj6b9zbW2t/V7Qo/0aEU/G3Nra\nijt37mDBggXIyMjAX//615Eu06c8GXNBQQEuXbqEqVOnIjU1FVu3bh3pMkeUGPk1er8q+/HlHcNG\ni6HU/tVXX2Hnzp04ceKEiBWJz5Mxv/nmm3jvvffsqxk+/W8+2ngy5p6eHjQ1NeHYsWN48OABsrOz\nMWfOHMTGxo5Ahb7nyZjfffddpKWlwWw2o62tDQsXLsSFCxcQFhY2AhX6h6/zSxLhL8c7hnkyZgBo\nbm5GQUEBTCaT2z8rRwNPxnz27Fnk5+cDeHxQ8MiRI1CpVANuIjRaeDLm6OhoTJo0CaGhoQgNDcX8\n+fNx4cKFURv+noz55MmT+P3vfw8A0Ol0mD59Oq5cuYKMjIwRrXWkiJJfXh81CAA9PT3CjBkzhKtX\nrwrd3d2DHvA9derUqD/46cmY//vf/wo6nU44deqUn6r0LU/G3N+qVauEv/3tbyNYoe95MubLly8L\nL7zwgtDb2yv88MMPQlJSknDp0iU/Vew9T8b81ltvCaWlpYIgCMKNGzcEjUYj3L592x/l+szVq1c9\nOuDrq/ySxMzfn3cM8xdPxrxp0ybcvXvXvv9bpVKhsbHRn2V7xZMxS40nY46Pj0dOTg5SUlIQFBSE\ngoICJCYm+rny4fNkzBs2bMDq1auRmpqKvr4+vP/++5gwYYKfKx++FStW4Ouvv0ZnZyeio6NRVlaG\nnp4eAOLl16hY2I2IiHxLEmf7EBHR0DD8iYhkiOFPRCRDDH8iIhli+BP9qKysDBs2bHBoO3/+vNsz\nZ0pLS/HBBx+IXRqRzzH8iX70yiuvoKamxqHts88+wyuvvOLyPaP5KnGSN4Y/yVJ7ezvi4+Px6quv\nIjExES+//DK0Wi3Gjx/vcC3Evn37sGLFCnz88ceYPXs20tLSsHz5cjx8+NDe58kXgF6vx9mzZwE8\nvrp4+vTpAACbzYa3334bs2fPRmpqKj766CM
AwLfffov58+dj5syZSE5ORl1d3UgNn4jhT/L1n//8\nB7/+9a/R0tKCZ599Ftu2bcOKFSvw2WefAQDq6+sxYcIE6HQ6LFu2DI2NjTh//jwSEhKwY8eOAdtT\nKBRO/xLYsWMHIiIi0NjYiMbGRnz88cdob29HdXU1cnJycO7cOTQ3NyMtLU30MRM9wfAn2YqOjkZ2\ndjYA4NVXX0VdXR1+8YtfYP/+/RAEwWGXz8WLFzFv3jykpKRgz549aGlp8fhzvvjiC+zevRszZ87E\nnDlzcOfOHXzzzTfIzMzErl27UFZWhubmZowbN06UcRI5I4nlHYiGo/8sXRAEKBQKaLVaTJ8+HWaz\nGQcOHEB9fT0AYNWqVTh48CCSk5Pxl7/8BWazecD2lEol+vr6AABWq9XhtcrKSixcuHDAe/71r3/h\n0KFDWLVqFX7zm99g5cqVPhwhkWuc+ZNsXbt2zR7ue/fuxbx58wA8Xmflrbfegk6nw9SpUwEA33//\nPaKiotDT04NPP/3U/sUh9Fs2etq0aThz5gwAYP/+/fbPMRgM2LZtG3p7ewE83t304MEDXLt2DZGR\nkVi7di3Wrl0riVuL0ujB8CfZiouLw4cffojExER899139gXwli9fjpaWFqxYscLed/PmzcjKysJP\nf/pTJCQk2Nv77+f/7W9/iz//+c9IT0/H7du37e1r165FYmIi0tPTkZycjHXr1qG3txdmsxlpaWlI\nT0/H559/jpKSkhEcPckdF3YjWWpvb8eSJUtw8eJFf5dC5Bec+ZNs8Rx9kjPO/ImIZIgzfyIiGWL4\nExHJEMOfiEiGGP5ERDLE8CcikiGGPxGRDP0fqiVcfC2OPvEAAAAASUVORK5CYII=\n" 287 | } 288 | ], 289 | "prompt_number": 24 290 | }, 291 | { 292 | "cell_type": "code", 293 | "collapsed": false, 294 | "input": [ 295 | "_, p_adjust, _, _ = multipletests(pValues, method='fdr_bh')\n", 296 | "pd.crosstab(p_adjust < .05, trueStatus)" 297 | ], 298 | "language": "python", 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "html": [ 303 | "
\n", 304 | "\n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | "
col_0not zerozero
row_0
False 0 483
True 500 17
\n", 330 | "
" 331 | ], 332 | "output_type": "pyout", 333 | "prompt_number": 25, 334 | "text": [ 335 | "col_0 not zero zero\n", 336 | "row_0 \n", 337 | "False 0 483\n", 338 | "True 500 17" 339 | ] 340 | } 341 | ], 342 | "prompt_number": 25 343 | }, 344 | { 345 | "cell_type": "code", 346 | "collapsed": false, 347 | "input": [ 348 | "plot(pValues, p_adjust, 'ok')\n", 349 | "xlim(-.05, 1.05)\n", 350 | "ylim(-.05, 1.05)\n", 351 | "xlabel('pValues')\n", 352 | "ylabel('p_adjust');" 353 | ], 354 | "language": "python", 355 | "metadata": {}, 356 | "outputs": [ 357 | { 358 | "output_type": "display_data", 359 | "png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEICAYAAAC3Y/QeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XtUVWX+P/D3PhxMML7gpREFvIAsLnKTvC5HpRrDMaSW\nVw5pZUqOKy/lmpzMybPBMdNZzVoqOcvSbErF2zSDso0praNjhnhLUlwNXkEmnVTwEiK3/fvDOD8R\nOGefyz7X92st1+rAcw6fPTVvHj/72c8jyLIsg4iIvIrG2QUQEZHjMfyJiLwQw5+IyAsx/ImIvBDD\nn4jIC2mdXYASgiA4uwQiIrfU3oJOt5n5y7Js1R+9Xm/1e931D6/ZO/7wmj3/j63Xa4rbhD8REdkP\nw5+IyAt5fPinpKQ4uwSH4zV7B16z51PzegXZXGPIBQiCYLZ/RURELZnKTo+f+RMRUWuqhv/LL7+M\n7t27Iz4+vt0x8+bNQ2RkJBITE3HixAk1yyEicguSJCE1NRUpKSlITU2FJEl2/xmqrvOfPn065s6d\nixdeeKHN7+/Zswdnz55FWVkZDh8+jNmzZ6OoqEjNkoiIXI4kSXj77bdx6tQp1NfXt/r+uXPnAADP\nPPOM3X6mqjP/ESNGoHPnzu1+f9euXXjxxRcBAEOGDEF1dTWuXr2qZklERC5FkiTMnDkTJ06caDP4\ngfvhv2bNGrv+XKc+4VtZWYmwsDDj69DQUFy+fBndu3dvNVYUReM/p6SkeN1dfyLyHM0z/bKyMty5\nc0fRe2pra82OMRgMMBgMij7P6ds7PHwnur2tHB4MfyIidyOKInJzc3Hz5k00NDRY/P6OHTuaHfPw\nxDg7O7vdsU4N/5CQEFRUVBhfX758GSEhIU6siIjIviRJwvPPP4+bN29a/Rl+fn6YO3euHaty8lLP\n9PR0fPLJJwCAoqIiBAUFtdnyISJyF5IkITg4GIIgQBAEpKWl2RT8Pj4+WLhwoV1v9gIqz/x1Oh32\n79+Pa9euISwsDNnZ2cYbGrNmzcLYsWOxZ88e9OvXD506dcLGjRvVLIeISFWSJGHy5Mmoqamxy+cF\nBwdj/fr1dg9+gE/4EhHZTJIkzJgxw+bVivYOez7hS0SkAkmSkJycjHHjxtkc/N27d1dtlt8Whj8R\nkYVEUYS/vz/S0tJw4sQJqzsTgiDAz88Per0eV65ccVjwAy6w1JOIyB00r80/c+aMojX37fHz88PC\nhQudvnyd4U9EZIIoili+fDnq6uqs/gytVouEhATk5OQ4dHZvCsOfiOgXoihi5cqVuHv3rl0+T6fT\nYcuWLXb5LHtj+BORV8vMzMT27dvR2Nhot8/08fHB5MmTXTb4AYY/EXmxUaNG4cCBA3b5L
K1Wi8WL\nFzu9l68UV/sQkVdpXqkjCIJdgl+j0SA8PBz//Oc/3Sb4Ac78icgLiKKIv/zlL7hz547dHhjt3r07\nNmzY4DI3cC3F8Ccij5aZmYm8vDy7fqZer3erWX5buL0DEXksSZKQnp6OpqYmu3xe//79cerUKbt8\nliNwewci8hqSJKFHjx7QaDRIS0uzOPgDAwNRUFAAWZZb/XGn4DeH4U9EHqF5n5309HRcuXLF4m5B\nREQECgoKUF1d7bZ9fEsw/InI7YmiiEmTJuHEiRMWz/Q7dOgAvV6Ps2fPekXoN+MNXyJyO7ZuueDr\n64v4+HiX2m7B0Rj+ROQ2JEnCvHnzcP78eYvfKwgCnn76acydO9drA/9BDH8icgu27ruTkZHh0tst\nOBrDn4hcmq1777jDPjvOwBu+ROSSmpds5uXlWRz8zQekyLKMhoYGBn8bOPMnIpdi66EpERERWLVq\nFfv6ZjD8icgl2HoIeocOHbBo0SK333bBUdj2ISKnevA8XEuC/8HWjizLuHfvHoPfApz5E5HDiaKI\n3Nxc3Lhxw6p9u9x9R01XwPAnIodo7uWfPHnS6o3WuHLHftj2ISLViaKI8ePHW7X9AgAkJyejoKCA\nK3fsiDN/IlKNPWb7rnwIujvjzJ+I7E6SJAQHByMtLc3q2X7zDV0Gvzo48yciu7HHgej+/v7Yvn07\nb+aqjDN/IrLag4eh23oguq+vL3Q6HX7++WcGvwNw5k9EFhNFEX/605+s3m+nGVfvOA9n/kSkmCiK\neOSRR5CdnW118Dev3OG+O86levgXFhYiOjoakZGRWLFiRavvX7t2DWPGjEFSUhLi4uLw8ccfq10S\nEVlo1KhREAQB2dnZVh+gAtxfuXPs2DG2dVyAIFvzeJ1CjY2NiIqKwt69exESEoJBgwYhLy8PMTEx\nxjGiKOLevXtYvnw5rl27hqioKFy9ehVa7f/vSJk6gZ6I1NW7d2+Ul5fb9Bncd8c5TGWnqjP/4uJi\n9OvXD3369IGvry8yMjKQn5/fYkyPHj1w69YtAMCtW7fQtWvXFsFPRI7XPNMXBMHm4Nfr9dx3xwWp\nmrKVlZUICwszvg4NDcXhw4dbjMnKysKTTz6Jnj174vbt29i+fXubn/XgfzgpKSlISUlRo2Qir2av\nG7l+fn5YuHAhA9/BDAYDDAaDorGqhr8gCGbHvPPOO0hKSoLBYMC5c+cwevRonDx5EgEBAS3G8T8i\nIvXYcjYucP//6+Hh4dxH38kenhhnZ2e3O1bVtk9ISAgqKiqMrysqKhAaGtpizKFDhzBp0iQA9w9h\n6Nu3L3744Qc1yyIi3J9QabVaCIKAtLQ0q4I/PDwcBQUFaGpqwtmzZxn8bkTVmf/AgQNRVlaGixcv\nomfPnti2bRvy8vJajImOjsbevXsxfPhwXL16FT/88APCw8PVLIvIa9l6YEqzXr164dKlS3aqipxB\n1Zm/VqtFbm4uUlNTERsbiylTpiAmJgbr1q3DunXrAABvvfUWjh49isTERPzmN7/BypUr0aVLFzXL\nIvIqzfvsNM/wrQ1+QRDQrVs36PV6Br8HUHWpp71wqSeR5Wzt4z9o5MiR2L9/vx2qIkdy2lJPInIO\nURTx3HPPWR38Go0GAQEBxmMSGfyehwvqiTxMZmZmq3trlggMDMTmzZt589bDMfyJPEBmZia2bdtm\n9YEpzdje8R5s+xC5ud69eyMvL8+mc3HZ3vE+DH8iN/TgCh5rt18IDg42novLhyi9D8OfyM2Ioohx\n48ZZtWQzIiLCuJ3yjz/+yL6+F2PPn8hN2NLXDw4Oxvr16xn2ZMTwJ3ID1p6NGxAQgAULFrCtQ60w\n/IlclCiKWLlyJe7evWvxezt27IidO3dypk/tYvgTuZC4uDicPn3aps/Q6XQ8GpHM4g1fIhcgSRI6\nduxodfBrNBrodDrIsszgJ0UY/kROJEkSIiIiMG7cO
Ny7d8/i9zcv12xsbGTok0XY9iFyEltPzeLT\nuGQLzvyJHEySJHTq1AnZ2dlWBX/zASoMfrIFw5/IAZrbOz4+PkhLS0NNTY3Fn6HRaKDX63Hu3Dmu\n4iGbse1DpDJRFLFs2TI0NDRY9X6NRoMpU6awp092xZk/kQoePB83Ozvb4uBvbu3IssybuaQKzvyJ\n7EwURWRnZ1v9fq7TJ0fgzJ/ITkRRNM70rSEIAvR6PYOfHIIzfyIb2eOsXC7bJEfjzJ/ISpIkITk5\nGRMnTrQ4+AVBaLG9MoOfHI0zfyIrWLu9siAIWLJkCXfZJKdj+BNZyNoD0oOCgrBp0yau0SeXwPAn\nUsja3n6vXr1w6dIllaoisg57/kRmiKIIf39/pKWlMfjJYzD8iUxo3nxN6YEqycnJxpu4siwz+Mll\nse1D1AZJkjBjxgyLDknnw1nkThj+RA+QJAnPP/88bt68adH7GPzkbtj2IULLvr4lwd+hQwc+lUtu\niTN/8mqSJGH8+PGoq6uz+L0RERFYtWoVl26SW+LMn7xS8/76aWlpFge/n58fCgoKcPbsWQY/uS2G\nP3kdURStWrYJAI8++ih27NjB0Ce3p3r4FxYWIjo6GpGRkVixYkWbYwwGAwYMGIC4uDikpKSoXRJ5\nqebZvrW7bgYFBWHr1q0MfvIIgizLslof3tjYiKioKOzduxchISEYNGgQ8vLyEBMTYxxTXV2N4cOH\n41//+hdCQ0Nx7do1dOvWrWWRggAVyyQvIEkSpkyZgp9//tni9wYHB2P9+vUMfXI7prJT1Zl/cXEx\n+vXrhz59+sDX1xcZGRnIz89vMWbLli2YMGECQkNDAaBV8BPZonnnzfT0dIuDX6/XQ5Zl/Pjjjwx+\n8jiqrvaprKxEWFiY8XVoaCgOHz7cYkxZWRnq6+vxxBNP4Pbt25g/fz6mTZvW6rMe3AUxJSWF7SEy\ny9oN2Pz9/bF9+3YGPrkdg8EAg8GgaKyq4S8Igtkx9fX1OH78OPbt24eamhoMGzYMQ4cORWRkZItx\n3AKXlGrekqGxsdGi93Xu3BmffvopQ5/c1sMTY1P3txS1fZ566ilFX3tYSEgIKioqjK8rKiqM7Z1m\nYWFhePrpp+Hn54euXbti5MiROHnypJKyiFoRRRE5OTkWBb8gCNDpdLhx4waDn7yGyfC/e/curl+/\njp9++gk3btww/rl48SIqKyvNfvjAgQNRVlaGixcvoq6uDtu2bUN6enqLMc8++ywOHjyIxsZG1NTU\n4PDhw4iNjbXtqsgrSZKEZcuWWbQ4QKfToampiU/oktcx2fZZt24dVq1ahf/+9794/PHHjV8PCAjA\nnDlzzH+4Vovc3FykpqaisbERM2bMQExMDNatWwcAmDVrFqKjozFmzBgkJCRAo9EgKyuL4U8WsWYT\ntoCAACxYsIDtRPJaipZ6rlmzBnPnznVEPW3iUk9qy6hRo3DgwAGL3sNjFMmb2LzUs3v37rh9+zYA\nYOnSpRg/fjyOHz9uvwqJLCBJErRarcXBHxgYiN27dzP4iaAw/JcuXYqAgAAcPHgQ+/btw8svv4zf\n/e53atdG1IIkSejRowfS0tIsuqHr5+cHvV6P6upq3tAl+oWi8Pfx8QEAFBQUICsrC2lpaaivr1e1\nMKJmoihCq9UiLS0NV65cUfw+jUaDgoIC1NTUcLZP9BBF6/xDQkLwyiuv4Msvv8Sbb76J2tpaNDU1\nqV0beTlJkjB16lRUV1db9f4pU6Zwpk/UDkUz/+3btyM1NRVffPEFgoKCUFVVhT//+c9q10ZeTBRF\nPPvss1YFv0aj4QErRGYoWu1TXl4OWZZbPbHbq1cv1Qp7EFf7eA9bZ/sjR47E/v377VwVkXsylZ2K\nwj8uLs4Y/LW1tbhw4QKioqJw+vRp+1baDoa/d5AkCRkZGbhz545F7xMEARkZGZzpEz3EVHYq6vmf\nOnWqxevjx4/j/
ffft70yol+IooilS5dadC/Jx8cHf/zjH3kzl8gKVu/nHxcX1+qXglo48/dc1jyd\nC7C9Q6SEzTP/9957z/jPTU1NOH78OEJCQuxTHXktSZIwc+ZMi4I/PDwcq1ev5ioeIhspCv/bt28b\ne/7N660nTJigamHk2SRJgk6nMz45roRer2eLh8hOVD3G0V7Y9vEslu7J06FDByxatIjBT2Qhq9s+\n8+fPx6pVqzBu3Lg2P7Rr166YNWsWhg4dap9KyaNJkoTJkyejpqZG0XgerkKkHpMz/6NHj2LgwIHt\nHgt2/fp1/PGPf8SZM2fUqg8AZ/6eQJIkTJs2DVVVVYrG89B0ItvZvM7flF27drU6oMXeGP7uLzk5\nGSdOnDA7LiAgAHl5eQx9IjuwOvzj4+NNfmhJSYnt1SnA8Hdfoihi+fLlqKurMzuWs30i+7K65797\n924AwNq1awEA06ZNgyzL2Lx5s51LJE8UFxen+CnwwMBABj+RAylq+yQlJeG7775r8bUBAwYo+mu8\nPXDm716a1+8r3X65f//+DntgkMib2HySlyzLOHjwoPH1N998wzCmNkmShPnz5ysKfh8fH+j1egY/\nkRMoesjro48+wvTp03Hz5k0AQFBQEDZu3KhqYeReJEnC888/b/xvxByNRoP8/Hy2eYicxKLVPtXV\n1RAEAYGBgWrW1ArbPq4tMzMTeXl5Fr1Hp9NxF04ildm8tw9w/wjH0tJS1NbWGr+2ZMkS26sjtyaK\nokXBr9Fo8Pbbb/NpXSInUxT+s2bNwt27d/HVV18hKysLO3bswJAhQ9SujVycKIrIzs5WNNbHxweT\nJ0/mbJ/IRShq+8THx+P7779HQkICSkpKcOfOHYwZM6bFTWA1se3jWiRJwrx583D+/HlF4wsKCtjb\nJ3ICm9s+fn5+AAB/f39UVlaia9euipfxkWfJzMzEtm3bFB+6MnLkSAY/kQtStNRz3LhxqKqqwhtv\nvIHHH38cffr0gU6nU7s2ciGiKEIQBOTl5SkO/l69evHAFSIXZfHePrW1taitrUVQUJDxa19++SVG\njx5t9+Kase3jXJZuwezn54eFCxfypi6Rk6m6sRug/tO+DH/nsWQZp1arxeLFixn6RC7C5id8yfuI\nogiNRqM4+AMCAhj8RG5E8Tp/8h6WLOEE+MAWkTti24dakCQJ6enpFt3UvXTpkspVEZE1bF7qeffu\nXaxduxYHDx6EIAgYMWIEZs+ejY4dOwIA+vbta79qyWmaN2WzZBknV/MQuSdFPf8XXngBpaWlmDdv\nHubMmYPTp09j2rRpxu9/9tln7b63sLAQ0dHRiIyMxIoVK9odd+TIEWi1WpOfReqaN28ezp07Z3ac\nTqeDLMsMfiI3pmjmf/r0aZSWlhpfP/nkk4iNjTX7vsbGRsyZMwd79+5FSEgIBg0ahPT0dMTExLQa\n94c//AFjxoxhe8dJRFHEhQsXzI7r378/+/tEHkBR+CcnJ+Pbb7/FsGHDAABFRUV4/PHHzb6vuLgY\n/fr1Q58+fQAAGRkZyM/PbxX+a9aswcSJE3HkyJF2P+vBVSQpKSlISUlRUjoplJuba/YXLw9dIXJt\nBoMBBoNB0VhF4X/06FEMHz4cYWFhEAQB5eXliIqKQnx8vMmzfCsrKxEWFmZ8HRoaisOHD7cak5+f\nj6+++gpHjhyBIAhtfhaXENqfJElYvXo17t27Z3If/uTkZOTk5HCbBiIX9/DE2NSqPUXhX1hYaFUh\n7QX5g1577TW8++67xrvSbPs4htKHtwICAnDs2DEHVEREjqQo/JvbNpYKCQlBRUWF8XVFRQVCQ0Nb\njDl27BgyMjIAANeuXcPnn38OX19fpKenW/UzyTylB6trtVosWLDAARURkaPZZZ1/exoaGhAVFYV9\n+/ahZ8+eGDx4MPLy8lr1/JtNnz4d48aNw/jx41sWyXX+dqH0qMXAwED4+vri1Vd
fZbuNyI3Z5SQv\na2i1WuTm5iI1NRWNjY2YMWMGYmJisG7dOgD3D4khxxBFEcuWLUNDQ4PJcT4+PqiurnZQVUTkLKrO\n/O2FM3/biKKInJwcRf8bBgQE4NatWw6oiojUxo3dvJgoili6dKniX57s8RN5B27s5sEs2Y5ZEAQs\nWbKEPX4iL8Hw91CiKCoOfu7RQ+R92PbxQJIkYdmyZWbH+fj4QK/XM/iJvBDD38OIoohJkyaZXdUT\nHh6O/Px8tnmIvBTbPh7Ckv5+eHi4ot07ichzMfw9gCUHrD/66KNYvXq1yhURkatj28fNiaKoKPgF\nQUBERAS2bt3KDdqIiA95uTNJkvDcc8+Z7e/7+flhx44dDH0iL8OHvDyQJEl48cUXzQY/ACxcuJDB\nT0QtcObvhkRRxPLly1FXV2d2rE6n48lbRF7KaRu7kf1Zsk8Pg5+I2sO2jxuxZJ8eBj8RmcKZv5tQ\nupwzICAACxYs4MNbRGQSw98N9O7dG+Xl5WbHJScn88hFIlKEbR8XN2rUKEXBHxQUhJycHAdURESe\ngOHvwpQ+wBUeHo5NmzZxOScRKca2jwuSJAlTp05VdJyiXq9nf5+ILMZ1/i5GkiRkZGTgzp07ZscG\nBgbyvF0iahef8HUjq1evVhT8giBg8+bNDqiIiDwRw9+FSJKEQ4cOmR0XGBiI3bt3s8dPRFZj+LuI\nzMxMpKWlmZ316/V6VFdXM/iJyCbs+bsAURSRnZ1tdhzP2iUiS5jKToa/C/Dz80Ntba3JMdyugYgs\nxRu+LkqSJERERJgN/tTUVAY/EdkVw99JJEnC/Pnzcf78eZPjgoODMXfuXAdVRUTegg95OUHzQSzX\nr183OS44OBjr16/nzV0isjv2/B1MFEWsXLkSd+/eNTnOz88PNTU1DqqKiDwRD3NxEc378Tc1NZkc\np9VqsXDhQgdVRUTeiDN/B5EkCePHjzd59KJGo0GnTp24Hz8R2QWXerqAiIgIkzd3u3Xrho8//pj9\nfSKyG6cu9SwsLER0dDQiIyOxYsWKVt/fvHkzEhMTkZCQgOHDh6OkpETtkhxOFEWTwe/n58fgJyKH\nUrXn39jYiDlz5mDv3r0ICQnBoEGDkJ6ejpiYGOOY8PBwHDhwAIGBgSgsLMQrr7yCoqIiNctyKCXH\nLy5cuJDBT0QOperMv7i4GP369UOfPn3g6+uLjIwM5OfntxgzbNgwBAYGAgCGDBmCy5cvq1mSw0iS\nhM6dO5sN/vDwcPb3icjhVJ35V1ZWIiwszPg6NDQUhw8fbnf8hg0bMHbs2Da/92BApqSkICUlxV5l\n2p0kSZg2bZrZvfY7dOiA1atXO6gqIvJ0BoMBBoNB0VhVw18QBMVjv/76a3z00Uf45ptv2vy+O82O\n582bh6qqKpNjNBoNFi1axHYPEdnNwxNjUxtGqhr+ISEhqKioML6uqKhAaGhoq3ElJSXIyspCYWEh\nOnfurGZJqjN3c7fZ22+/7Va/0IjIs6i61LOhoQFRUVHYt28fevbsicGDByMvL6/FDd/y8nI8+eST\n2LRpE4YOHdp2kW6y1DMzMxN5eXlmx3FrZiJyBKc94avVapGbm4vU1FQ0NjZixowZiImJwbp16wAA\ns2bNQk5ODqqqqjB79mwAgK+vL4qLi9UsSxUMfiJyJ3zIyw4kSUJ6errJbRsEQcCSJUvY6iEih+He\nPiqSJAk6nc5k8Gs0Gvb4icilcD9/GzSfu3v79m2T4xj8RORqGP5WEkVRUY9fp9Mx+InI5bDnbyV/\nf3+ze/Lz3F0iciae4WtnkiSZDf7k5GQGPxG5LIa/hTIzM5Genm52XE5OjgOqISKyDts+FujduzfK\ny8vNjuNafiJyBTzMxQ6UBn///v1x6tQpB1RERGQae/42iouLMxv8vr6+0Ov1DH4icgt8yMuMzMxM\nnD592uQYjUaDf/zjH9yhk4jcBmf+Jihdyz9
lyhQGPxG5Ffb82yGKosm9sJv16tULly5dckBFRESW\n4Q1fKzzyyCOoq6szOYbBT0SujDd8LTRq1Cizwd+/f38GPxG5LYb/Q0RRNHvoeocOHbiqh4jcGlf7\nPEDpgSyLFi1yQDVEROphz/8XcXFxZpd0AtysjYjcB3v+ZihZyw8w+InIc3h9+Ctdyz9y5EgGPxF5\nDK9u+0iShPHjx5td2RMeHo5z587Z/ecTEamJbZ92rF692mzw+/v7Y/Xq1Q6qiIjIMbw2/CVJwpEj\nR0yOEQQB27dv59YNRORxvDL8JUnC/PnzUVVV1e4YjUaD3bt3M/iJyCN53Tp/SZKQkZGBO3futDtG\nq9Vi8eLFDH4i8lheFf6SJGHy5Mmoqalp8/tarRYJCQnIyclh8BORR/Oq1T4RERE4f/58u99PTU1F\nYWGhzT+HiMgVcLXPL0wFf8eOHTF37lwHVkNE5DxeE/6ZmZkmvx8bG8tWDxF5Da/o+Y8aNcrkTp2C\nICAnJ8eBFREROZfHh/+vfvUr/PTTTybHZGRkcNZPRF7Fo2/49u7dG+Xl5SbHaDQaNDY2WlsaEZHL\nMpWdHjvz9/f3x927d82OmzJligOqISJyLarf8C0sLER0dDQiIyOxYsWKNsfMmzcPkZGRSExMxIkT\nJ2z+mb1791YU/NyimYi8larh39jYiDlz5qCwsBClpaXIy8vDmTNnWozZs2cPzp49i7KyMnzwwQeY\nPXu2zT/XXKsH4BbNROTdVA3/4uJi9OvXD3369IGvry8yMjKQn5/fYsyuXbvw4osvAgCGDBmC6upq\nXL16Vc2y8Nhjj2H//v2q/gwiIlemas+/srISYWFhxtehoaE4fPiw2TGXL19G9+7dW4wTRdH4zykp\nKUhJSbG6rv/9739Wv5eIyFUZDAYYDAZFY1UNf0EQFI17+G50W+97MPxtodfr7fI5RESu5uGJcXZ2\ndrtjVW37hISEoKKiwvi6oqICoaGhJsdcvnwZISEhqtVkr18iRETuTNXwHzhwIMrKynDx4kXU1dVh\n27ZtSE9PbzEmPT0dn3zyCQCgqKgIQUFBrVo+lmpvXasbPNJAROQQqrZ9tFotcnNzkZqaisbGRsyY\nMQMxMTFYt24dAGDWrFkYO3Ys9uzZg379+qFTp07YuHGjXX42g56IqH0e/YQvEZE38+otnZXe+fYk\nvGbvwGv2fGpeL8PfA/GavQOv2fMx/ImIyK4Y/kREXshtbvgSEZHl3HpLZzf4/URE5FbY9iEi8kIM\nfyIiL8TwJyLyQh4T/s44MczZzF3z5s2bkZiYiISEBAwfPhwlJSVOqNK+lPx7BoAjR45Aq9Xis88+\nc2B16lByzQaDAQMGDEBcXJxN2527CnPXfO3aNYwZMwZJSUmIi4vDxx9/7Pgi7ejll19G9+7dER8f\n3+4Yu+eX7AEaGhrkiIgI+cKFC3JdXZ2cmJgol5aWthgjSZL829/+VpZlWS4qKpKHDBnijFLtRsk1\nHzp0SK6urpZlWZY///xzr7jm5nFPPPGE/Mwzz8g7d+50QqX2o+Saq6qq5NjYWLmiokKWZVn+6aef\nnFGq3Si5Zr1eL7/55puyLN+/3i5dusj19fXOKNcuDhw4IB8/flyOi4tr8/tq5JdHzPxd9cQwNSm5\n5mHDhiEwMBDA/Wu+fPmyM0q1GyXXDABr1qzBxIkT8dhjjzmhSvtScs1btmzBhAkTjNuld+vWzRml\n2o2Sa+7Rowdu3boFALh16xa6du0KrdYtFi+2acSIEejcuXO731cjvzwi/Ns6DayystLsGHcOQyXX\n/KANGzZg7NixjihNNUr/Pefn5xvPgnb3Z0SUXHNZWRlu3LiBJ554AgMHDsSnn37q6DLtSsk1Z2Vl\n4fTp0+h9yni1AAAFJklEQVTZsycSExOxatUqR5fpUGrkl/v+qnyAPU8McxeW1P7111/jo48+wjff\nfKNiRep
Tcs2vvfYa3n33XeNuhg//O3c3Sq65vr4ex48fx759+1BTU4Nhw4Zh6NChiIyMdECF9qfk\nmt955x0kJSXBYDDg3LlzGD16NE6ePImAgAAHVOgc9s4vjwh/VzwxTG1KrhkASkpKkJWVhcLCQpN/\nrXQHSq752LFjyMjIAHD/puDnn38OX1/fVocIuQsl1xwWFoZu3brBz88Pfn5+GDlyJE6ePOm24a/k\nmg8dOoTFixcDACIiItC3b1/88MMPGDhwoENrdRRV8svmuwYuoL6+Xg4PD5cvXLgg37t3z+wN32+/\n/dbtb34queZLly7JERER8rfffuukKu1LyTU/6KWXXpL//ve/O7BC+1NyzWfOnJGfeuopuaGhQf75\n55/luLg4+fTp006q2HZKrvn111+XRVGUZVmWr1y5IoeEhMjXr193Rrl2c+HCBUU3fO2VXx4x83fm\niWHOouSac3JyUFVVZex/+/r6ori42Jll20TJNXsaJdccHR2NMWPGICEhARqNBllZWYiNjXVy5dZT\ncs1vvfUWpk+fjsTERDQ1NWHlypXo0qWLkyu3nk6nw/79+3Ht2jWEhYUhOzsb9fX1ANTLL7fY2I2I\niOzLI1b7EBGRZRj+REReiOFPROSFGP5ERF6I4U/0i+zsbLz11lstvvbdd9+ZXDkjiiLee+89tUsj\nsjuGP9EvMjMzsW3bthZf27p1KzIzM9t9jzs/JU7ejeFPXunixYuIjo7G1KlTERsbi0mTJiE0NBSd\nO3du8SzEjh07oNPp8OGHH2Lw4MFISkrCxIkTcffuXeOY5l8AKSkpOHbsGID7Txf37dsXANDY2Ig3\n3ngDgwcPRmJiIj744AMAwI8//oiRI0diwIABiI+Px8GDBx11+UQMf/Je//nPf/Dqq6+itLQU//d/\n/4e1a9dCp9Nh69atAICioiJ06dIFERERmDBhAoqLi/Hdd98hJiYGGzZsaPV5giC0+TeBDRs2ICgo\nCMXFxSguLsaHH36IixcvIi8vD2PGjMGJEydQUlKCpKQk1a+ZqBnDn7xWWFgYhg0bBgCYOnUqDh48\niClTpmDnzp2QZblFy+f777/HiBEjkJCQgM2bN6O0tFTxz/niiy/wySefYMCAARg6dChu3LiBs2fP\nYtCgQdi4cSOys7NRUlKCRx99VJXrJGqLR2zvQGSNB2fpsixDEASEhoaib9++MBgM+Oyzz1BUVAQA\neOmll7Br1y7Ex8fjb3/7GwwGQ6vP02q1aGpqAgDU1ta2+F5ubi5Gjx7d6j3//ve/UVBQgJdeegkL\nFizAtGnT7HiFRO3jzJ+8Vnl5uTHct2zZghEjRgC4v8/K66+/joiICPTs2RMAcOfOHQQHB6O+vh6b\nNm0y/uKQH9g2uk+fPjh69CgAYOfOncafk5qairVr16KhoQHA/XZTTU0NysvL8dhjj2HmzJmYOXOm\nRxwtSu6D4U9eKyoqCu+//z5iY2Nx8+ZN4wZ4EydORGlpKXQ6nXHs0qVLMWTIEPz6179GTEyM8esP\n9vl///vf469//SuSk5Nx/fp149dnzpyJ2NhYJCcnIz4+HrNnz0ZDQwMMBgOSkpKQnJyM7du3Y/78\n+Q68evJ23NiNvNLFixcxbtw4fP/9984uhcgpOPMnr8U1+uTNOPMnIvJCnPkTEXkhhj8RkRdi+BMR\neSGGPxGRF2L4ExF5IYY/EZEX+n+XKHfV6au9XQAAAABJRU5ErkJggg==\n" 360 | } 361 | ], 362 | "prompt_number": 26 363 | }, 364 | { 365 | "cell_type": "code", 366 | "collapsed": false, 367 | "input": [], 368 | "language": "python", 369 | "metadata": {}, 370 | "outputs": [] 371 | } 372 | ], 373 | 
"metadata": {} 374 | } 375 | ] 376 | } --------------------------------------------------------------------------------