├── .gitignore ├── README.md ├── data ├── Adult_final.csv ├── AirPassengers.csv ├── data.csv ├── finaprojects.csv └── hw2data.csv ├── final_project.md ├── flaskapi ├── README.md ├── data │ └── titanic.csv ├── main.py ├── model │ ├── model.pkl │ └── model_columns.pkl ├── requirements.txt ├── sendrequest.py └── tests.py ├── homework ├── Homework 0 - Mathematical and Coding Foundations and Review.ipynb ├── Homework 1 - Introduction to Supervised Learning.ipynb ├── Homework 2- Model Selection and Regularization.ipynb └── hw3.csv ├── html ├── Makefile ├── content │ └── keyboard-review.md ├── develop_server.sh ├── fabfile.py ├── output │ ├── archives.html │ ├── author │ │ └── dorian-goldman.html │ ├── authors.html │ ├── categories.html │ ├── category │ │ └── review.html │ ├── index.html │ ├── my-first-review.html │ ├── tags.html │ └── theme │ │ ├── css │ │ ├── main.css │ │ ├── pygment.css │ │ ├── reset.css │ │ ├── typogrify.css │ │ └── wide.css │ │ └── images │ │ └── icons │ │ ├── aboutme.png │ │ ├── bitbucket.png │ │ ├── delicious.png │ │ ├── facebook.png │ │ ├── github.png │ │ ├── gitorious.png │ │ ├── gittip.png │ │ ├── google-groups.png │ │ ├── google-plus.png │ │ ├── hackernews.png │ │ ├── lastfm.png │ │ ├── linkedin.png │ │ ├── reddit.png │ │ ├── rss.png │ │ ├── slideshare.png │ │ ├── speakerdeck.png │ │ ├── stackoverflow.png │ │ ├── twitter.png │ │ ├── vimeo.png │ │ └── youtube.png ├── pelicanconf.py ├── pelicanconf.pyc └── publishconf.py ├── img ├── levelsets.png ├── overfitting.png ├── regression.png ├── regressionexample └── regularization.png ├── lectures ├── Lecture 1. What is Data Science_.pdf ├── Lecture 2 - Mathematics Review.pdf ├── Lecture 2. Introduction to Supervised Learning (1).pdf ├── Lecture 3 - Model Selection, Evaluation and Regularization.pdf ├── Lecture 4 - Classification (1).pdf └── Lecture 5 - Decision Trees and Non-Parametric Models.pdf ├── notebooks ├── .DS_Store ├── .ipynb_checkpoints │ ├── Cleaning and Featurizing Data-checkpoint.ipynb │ ├── Lecture 2 - Regression Bookingdotcom Case Study-checkpoint.ipynb │ ├── Lecture 3 - Model Complexity and Regularization-checkpoint.ipynb │ ├── Lecture 4 - Correlation and Principle Component Analysis-checkpoint.ipynb │ ├── Lecture 5 - Classification-checkpoint.ipynb │ ├── Lecture 6 - Decision Tree Regression-checkpoint.ipynb │ ├── Lecture 6 - Decision Trees-checkpoint.ipynb │ ├── Lecture1 - Introduction-to-Regression-checkpoint.ipynb │ ├── PCA - MNIST example-checkpoint.ipynb │ └── Untitled-checkpoint.ipynb ├── Example of summing two normal random variables.ipynb ├── Lecture 1 - Introduction to Regression.ipynb ├── Lecture 10 - Time Series Forecasting.ipynb ├── Lecture 2 - Regression Bookingdotcom Case Study.ipynb ├── Lecture 3 - Model Complexity and Regularization.ipynb ├── Lecture 4 - Correlation and Principle Component Analysis.ipynb ├── Lecture 5 - Classification.ipynb ├── Lecture 5 - Cleaning and Featurizing Data.ipynb ├── Lecture 6 - Decision Tree Regression.ipynb ├── Lecture 6 - Decision Trees.ipynb ├── Lecture 7 - Recommendation Engines.ipynb ├── Lecture 9 - Unsupervised Learning.ipynb ├── Lecture Questions.ipynb ├── Markov Decision Processes via Policy Gradients.ipynb ├── PCA - MNIST example.ipynb ├── Untitled.ipynb ├── Untitled1.ipynb ├── Untitled2.ipynb ├── dt.dot ├── dt.png ├── matrix_factorization_recommender.ipynb ├── mnist_pca.png └── temp-plot.html ├── pdfs ├── AllofStatistics.pdf ├── Conditional-Probability.pdf ├── Discrete-Probabilities.pdf ├── Effective Computation in Physics.pdf ├── ISLR_First_Printing.pdf 
├── MachineLearningMethodsGraph.pdf ├── Scikit_Learn_Cheat_Sheet_Python.pdf ├── [Mark Joshi]Quant Job Interview Questions And Answers (1).pdf ├── coinbias.pdf ├── eigenvaluenotes.pdf ├── lagrangemultipliers.aux ├── lagrangemultipliers.log ├── lagrangemultipliers.out ├── lagrangemultipliers.pdf ├── lagrangemultipliers.synctex.gz ├── lagrangemultipliers.tex └── lecture4notes.pdf ├── recengine ├── .gitignore ├── README.md ├── event-suggestors │ ├── RA_history_event_suggestions.py │ ├── RA_neighbors_event_suggestions.py │ └── RA_scrapedoutput_reader.py ├── nearest-neighbors-generators │ ├── RASparse_rowcol_generator.py │ └── RA_generate_neighbors.py ├── php-files │ ├── raticket_advisor.php │ ├── ratickets4b.py │ ├── userid.html │ ├── userid.php │ └── write_events.php └── scraper-programs │ └── RAEventPageScraper.py ├── src └── project_proposal_bot.py └── webapp ├── data └── train_titanic.csv ├── hello.py └── templates └── my-form.html /.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb_checkpoints 2 | *.DS_Store 3 | *.DS_Store? 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # APMAE4990 - Introduction to Data Science in Industry 2 | 3 | ### Instructor: Dorian Goldman 4 | ### Term: Spring 2019 5 | ### Location: R 5:30pm-8:00pm, 413 Kent 6 | 7 | ### Objectives: 8 | This course is designed for graduate and advanced undergraduate students who wish to learn the fundamentals of data science and machine learning in the context of real-world applications. An emphasis will be placed on problems encountered by companies such as Amazon, Booking.com, Netflix, Uber/Lyft, The New York Times and others. Despite the focus on applications, the course will be mathematically rigorous; the goal is to motivate each tool by a concrete problem arising in industry. The course will follow an online IPython notebook where students can try out the algorithms in real time as we go through the course. 9 | 10 | 11 | There will be no midterms or exams; instead, assignments will be handed in periodically throughout the term. 12 | 13 | **Update:** While in previous years students were free to select their own projects, for various reasons I have decided to have everyone work with the same dataset this year. Given the growing size of the class, this will allow me to answer questions more efficiently and to focus on the relevant data science concepts. The project will be announced during the first few lectures of the class. 14 | 15 | 16 | ### Prerequisites: 17 | Exposure to undergraduate-level probability, statistics, calculus, programming, and linear algebra. 18 | 19 | 20 | ### Grading: 21 | - 50% Assignments 22 | - 50% Final Project 23 | 24 | ## Tentative Course Outline: 25 | 26 | ### Introduction 27 | - Problems that arise in industry involving data. 28 | - Introduction to regression, classification, clustering. Model training and evaluation. 29 | 30 | ### Supervised Learning 31 | 32 | - Regression: Linear Regression, Random Forest, Gradient Boosting. Examples: ETA prediction for taxis, real estate price prediction, newspaper demand forecasting. 33 | - Classification: Logistic Regression, Random Forest, Gradient Boosting. Examples: User Churn, Acquisition and Conversion. 34 | - Model selection and feature selection. Regularization. Real-world performance evaluation and monitoring.
35 | - Examples from publishing, ride sharing, online commerce and more. 36 | 37 | ### Unsupervised Learning 38 | - Clustering: K-means, DBSCAN, Gaussian Mixture Models and Expectation Maximization. 39 | - Correlation of features. Principal Component Analysis. The curse of dimensionality. 40 | - LDA and topic modeling. 41 | 42 | ### A/B Tests and Causal Inference 43 | - A/B experiments. Introduction to causal inference. 44 | - Offline and online policy discovery. 45 | 46 | ### Intro to Data Engineering 47 | - MapReduce. SQL. 48 | - Feature engineering: testing out new features and verifying their predictive power. 49 | - The basics of API building. 50 | 51 | ### Recommendation Engines and Personalization 52 | - Collaborative Filtering: Matrix Factorization, Neighborhood Models and Graph Diffusion. 53 | - Content Filtering: Topic Modeling, Regression, Classification. 54 | - Cold starts, continuous cold starts and warm starts. Performance comparison and analysis. 55 | - Introduction to Bayesian statistics. Bayesian vs. frequentist approaches. 56 | 57 | ### Reinforcement Learning 58 | - Multi-armed Bandits. Thompson Sampling. LinUCB. 59 | - Markov Decision Processes. 60 | 61 | ### Deep Learning 62 | - When and why? The problem of hype surrounding deep learning. 63 | - Image and sound signal processing. 64 | - Embeddings. 65 | 66 | 67 | 68 | # References 69 | 70 | These are references to deepen your understanding of the material presented in lecture. The list is by no means exhaustive. 71 | 72 | Gareth James, Daniela Witten, Trevor Hastie, Robert Tibshirani, *An Introduction to Statistical Learning*, Springer, 2013. 73 | 74 | Trevor Hastie, Robert Tibshirani, Jerome Friedman, *The Elements of Statistical Learning*, Springer, 2009. 75 | 76 | Christopher M. Bishop, *Pattern Recognition and Machine Learning*, Springer, 2006.
77 | 78 | Cameron Davidson-Pilon, *Bayesian Methods for Hackers*, https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers 79 | -------------------------------------------------------------------------------- /data/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | Month,#Passengers 2 | 1949-01,112 3 | 1949-02,118 4 | 1949-03,132 5 | 1949-04,129 6 | 1949-05,121 7 | 1949-06,135 8 | 1949-07,148 9 | 1949-08,148 10 | 1949-09,136 11 | 1949-10,119 12 | 1949-11,104 13 | 1949-12,118 14 | 1950-01,115 15 | 1950-02,126 16 | 1950-03,141 17 | 1950-04,135 18 | 1950-05,125 19 | 1950-06,149 20 | 1950-07,170 21 | 1950-08,170 22 | 1950-09,158 23 | 1950-10,133 24 | 1950-11,114 25 | 1950-12,140 26 | 1951-01,145 27 | 1951-02,150 28 | 1951-03,178 29 | 1951-04,163 30 | 1951-05,172 31 | 1951-06,178 32 | 1951-07,199 33 | 1951-08,199 34 | 1951-09,184 35 | 1951-10,162 36 | 1951-11,146 37 | 1951-12,166 38 | 1952-01,171 39 | 1952-02,180 40 | 1952-03,193 41 | 1952-04,181 42 | 1952-05,183 43 | 1952-06,218 44 | 1952-07,230 45 | 1952-08,242 46 | 1952-09,209 47 | 1952-10,191 48 | 1952-11,172 49 | 1952-12,194 50 | 1953-01,196 51 | 1953-02,196 52 | 1953-03,236 53 | 1953-04,235 54 | 1953-05,229 55 | 1953-06,243 56 | 1953-07,264 57 | 1953-08,272 58 | 1953-09,237 59 | 1953-10,211 60 | 1953-11,180 61 | 1953-12,201 62 | 1954-01,204 63 | 1954-02,188 64 | 1954-03,235 65 | 1954-04,227 66 | 1954-05,234 67 | 1954-06,264 68 | 1954-07,302 69 | 1954-08,293 70 | 1954-09,259 71 | 1954-10,229 72 | 1954-11,203 73 | 1954-12,229 74 | 1955-01,242 75 | 1955-02,233 76 | 1955-03,267 77 | 1955-04,269 78 | 1955-05,270 79 | 1955-06,315 80 | 1955-07,364 81 | 1955-08,347 82 | 1955-09,312 83 | 1955-10,274 84 | 1955-11,237 85 | 1955-12,278 86 | 1956-01,284 87 | 1956-02,277 88 | 1956-03,317 89 | 1956-04,313 90 | 1956-05,318 91 | 1956-06,374 92 | 1956-07,413 93 | 1956-08,405 94 | 1956-09,355 95 | 1956-10,306 96 | 1956-11,271 97 | 1956-12,306 98 | 1957-01,315 99 | 1957-02,301 100 | 1957-03,356 101 | 1957-04,348 102 | 1957-05,355 103 | 1957-06,422 104 | 1957-07,465 105 | 1957-08,467 106 | 1957-09,404 107 | 1957-10,347 108 | 1957-11,305 109 | 1957-12,336 110 | 1958-01,340 111 | 1958-02,318 112 | 1958-03,362 113 | 1958-04,348 114 | 1958-05,363 115 | 1958-06,435 116 | 1958-07,491 117 | 1958-08,505 118 | 1958-09,404 119 | 1958-10,359 120 | 1958-11,310 121 | 1958-12,337 122 | 1959-01,360 123 | 1959-02,342 124 | 1959-03,406 125 | 1959-04,396 126 | 1959-05,420 127 | 1959-06,472 128 | 1959-07,548 129 | 1959-08,559 130 | 1959-09,463 131 | 1959-10,407 132 | 1959-11,362 133 | 1959-12,405 134 | 1960-01,417 135 | 1960-02,391 136 | 1960-03,419 137 | 1960-04,461 138 | 1960-05,472 139 | 1960-06,535 140 | 1960-07,622 141 | 1960-08,606 142 | 1960-09,508 143 | 1960-10,461 144 | 1960-11,390 145 | 1960-12,432 146 | -------------------------------------------------------------------------------- /data/data.csv: -------------------------------------------------------------------------------- 1 | 32.502345269453031,31.70700584656992 2 | 53.426804033275019,68.77759598163891 3 | 61.530358025636438,62.562382297945803 4 | 47.475639634786098,71.546632233567777 5 | 59.813207869512318,87.230925133687393 6 | 55.142188413943821,78.211518270799232 7 | 52.211796692214001,79.64197304980874 8 | 39.299566694317065,59.171489321869508 9 | 48.10504169176825,75.331242297063056 10 | 52.550014442733818,71.300879886850353 11 | 45.419730144973755,55.165677145959123 12 | 54.351634881228918,82.478846757497919 13 | 
44.164049496773352,62.008923245725825 14 | 58.16847071685779,75.392870425994957 15 | 56.727208057096611,81.43619215887864 16 | 48.955888566093719,60.723602440673965 17 | 44.687196231480904,82.892503731453715 18 | 60.297326851333466,97.379896862166078 19 | 45.618643772955828,48.847153317355072 20 | 38.816817537445637,56.877213186268506 21 | 66.189816606752601,83.878564664602763 22 | 65.41605174513407,118.59121730252249 23 | 47.48120860786787,57.251819462268969 24 | 41.57564261748702,51.391744079832307 25 | 51.84518690563943,75.380651665312357 26 | 59.370822011089523,74.765564032151374 27 | 57.31000343834809,95.455052922574737 28 | 63.615561251453308,95.229366017555307 29 | 46.737619407976972,79.052406169565586 30 | 50.556760148547767,83.432071421323712 31 | 52.223996085553047,63.358790317497878 32 | 35.567830047746632,41.412885303700563 33 | 42.436476944055642,76.617341280074044 34 | 58.16454011019286,96.769566426108199 35 | 57.504447615341789,74.084130116602523 36 | 45.440530725319981,66.588144414228594 37 | 61.89622268029126,77.768482417793024 38 | 33.093831736163963,50.719588912312084 39 | 36.436009511386871,62.124570818071781 40 | 37.675654860850742,60.810246649902211 41 | 44.555608383275356,52.682983366387781 42 | 43.318282631865721,58.569824717692867 43 | 50.073145632289034,82.905981485070512 44 | 43.870612645218372,61.424709804339123 45 | 62.997480747553091,115.24415280079529 46 | 32.669043763467187,45.570588823376085 47 | 40.166899008703702,54.084054796223612 48 | 53.575077531673656,87.994452758110413 49 | 33.864214971778239,52.725494375900425 50 | 64.707138666121296,93.576118692658241 51 | 38.119824026822805,80.166275447370964 52 | 44.502538064645101,65.101711570560326 53 | 40.599538384552318,65.562301260400375 54 | 41.720676356341293,65.280886920822823 55 | 51.088634678336796,73.434641546324301 56 | 55.078095904923202,71.13972785861894 57 | 41.377726534895203,79.102829683549857 58 | 62.494697427269791,86.520538440347153 59 | 49.203887540826003,84.742697807826218 60 | 41.102685187349664,59.358850248624933 61 | 41.182016105169822,61.684037524833627 62 | 50.186389494880601,69.847604158249183 63 | 52.378446219236217,86.098291205774103 64 | 50.135485486286122,59.108839267699643 65 | 33.644706006191782,69.89968164362763 66 | 39.557901222906828,44.862490711164398 67 | 56.130388816875467,85.498067778840223 68 | 57.362052133238237,95.536686846467219 69 | 60.269214393997906,70.251934419771587 70 | 35.678093889410732,52.721734964774988 71 | 31.588116998132829,50.392670135079896 72 | 53.66093226167304,63.642398775657753 73 | 46.682228649471917,72.247251068662365 74 | 43.107820219102464,57.812512976181402 75 | 70.34607561504933,104.25710158543822 76 | 44.492855880854073,86.642020318822006 77 | 57.50453330326841,91.486778000110135 78 | 36.930076609191808,55.231660886212836 79 | 55.805733357942742,79.550436678507609 80 | 38.954769073377065,44.847124242467601 81 | 56.901214702247074,80.207523139682763 82 | 56.868900661384046,83.14274979204346 83 | 34.33312470421609,55.723489260543914 84 | 59.04974121466681,77.634182511677864 85 | 57.788223993230673,99.051414841748269 86 | 54.282328705967409,79.120646274680027 87 | 51.088719898979143,69.588897851118475 88 | 50.282836348230731,69.510503311494389 89 | 44.211741752090113,73.687564318317285 90 | 38.005488008060688,61.366904537240131 91 | 32.940479942618296,67.170655768995118 92 | 53.691639571070056,85.668203145001542 93 | 68.76573426962166,114.85387123391394 94 | 46.230966498310252,90.123572069967423 95 | 68.319360818255362,97.919821035242848 96 | 
50.030174340312143,81.536990783015028 97 | 49.239765342753763,72.111832469615663 98 | 50.039575939875988,85.232007342325673 99 | 48.149858891028863,66.224957888054632 100 | 25.128484647772304,53.454394214850524 101 | -------------------------------------------------------------------------------- /data/finaprojects.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/data/finaprojects.csv -------------------------------------------------------------------------------- /final_project.md: -------------------------------------------------------------------------------- 1 | # Final Project Grading Outline 2 | 3 | Below is the outline for the grading of the final project. Your main goal is to have a public web server I can go to, such as `http://yourname.somedomain.com`. 4 | 5 | **Broad requirements:** 6 | - Each team member has pushed their contributions to GitHub. 7 | - A notebook is pushed along with the code base that summarizes your work on the steps below. 8 | 9 | **Specific requirements:** 10 | 11 | - A 5-10 minute presentation of your work which will include: motivation, using the app, notebook presentation. **Note:** For CVN students, please provide a YouTube link to a video (or upload it to your GitHub). 12 | - An IPython notebook which shows your analysis/work. 13 | - The full code base in the same GitHub repo. 14 | - Provide a link in the projects worksheet to your completed project. 15 | 16 | 17 | # Data Engineering and Machine Learning 18 | 19 | Below is a breakdown of the grading scheme. 20 | 21 | ## Data Gathering and Preparation (30%): 22 | 23 | **Data gathering/preprocessing** (may not be as relevant depending on the project): 24 | 25 | - Did you find ways of processing your data to make the problem at hand more tractable/easier? 26 | 27 | **Examples:** image formatting, string matching. 28 | 29 | **Data integrity checks (10%):** 30 | 31 | - Did you account for missing values and outliers? 32 | - Is there information leakage? i.e. a variable which is actually determined by the outcome (e.g. predicting that a user likes a movie using the fact that they've liked that movie before). 33 | - Are some variables nonsensical or redundant? (e.g. if you see "Male" sometimes and "M" other times, or numerical values in the gender column). 34 | 35 | **Feature Engineering (15%):** 36 | - Did you convert categorical features into one-hot encoded dummy variables? (See the code sketch at the end of this outline.) 37 | - Was there an opportunity to make a new variable from the old ones that has more predictive power? (e.g. if you are working on the Titanic survival problem and Cabin seems to be predictive but is sparse, maybe replacing it with a binary variable "had a cabin or not" is better). 38 | 39 | **Standardization (5%):** 40 | - Did you standardize your variables properly? 41 | 42 | **Use of databases (BONUS) (+15%):** If you use any kind of SQL database for your data storage/retrieval (MySQL, Postgres, etc.). 43 | 44 | 45 | ## Model Selection, Comparison and Cross Validation (60%): 46 | 47 | ### Supervised Problem (predicting an outcome such as a recommendation, stock price, disease, etc.): 48 | 49 | **Exploratory Analysis (10%):** 50 | - Did you analyze the features and how they are related to the outcome variable? (regression: scatter plots; classification: conditional histograms). 51 | - Did you look at correlations, or chi-squared if the variables are categorical?
52 | (https://en.wikipedia.org/wiki/Chi-squared_test — but feel free to find a package that does this automatically). 53 | 54 | **Model Selection (50%):** 55 | - Did you randomly split your data into training and testing sets (e.g. 80%/20%) and use k-fold cross validation? 56 | - Did you perform regularization (very important if the number of features is large!)? Why did you use L^1 or L^2? I expect to see use of GridSearchCV for this, with at least 2-fold cross validation (see the code sketch at the end of this outline). 57 | - Did you try out various models and see which one performed best? (You don't need to check all of them, but for classification/regression you should at least try a couple. **DID YOU TRY OUT THE SIMPLEST MODEL FIRST?**) 58 | 59 | **Examples:** 60 | 61 | *Classification:* Logistic Regression and Random Forest Classification. (Use ROC for evaluation.) 62 | 63 | *Regression:* Linear Regression and Random Forest. (Use R^2 for evaluation.) 64 | 65 | *Recommendation Engines:* Item/Item, User/User, Matrix Factorization. (Use precision/recall for evaluation.) 66 | 67 | *Image classification/segmentation:* Try neural nets and simple logistic regression. 68 | 69 | *Time Series:* Auto-regressive models with different distributions (Poisson, Normal, etc.). 70 | 71 | **I would like to see a performance comparison of at least two different models.** 72 | 73 | ### Unsupervised Problem (extracting meaning from text, finding similar images/documents, etc.): 74 | 75 | **Model Selection/Exploration (60%):** 76 | - Did you analyze features and see relationships? 77 | - Did you do dimensionality reduction and try making scatter plots of your data? 78 | - Did you then investigate properties of those clusters? 79 | - Did you check whether the groups have comparable numbers of points and similar covariances? For instance, if you did K-means, did you check for the kinds of behavior we covered in class? 80 | - Based on the above, did you try various clustering algorithms appropriate for this problem? 81 | - Are the clusters stable? (i.e. when you take random subsets of your data, do you get similar clusters? When you choose different initial conditions, do you get the same result?) 82 | - Do you have interpretations for the clusters you found? Can you justify the number of clusters you selected? 83 | 84 | **Examples:** 85 | 86 | - *Word embeddings/Topic models:* LDA, word2vec with K-means, GMM, DBSCAN. 87 | - *Recommendations:* If you don't have any validation data, cosine similarity is a good start. Try item/item, user/user, etc. Did you account for high dimensionality? 88 | 89 | **Ideally you can find a way of validating your model in a supervised way. If this isn't possible, try to show that your clusters are stable and make sense, by investigating what they say.** 90 | 91 | ## Design and Strategy (10%) 92 | 93 | ### Problem Statement and Usefulness (5%): 94 | 95 | Is the problem clearly stated and motivated? Is this something useful, or is it contrived? 96 | 97 | ### User Experience (5%): 98 | 99 | Is the website relatively easy to use? Does it accept some kind of user input, apply a model, and return information to the user? 100 | 101 | ## Extra interesting ideas (BONUS 10-20%): 102 | 103 | This isn't necessary, but I'm leaving this here to allow for interesting and novel modeling/strategy approaches that I may not have thought of. 104 | 105 | - Did you use a novel modeling approach for your problem that required coding something by hand? 106 | - Did you use clever processing or hierarchical models to customize for your context?
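
The sketch below (referenced in the checklists above) ties together several of the graded steps: one-hot encoding, a train/test split, standardization, GridSearchCV over an L^1/L^2 penalty, and a two-model comparison on held-out data. It is a minimal illustration, not a required recipe: the file path `data/titanic.csv` and the column names are assumed from the course's Titanic example, and it assumes a recent pandas and scikit-learn (0.18+, where `sklearn.model_selection` exists), unlike the pinned versions in `flaskapi/requirements.txt`.

```python
# Minimal sketch of the graded pipeline, assuming a Titanic-style CSV
# with a binary 'Survived' outcome. Path and columns are illustrative.
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

df = pd.read_csv('data/titanic.csv')[['Age', 'Sex', 'Embarked', 'Survived']].copy()
df['Age'] = df['Age'].fillna(df['Age'].median())      # data integrity: missing values
df = pd.get_dummies(df, columns=['Sex', 'Embarked'])  # one-hot encode categoricals

X, y = df.drop(columns='Survived'), df['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

scaler = StandardScaler().fit(X_train)                # standardize on training data only
X_train_s, X_test_s = scaler.transform(X_train), scaler.transform(X_test)

# Start with the simplest model: regularized logistic regression, with the
# penalty type (L1 vs. L2) and strength C chosen by k-fold cross validation.
grid = GridSearchCV(LogisticRegression(solver='liblinear'),
                    param_grid={'penalty': ['l1', 'l2'], 'C': [0.01, 0.1, 1, 10]},
                    cv=5, scoring='roc_auc')
grid.fit(X_train_s, y_train)

# Compare against a second model on the held-out set, using ROC AUC.
rf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train, y_train)
print('logistic regression AUC:', roc_auc_score(y_test, grid.predict_proba(X_test_s)[:, 1]))
print('random forest AUC:', roc_auc_score(y_test, rf.predict_proba(X_test)[:, 1]))
```

Note that the scaler is fit on the training split only and reused on the test split; fitting it on all of the data would leak test-set statistics into training.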
108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /flaskapi/README.md: -------------------------------------------------------------------------------- 1 | # Flask API for scikit-learn 2 | A simple Flask application that serves predictions from a scikit-learn model. It reads a pickled sklearn model into memory when the Flask app starts, and returns predictions through the /predict endpoint. You can also use the /train endpoint to train/retrain the model. Any sklearn model can be used for prediction. 3 | 4 | ### Dependencies 5 | - scikit-learn 6 | - Flask 7 | - pandas 8 | - numpy 9 | 10 | ``` 11 | pip install -r requirements.txt 12 | ``` 13 | # BEFORE YOU DO ANYTHING 14 | Run 15 | ``` 16 | $ python main.py 17 | ``` 18 | 19 | Then go to 20 | 21 | ``` 22 | http://0.0.0.0:80/train 23 | ``` 24 | in your web browser. 25 | 26 | Then run 27 | ``` 28 | $ python sendrequest.py 29 | ``` 30 | 31 | This sends your data to the API, which returns predictions. 32 | # Endpoints 33 | ### /predict (POST) 34 | Returns an array of predictions given a JSON object representing independent variables. Here's a sample input: 35 | ``` 36 | [ 37 | {'Age': 85, 'Sex': 'male', 'Embarked': 'S'}, 38 | {'Age': 24, 'Sex': 'female', 'Embarked': 'C'}, 39 | {'Age': 3, 'Sex': 'male', 'Embarked': 'C'}, 40 | {'Age': 21, 'Sex': 'male', 'Embarked': 'S'} 41 | ] 42 | ``` 43 | 44 | and sample output: 45 | ``` 46 | {'prediction': [0, 1, 1, 0]} 47 | ``` 48 | 49 | 50 | ### /train (GET) 51 | Trains the model. This is currently hard-coded to be a random forest model that is run on a subset of columns of the titanic dataset. 52 | 53 | ### /wipe (GET) 54 | Removes the trained model. 55 | -------------------------------------------------------------------------------- /flaskapi/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import shutil 4 | import time 5 | import traceback 6 | 7 | from flask import Flask, request, jsonify 8 | import pandas as pd 9 | from sklearn.externals import joblib 10 | 11 | app = Flask(__name__) 12 | 13 | # inputs 14 | training_data = 'data/titanic.csv' 15 | include = ['Age', 'Sex', 'Embarked', 'Survived'] 16 | dependent_variable = include[-1] 17 | 18 | model_directory = 'model' 19 | model_file_name = '%s/model.pkl' % model_directory 20 | model_columns_file_name = '%s/model_columns.pkl' % model_directory 21 | 22 | # These will be populated at training time 23 | model_columns = None 24 | clf = None 25 | 26 | 27 | @app.route('/predict', methods=['POST']) 28 | def predict(): 29 | if clf: 30 | try: 31 | json_ = request.json 32 | query = pd.get_dummies(pd.DataFrame(json_)) 33 | 34 | for col in model_columns: 35 | if col not in query.columns: 36 | query[col] = 0 37 | query = query[model_columns] # enforce the training column order (and drop columns the model never saw) 38 | prediction = list(clf.predict(query)) 39 | 40 | return jsonify({'prediction': prediction}) 41 | 42 | except Exception, e: 43 | 44 | return jsonify({'error': str(e), 'trace': traceback.format_exc()}) 45 | else: 46 | print 'train first' 47 | return 'no model here' 48 | 49 | 50 | @app.route('/train', methods=['GET']) 51 | def train(): 52 | # using random forest as an example 53 | # can do the training separately and just update the pickles 54 | from sklearn.ensemble import RandomForestClassifier as rf 55 | 56 | df = pd.read_csv(training_data) 57 | df_ = df[include] 58 | 59 | categoricals = [] # going to one-hot encode categorical variables 60 | 61 | for col, col_type in df_.dtypes.iteritems(): 62 |
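# (note: pandas reports string columns as dtype 'O', for "object" -- those are the categorical columns one-hot encoded below)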
if col_type == 'O': 63 | categoricals.append(col) 64 | else: 65 | df_[col].fillna(0, inplace=True) # fill NA's with 0 for ints/floats, too generic 66 | 67 | # get_dummies effectively creates one-hot encoded variables 68 | df_ohe = pd.get_dummies(df_, columns=categoricals, dummy_na=True) 69 | 70 | x = df_ohe[df_ohe.columns.difference([dependent_variable])] 71 | y = df_ohe[dependent_variable] 72 | 73 | # capture a list of columns that will be used for prediction 74 | global model_columns 75 | model_columns = list(x.columns) 76 | joblib.dump(model_columns, model_columns_file_name) 77 | 78 | global clf 79 | clf = rf() 80 | start = time.time() 81 | clf.fit(x, y) 82 | print 'Trained in %.1f seconds' % (time.time() - start) 83 | print 'Model training score: %s' % clf.score(x, y) 84 | 85 | joblib.dump(clf, model_file_name) 86 | 87 | return 'Success' 88 | 89 | 90 | @app.route('/wipe', methods=['GET']) 91 | def wipe(): 92 | try: 93 | shutil.rmtree('model') 94 | os.makedirs(model_directory) 95 | return 'Model wiped' 96 | 97 | except Exception, e: 98 | print str(e) 99 | return 'Could not remove and recreate the model directory' 100 | 101 | 102 | if __name__ == '__main__': 103 | try: 104 | port = int(sys.argv[1]) 105 | except Exception, e: 106 | port = 80 107 | 108 | try: 109 | clf = joblib.load(model_file_name) 110 | print 'model loaded' 111 | model_columns = joblib.load(model_columns_file_name) 112 | print 'model columns loaded' 113 | 114 | except Exception, e: 115 | print 'No model here' 116 | print 'Train first' 117 | print str(e) 118 | clf = None 119 | 120 | app.run(host='0.0.0.0', port=port, debug=True) 121 | -------------------------------------------------------------------------------- /flaskapi/model/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/flaskapi/model/model.pkl -------------------------------------------------------------------------------- /flaskapi/model/model_columns.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/flaskapi/model/model_columns.pkl -------------------------------------------------------------------------------- /flaskapi/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==0.10.1 2 | numpy==1.10.4 3 | pandas==0.17.1 4 | scikit-learn==0.17 -------------------------------------------------------------------------------- /flaskapi/sendrequest.py: -------------------------------------------------------------------------------- 1 | import requests 2 | # payload: one dict per observation, matching the model's input columns 3 | dictToSend = [ 4 | {'Age': 85, 'Sex': 'male', 'Embarked': 'S'}, 5 | {'Age': 24, 'Sex': 'female', 'Embarked': 'C'}, 6 | {'Age': 3, 'Sex': 'male', 'Embarked': 'C'}, 7 | {'Age': 21, 'Sex': 'male', 'Embarked': 'S'}] 8 | res = requests.post('http://0.0.0.0:80/predict', json=dictToSend) 9 | print 'response from server:', res.text 10 | dictFromServer = res.json() # parsed JSON response 11 | -------------------------------------------------------------------------------- /flaskapi/tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class MyTestCase(unittest.TestCase): 5 | def test_something(self): 6 | self.assertEqual(True, False) # placeholder test -- replace with real assertions 7 | 8 | 9 | if __name__ == '__main__': 10 |
unittest.main() 11 | -------------------------------------------------------------------------------- /html/Makefile: -------------------------------------------------------------------------------- 1 | PY?=python 2 | PELICAN?=pelican 3 | PELICANOPTS= 4 | 5 | BASEDIR=$(CURDIR) 6 | INPUTDIR=$(BASEDIR)/content 7 | OUTPUTDIR=$(BASEDIR)/output 8 | CONFFILE=$(BASEDIR)/pelicanconf.py 9 | PUBLISHCONF=$(BASEDIR)/publishconf.py 10 | 11 | FTP_HOST=localhost 12 | FTP_USER=anonymous 13 | FTP_TARGET_DIR=/ 14 | 15 | SSH_HOST=localhost 16 | SSH_PORT=22 17 | SSH_USER=root 18 | SSH_TARGET_DIR=n 19 | 20 | S3_BUCKET=my_s3_bucket 21 | 22 | CLOUDFILES_USERNAME=my_rackspace_username 23 | CLOUDFILES_API_KEY=my_rackspace_api_key 24 | CLOUDFILES_CONTAINER=my_cloudfiles_container 25 | 26 | DROPBOX_DIR=~/Dropbox/Public/ 27 | 28 | GITHUB_PAGES_BRANCH=gh-pages 29 | 30 | DEBUG ?= 0 31 | ifeq ($(DEBUG), 1) 32 | PELICANOPTS += -D 33 | endif 34 | 35 | RELATIVE ?= 0 36 | ifeq ($(RELATIVE), 1) 37 | PELICANOPTS += --relative-urls 38 | endif 39 | 40 | help: 41 | @echo 'Makefile for a pelican Web site ' 42 | @echo ' ' 43 | @echo 'Usage: ' 44 | @echo ' make html (re)generate the web site ' 45 | @echo ' make clean remove the generated files ' 46 | @echo ' make regenerate regenerate files upon modification ' 47 | @echo ' make publish generate using production settings ' 48 | @echo ' make serve [PORT=8000] serve site at http://localhost:8000' 49 | @echo ' make serve-global [SERVER=0.0.0.0] serve (as root) to $(SERVER):80 ' 50 | @echo ' make devserver [PORT=8000] start/restart develop_server.sh ' 51 | @echo ' make stopserver stop local server ' 52 | @echo ' make ssh_upload upload the web site via SSH ' 53 | @echo ' make rsync_upload upload the web site via rsync+ssh ' 54 | @echo ' make dropbox_upload upload the web site via Dropbox ' 55 | @echo ' make ftp_upload upload the web site via FTP ' 56 | @echo ' make s3_upload upload the web site via S3 ' 57 | @echo ' make cf_upload upload the web site via Cloud Files' 58 | @echo ' make github upload the web site via gh-pages ' 59 | @echo ' ' 60 | @echo 'Set the DEBUG variable to 1 to enable debugging, e.g. make DEBUG=1 html ' 61 | @echo 'Set the RELATIVE variable to 1 to enable relative urls ' 62 | @echo ' ' 63 | 64 | html: 65 | $(PELICAN) $(INPUTDIR) -o $(OUTPUTDIR) -s $(CONFFILE) $(PELICANOPTS) 66 | 67 | clean: 68 | [ ! -d $(OUTPUTDIR) ] || rm -rf $(OUTPUTDIR) 69 | 70 | regenerate: 71 | $(PELICAN) -r $(INPUTDIR) -o $(OUTPUTDIR) -s $(CONFFILE) $(PELICANOPTS) 72 | 73 | serve: 74 | ifdef PORT 75 | cd $(OUTPUTDIR) && $(PY) -m pelican.server $(PORT) 76 | else 77 | cd $(OUTPUTDIR) && $(PY) -m pelican.server 78 | endif 79 | 80 | serve-global: 81 | ifdef SERVER 82 | cd $(OUTPUTDIR) && $(PY) -m pelican.server 80 $(SERVER) 83 | else 84 | cd $(OUTPUTDIR) && $(PY) -m pelican.server 80 0.0.0.0 85 | endif 86 | 87 | 88 | devserver: 89 | ifdef PORT 90 | $(BASEDIR)/develop_server.sh restart $(PORT) 91 | else 92 | $(BASEDIR)/develop_server.sh restart 93 | endif 94 | 95 | stopserver: 96 | $(BASEDIR)/develop_server.sh stop 97 | @echo 'Stopped Pelican and SimpleHTTPServer processes running in background.' 
98 | 99 | publish: 100 | $(PELICAN) $(INPUTDIR) -o $(OUTPUTDIR) -s $(PUBLISHCONF) $(PELICANOPTS) 101 | 102 | ssh_upload: publish 103 | scp -P $(SSH_PORT) -r $(OUTPUTDIR)/* $(SSH_USER)@$(SSH_HOST):$(SSH_TARGET_DIR) 104 | 105 | rsync_upload: publish 106 | rsync -e "ssh -p $(SSH_PORT)" -P -rvzc --delete $(OUTPUTDIR)/ $(SSH_USER)@$(SSH_HOST):$(SSH_TARGET_DIR) --cvs-exclude 107 | 108 | dropbox_upload: publish 109 | cp -r $(OUTPUTDIR)/* $(DROPBOX_DIR) 110 | 111 | ftp_upload: publish 112 | lftp ftp://$(FTP_USER)@$(FTP_HOST) -e "mirror -R $(OUTPUTDIR) $(FTP_TARGET_DIR) ; quit" 113 | 114 | s3_upload: publish 115 | s3cmd sync $(OUTPUTDIR)/ s3://$(S3_BUCKET) --acl-public --delete-removed --guess-mime-type --no-mime-magic --no-preserve 116 | 117 | cf_upload: publish 118 | cd $(OUTPUTDIR) && swift -v -A https://auth.api.rackspacecloud.com/v1.0 -U $(CLOUDFILES_USERNAME) -K $(CLOUDFILES_API_KEY) upload -c $(CLOUDFILES_CONTAINER) . 119 | 120 | github: publish 121 | ghp-import -m "Generate Pelican site" -b $(GITHUB_PAGES_BRANCH) $(OUTPUTDIR) 122 | git push origin $(GITHUB_PAGES_BRANCH) 123 | 124 | .PHONY: html help clean regenerate serve serve-global devserver stopserver publish ssh_upload rsync_upload dropbox_upload ftp_upload s3_upload cf_upload github 125 | -------------------------------------------------------------------------------- /html/content/keyboard-review.md: -------------------------------------------------------------------------------- 1 | Title: My First Review 2 | Date: 2010-12-03 10:20 3 | Category: Review 4 | 5 | Following is a review of my favorite mechanical keyboard. 6 | -------------------------------------------------------------------------------- /html/develop_server.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ## 3 | # This section should match your Makefile 4 | ## 5 | PY=${PY:-python} 6 | PELICAN=${PELICAN:-pelican} 7 | PELICANOPTS= 8 | 9 | BASEDIR=$(pwd) 10 | INPUTDIR=$BASEDIR/content 11 | OUTPUTDIR=$BASEDIR/output 12 | CONFFILE=$BASEDIR/pelicanconf.py 13 | 14 | ### 15 | # Don't change stuff below here unless you are sure 16 | ### 17 | 18 | SRV_PID=$BASEDIR/srv.pid 19 | PELICAN_PID=$BASEDIR/pelican.pid 20 | 21 | function usage(){ 22 | echo "usage: $0 (stop) (start) (restart) [port]" 23 | echo "This starts Pelican in debug and reload mode and then launches" 24 | echo "an HTTP server to help site development. It doesn't read" 25 | echo "your Pelican settings, so if you edit any paths in your Makefile" 26 | echo "you will need to edit your settings as well." 27 | exit 3 28 | } 29 | 30 | function alive() { 31 | kill -0 $1 >/dev/null 2>&1 32 | } 33 | 34 | function shut_down(){ 35 | PID=$(cat $SRV_PID) 36 | if [[ $? -eq 0 ]]; then 37 | if alive $PID; then 38 | echo "Stopping HTTP server" 39 | kill $PID 40 | else 41 | echo "Stale PID, deleting" 42 | fi 43 | rm $SRV_PID 44 | else 45 | echo "HTTP server PIDFile not found" 46 | fi 47 | 48 | PID=$(cat $PELICAN_PID) 49 | if [[ $? -eq 0 ]]; then 50 | if alive $PID; then 51 | echo "Killing Pelican" 52 | kill $PID 53 | else 54 | echo "Stale PID, deleting" 55 | fi 56 | rm $PELICAN_PID 57 | else 58 | echo "Pelican PIDFile not found" 59 | fi 60 | } 61 | 62 | function start_up(){ 63 | local port=$1 64 | echo "Starting up Pelican and HTTP server" 65 | shift 66 | $PELICAN --debug --autoreload -r $INPUTDIR -o $OUTPUTDIR -s $CONFFILE $PELICANOPTS & 67 | pelican_pid=$! 
68 | echo $pelican_pid > $PELICAN_PID 69 | mkdir -p $OUTPUTDIR && cd $OUTPUTDIR 70 | $PY -m pelican.server $port & 71 | srv_pid=$! 72 | echo $srv_pid > $SRV_PID 73 | cd $BASEDIR 74 | sleep 1 75 | if ! alive $pelican_pid ; then 76 | echo "Pelican didn't start. Is the Pelican package installed?" 77 | return 1 78 | elif ! alive $srv_pid ; then 79 | echo "The HTTP server didn't start. Is there another service using port" $port "?" 80 | return 1 81 | fi 82 | echo 'Pelican and HTTP server processes now running in background.' 83 | } 84 | 85 | ### 86 | # MAIN 87 | ### 88 | [[ ($# -eq 0) || ($# -gt 2) ]] && usage 89 | port='' 90 | [[ $# -eq 2 ]] && port=$2 91 | 92 | if [[ $1 == "stop" ]]; then 93 | shut_down 94 | elif [[ $1 == "restart" ]]; then 95 | shut_down 96 | start_up $port 97 | elif [[ $1 == "start" ]]; then 98 | if ! start_up $port; then 99 | shut_down 100 | fi 101 | else 102 | usage 103 | fi 104 | -------------------------------------------------------------------------------- /html/fabfile.py: -------------------------------------------------------------------------------- 1 | from fabric.api import * 2 | import fabric.contrib.project as project 3 | import os 4 | import shutil 5 | import sys 6 | import SocketServer 7 | 8 | from pelican.server import ComplexHTTPRequestHandler 9 | 10 | # Local path configuration (can be absolute or relative to fabfile) 11 | env.deploy_path = 'output' 12 | DEPLOY_PATH = env.deploy_path 13 | 14 | # Remote server configuration 15 | production = 'root@localhost:22' 16 | dest_path = 'n' 17 | 18 | # Rackspace Cloud Files configuration settings 19 | env.cloudfiles_username = 'my_rackspace_username' 20 | env.cloudfiles_api_key = 'my_rackspace_api_key' 21 | env.cloudfiles_container = 'my_cloudfiles_container' 22 | 23 | # Github Pages configuration 24 | env.github_pages_branch = "gh-pages" 25 | 26 | # Port for `serve` 27 | PORT = 8000 28 | 29 | def clean(): 30 | """Remove generated files""" 31 | if os.path.isdir(DEPLOY_PATH): 32 | shutil.rmtree(DEPLOY_PATH) 33 | os.makedirs(DEPLOY_PATH) 34 | 35 | def build(): 36 | """Build local version of site""" 37 | local('pelican -s pelicanconf.py') 38 | 39 | def rebuild(): 40 | """`build` with the delete switch""" 41 | local('pelican -d -s pelicanconf.py') 42 | 43 | def regenerate(): 44 | """Automatically regenerate site upon file modification""" 45 | local('pelican -r -s pelicanconf.py') 46 | 47 | def serve(): 48 | """Serve site at http://localhost:8000/""" 49 | os.chdir(env.deploy_path) 50 | 51 | class AddressReuseTCPServer(SocketServer.TCPServer): 52 | allow_reuse_address = True 53 | 54 | server = AddressReuseTCPServer(('', PORT), ComplexHTTPRequestHandler) 55 | 56 | sys.stderr.write('Serving on port {0} ...\n'.format(PORT)) 57 | server.serve_forever() 58 | 59 | def reserve(): 60 | """`build`, then `serve`""" 61 | build() 62 | serve() 63 | 64 | def preview(): 65 | """Build production version of site""" 66 | local('pelican -s publishconf.py') 67 | 68 | def cf_upload(): 69 | """Publish to Rackspace Cloud Files""" 70 | rebuild() 71 | with lcd(DEPLOY_PATH): 72 | local('swift -v -A https://auth.api.rackspacecloud.com/v1.0 ' 73 | '-U {cloudfiles_username} ' 74 | '-K {cloudfiles_api_key} ' 75 | 'upload -c {cloudfiles_container} .'.format(**env)) 76 | 77 | @hosts(production) 78 | def publish(): 79 | """Publish to production via rsync""" 80 | local('pelican -s publishconf.py') 81 | project.rsync_project( 82 | remote_dir=dest_path, 83 | exclude=".DS_Store", 84 | local_dir=DEPLOY_PATH.rstrip('/') + '/', 85 | delete=True, 86 | 
extra_opts='-c', 87 | ) 88 | 89 | def gh_pages(): 90 | """Publish to GitHub Pages""" 91 | rebuild() 92 | local("ghp-import -b {github_pages_branch} {deploy_path} -p".format(**env)) 93 | -------------------------------------------------------------------------------- /html/output/*.html: -------------------------------------------------------------------------------- [Generated Pelican site pages: archives.html, author/dorian-goldman.html, authors.html, categories.html, category/review.html, index.html, my-first-review.html and tags.html. The HTML markup was stripped in this export, leaving only scattered text fragments; the recoverable content is the site title "Introduction to Data Science in Industry", the sidebar headings "links" and "social", and a single post, "My First Review" (Fri 03 December 2010, by Dorian Goldman, in category Review: "Following is a review of my favorite mechanical keyboard.").]
46 | 47 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /html/output/theme/css/main.css: -------------------------------------------------------------------------------- 1 | /* 2 | Name: Smashing HTML5 3 | Date: July 2009 4 | Description: Sample layout for HTML5 and CSS3 goodness. 5 | Version: 1.0 6 | License: MIT 7 | Licensed by: Smashing Media GmbH 8 | Original author: Enrique Ramírez 9 | */ 10 | 11 | /* Imports */ 12 | @import url("reset.css"); 13 | @import url("pygment.css"); 14 | @import url("typogrify.css"); 15 | @import url(https://fonts.googleapis.com/css?family=Yanone+Kaffeesatz&subset=latin); 16 | 17 | /***** Global *****/ 18 | /* Body */ 19 | body { 20 | background: #F5F4EF; 21 | color: #000305; 22 | font-size: 87.5%; /* Base font size: 14px */ 23 | font-family: 'Trebuchet MS', Trebuchet, 'Lucida Sans Unicode', 'Lucida Grande', 'Lucida Sans', Arial, sans-serif; 24 | line-height: 1.429; 25 | margin: 0; 26 | padding: 0; 27 | text-align: left; 28 | } 29 | 30 | /* Headings */ 31 | h1 {font-size: 2em } 32 | h2 {font-size: 1.571em} /* 22px */ 33 | h3 {font-size: 1.429em} /* 20px */ 34 | h4 {font-size: 1.286em} /* 18px */ 35 | h5 {font-size: 1.143em} /* 16px */ 36 | h6 {font-size: 1em} /* 14px */ 37 | 38 | h1, h2, h3, h4, h5, h6 { 39 | font-weight: 400; 40 | line-height: 1.1; 41 | margin-bottom: .8em; 42 | font-family: 'Yanone Kaffeesatz', arial, serif; 43 | } 44 | 45 | h3, h4, h5, h6 { margin-top: .8em; } 46 | 47 | hr { border: 2px solid #EEEEEE; } 48 | 49 | /* Anchors */ 50 | a {outline: 0;} 51 | a img {border: 0px; text-decoration: none;} 52 | a:link, a:visited { 53 | color: #C74350; 54 | padding: 0 1px; 55 | text-decoration: underline; 56 | } 57 | a:hover, a:active { 58 | background-color: #C74350; 59 | color: #fff; 60 | text-decoration: none; 61 | text-shadow: 1px 1px 1px #333; 62 | } 63 | 64 | h1 a:hover { 65 | background-color: inherit 66 | } 67 | 68 | /* Paragraphs */ 69 | div.line-block, 70 | p { margin-top: 1em; 71 | margin-bottom: 1em;} 72 | 73 | strong, b {font-weight: bold;} 74 | em, i {font-style: italic;} 75 | 76 | /* Lists */ 77 | ul { 78 | list-style: outside disc; 79 | margin: 0em 0 0 1.5em; 80 | } 81 | 82 | ol { 83 | list-style: outside decimal; 84 | margin: 0em 0 0 1.5em; 85 | } 86 | 87 | li { margin-top: 0.5em; 88 | margin-bottom: 1em; } 89 | 90 | .post-info { 91 | float:right; 92 | margin:10px; 93 | padding:5px; 94 | } 95 | 96 | .post-info p{ 97 | margin-top: 1px; 98 | margin-bottom: 1px; 99 | } 100 | 101 | .readmore { float: right } 102 | 103 | dl {margin: 0 0 1.5em 0;} 104 | dt {font-weight: bold;} 105 | dd {margin-left: 1.5em;} 106 | 107 | pre{background-color: rgb(238, 238, 238); padding: 10px; margin: 10px; overflow: auto;} 108 | 109 | /* Quotes */ 110 | blockquote { 111 | margin: 20px; 112 | font-style: italic; 113 | } 114 | cite {} 115 | 116 | q {} 117 | 118 | div.note { 119 | float: right; 120 | margin: 5px; 121 | font-size: 85%; 122 | max-width: 300px; 123 | } 124 | 125 | /* Tables */ 126 | table {margin: .5em auto 1.5em auto; width: 98%;} 127 | 128 | /* Thead */ 129 | thead th {padding: .5em .4em; text-align: left;} 130 | thead td {} 131 | 132 | /* Tbody */ 133 | tbody td {padding: .5em .4em;} 134 | tbody th {} 135 | 136 | tbody .alt td {} 137 | tbody .alt th {} 138 | 139 | /* Tfoot */ 140 | tfoot th {} 141 | tfoot td {} 142 | 143 | /* HTML5 tags */ 144 | header, section, footer, 145 | aside, nav, article, figure { 146 | display: block; 147 | } 148 | 149 | /***** Layout *****/ 150 | .body {clear: both; 
margin: 0 auto; width: 800px;} 151 | img.right, figure.right {float: right; margin: 0 0 2em 2em;} 152 | img.left, figure.left {float: left; margin: 0 2em 2em 0;} 153 | 154 | /* 155 | Header 156 | *****************/ 157 | #banner { 158 | margin: 0 auto; 159 | padding: 2.5em 0 0 0; 160 | } 161 | 162 | /* Banner */ 163 | #banner h1 {font-size: 3.571em; line-height: 0;} 164 | #banner h1 a:link, #banner h1 a:visited { 165 | color: #000305; 166 | display: block; 167 | font-weight: bold; 168 | margin: 0 0 .6em .2em; 169 | text-decoration: none; 170 | } 171 | #banner h1 a:hover, #banner h1 a:active { 172 | background: none; 173 | color: #C74350; 174 | text-shadow: none; 175 | } 176 | 177 | #banner h1 strong {font-size: 0.36em; font-weight: normal;} 178 | 179 | /* Main Nav */ 180 | #banner nav { 181 | background: #000305; 182 | font-size: 1.143em; 183 | height: 40px; 184 | line-height: 30px; 185 | margin: 0 auto 2em auto; 186 | padding: 0; 187 | text-align: center; 188 | width: 800px; 189 | 190 | border-radius: 5px; 191 | -moz-border-radius: 5px; 192 | -webkit-border-radius: 5px; 193 | } 194 | 195 | #banner nav ul {list-style: none; margin: 0 auto; width: 800px;} 196 | #banner nav li {float: left; display: inline; margin: 0;} 197 | 198 | #banner nav a:link, #banner nav a:visited { 199 | color: #fff; 200 | display: inline-block; 201 | height: 30px; 202 | padding: 5px 1.5em; 203 | text-decoration: none; 204 | } 205 | #banner nav a:hover, #banner nav a:active, 206 | #banner nav .active a:link, #banner nav .active a:visited { 207 | background: #C74451; 208 | color: #fff; 209 | text-shadow: none !important; 210 | } 211 | 212 | #banner nav li:first-child a { 213 | border-top-left-radius: 5px; 214 | -moz-border-radius-topleft: 5px; 215 | -webkit-border-top-left-radius: 5px; 216 | 217 | border-bottom-left-radius: 5px; 218 | -moz-border-radius-bottomleft: 5px; 219 | -webkit-border-bottom-left-radius: 5px; 220 | } 221 | 222 | /* 223 | Featured 224 | *****************/ 225 | #featured { 226 | background: #fff; 227 | margin-bottom: 2em; 228 | overflow: hidden; 229 | padding: 20px; 230 | width: 760px; 231 | 232 | border-radius: 10px; 233 | -moz-border-radius: 10px; 234 | -webkit-border-radius: 10px; 235 | } 236 | 237 | #featured figure { 238 | border: 2px solid #eee; 239 | float: right; 240 | margin: 0.786em 2em 0 5em; 241 | width: 248px; 242 | } 243 | #featured figure img {display: block; float: right;} 244 | 245 | #featured h2 {color: #C74451; font-size: 1.714em; margin-bottom: 0.333em;} 246 | #featured h3 {font-size: 1.429em; margin-bottom: .5em;} 247 | 248 | #featured h3 a:link, #featured h3 a:visited {color: #000305; text-decoration: none;} 249 | #featured h3 a:hover, #featured h3 a:active {color: #fff;} 250 | 251 | /* 252 | Body 253 | *****************/ 254 | #content { 255 | background: #fff; 256 | margin-bottom: 2em; 257 | overflow: hidden; 258 | padding: 20px 20px; 259 | width: 760px; 260 | 261 | border-radius: 10px; 262 | -moz-border-radius: 10px; 263 | -webkit-border-radius: 10px; 264 | } 265 | 266 | /* 267 | Extras 268 | *****************/ 269 | #extras {margin: 0 auto 3em auto; overflow: hidden;} 270 | 271 | #extras ul {list-style: none; margin: 0;} 272 | #extras li {border-bottom: 1px solid #fff;} 273 | #extras h2 { 274 | color: #C74350; 275 | font-size: 1.429em; 276 | margin-bottom: .25em; 277 | padding: 0 3px; 278 | } 279 | 280 | #extras a:link, #extras a:visited { 281 | color: #444; 282 | display: block; 283 | border-bottom: 1px solid #F4E3E3; 284 | text-decoration: none; 285 | padding: .3em 
.25em; 286 | } 287 | 288 | #extras a:hover, #extras a:active {color: #fff;} 289 | 290 | /* Blogroll */ 291 | #extras .blogroll { 292 | float: left; 293 | width: 615px; 294 | } 295 | 296 | #extras .blogroll li {float: left; margin: 0 20px 0 0; width: 185px;} 297 | 298 | /* Social */ 299 | #extras .social { 300 | float: right; 301 | width: 175px; 302 | } 303 | 304 | #extras div[class='social'] a { 305 | background-repeat: no-repeat; 306 | background-position: 3px 6px; 307 | padding-left: 25px; 308 | } 309 | 310 | /* Icons */ 311 | .social a[href*='about.me'] {background-image: url('../images/icons/aboutme.png');} 312 | .social a[href*='bitbucket.org'] {background-image: url('../images/icons/bitbucket.png');} 313 | .social a[href*='delicious.com'] {background-image: url('../images/icons/delicious.png');} 314 | .social a[href*='digg.com'] {background-image: url('../images/icons/digg.png');} 315 | .social a[href*='facebook.com'] {background-image: url('../images/icons/facebook.png');} 316 | .social a[href*='gitorious.org'] {background-image: url('../images/icons/gitorious.png');} 317 | .social a[href*='github.com'], 318 | .social a[href*='git.io'] { 319 | background-image: url('../images/icons/github.png'); 320 | background-size: 16px 16px; 321 | } 322 | .social a[href*='gittip.com'] {background-image: url('../images/icons/gittip.png');} 323 | .social a[href*='plus.google.com'] {background-image: url('../images/icons/google-plus.png');} 324 | .social a[href*='groups.google.com'] {background-image: url('../images/icons/google-groups.png');} 325 | .social a[href*='news.ycombinator.com'], 326 | .social a[href*='hackernewsers.com'] {background-image: url('../images/icons/hackernews.png');} 327 | .social a[href*='last.fm'], .social a[href*='lastfm.'] {background-image: url('../images/icons/lastfm.png');} 328 | .social a[href*='linkedin.com'] {background-image: url('../images/icons/linkedin.png');} 329 | .social a[href*='reddit.com'] {background-image: url('../images/icons/reddit.png');} 330 | .social a[type$='atom+xml'], .social a[type$='rss+xml'] {background-image: url('../images/icons/rss.png');} 331 | .social a[href*='slideshare.net'] {background-image: url('../images/icons/slideshare.png');} 332 | .social a[href*='speakerdeck.com'] {background-image: url('../images/icons/speakerdeck.png');} 333 | .social a[href*='stackoverflow.com'] {background-image: url('../images/icons/stackoverflow.png');} 334 | .social a[href*='twitter.com'] {background-image: url('../images/icons/twitter.png');} 335 | .social a[href*='vimeo.com'] {background-image: url('../images/icons/vimeo.png');} 336 | .social a[href*='youtube.com'] {background-image: url('../images/icons/youtube.png');} 337 | 338 | /* 339 | About 340 | *****************/ 341 | #about { 342 | background: #fff; 343 | font-style: normal; 344 | margin-bottom: 2em; 345 | overflow: hidden; 346 | padding: 20px; 347 | text-align: left; 348 | width: 760px; 349 | 350 | border-radius: 10px; 351 | -moz-border-radius: 10px; 352 | -webkit-border-radius: 10px; 353 | } 354 | 355 | #about .primary {float: left; width: 165px;} 356 | #about .primary strong {color: #C64350; display: block; font-size: 1.286em;} 357 | #about .photo {float: left; margin: 5px 20px;} 358 | 359 | #about .url:link, #about .url:visited {text-decoration: none;} 360 | 361 | #about .bio {float: right; width: 500px;} 362 | 363 | /* 364 | Footer 365 | *****************/ 366 | #contentinfo {padding-bottom: 2em; text-align: right;} 367 | 368 | /***** Sections *****/ 369 | /* Blog */ 370 | .hentry { 371 
| display: block; 372 | clear: both; 373 | border-bottom: 1px solid #eee; 374 | padding: 1.5em 0; 375 | } 376 | li:last-child .hentry, #content > .hentry {border: 0; margin: 0;} 377 | #content > .hentry {padding: 1em 0;} 378 | .hentry img{display : none ;} 379 | .entry-title {font-size: 3em; margin-bottom: 10px; margin-top: 0;} 380 | .entry-title a:link, .entry-title a:visited {text-decoration: none; color: #333;} 381 | .entry-title a:visited {background-color: #fff;} 382 | 383 | .hentry .post-info * {font-style: normal;} 384 | 385 | /* Content */ 386 | .hentry footer {margin-bottom: 2em;} 387 | .hentry footer address {display: inline;} 388 | #posts-list footer address {display: block;} 389 | 390 | /* Blog Index */ 391 | #posts-list {list-style: none; margin: 0;} 392 | #posts-list .hentry {padding-left: 10px; position: relative;} 393 | 394 | #posts-list footer { 395 | left: 10px; 396 | position: relative; 397 | float: left; 398 | top: 0.5em; 399 | width: 190px; 400 | } 401 | 402 | /* About the Author */ 403 | #about-author { 404 | background: #f9f9f9; 405 | clear: both; 406 | font-style: normal; 407 | margin: 2em 0; 408 | padding: 10px 20px 15px 20px; 409 | 410 | border-radius: 5px; 411 | -moz-border-radius: 5px; 412 | -webkit-border-radius: 5px; 413 | } 414 | 415 | #about-author strong { 416 | color: #C64350; 417 | clear: both; 418 | display: block; 419 | font-size: 1.429em; 420 | } 421 | 422 | #about-author .photo {border: 1px solid #ddd; float: left; margin: 5px 1em 0 0;} 423 | 424 | /* Comments */ 425 | #comments-list {list-style: none; margin: 0 1em;} 426 | #comments-list blockquote { 427 | background: #f8f8f8; 428 | clear: both; 429 | font-style: normal; 430 | margin: 0; 431 | padding: 15px 20px; 432 | 433 | border-radius: 5px; 434 | -moz-border-radius: 5px; 435 | -webkit-border-radius: 5px; 436 | } 437 | #comments-list footer {color: #888; padding: .5em 1em 0 0; text-align: right;} 438 | 439 | #comments-list li:nth-child(2n) blockquote {background: #F5f5f5;} 440 | 441 | /* Add a Comment */ 442 | #add-comment label {clear: left; float: left; text-align: left; width: 150px;} 443 | #add-comment input[type='text'], 444 | #add-comment input[type='email'], 445 | #add-comment input[type='url'] {float: left; width: 200px;} 446 | 447 | #add-comment textarea {float: left; height: 150px; width: 495px;} 448 | 449 | #add-comment p.req {clear: both; margin: 0 .5em 1em 0; text-align: right;} 450 | 451 | #add-comment input[type='submit'] {float: right; margin: 0 .5em;} 452 | #add-comment * {margin-bottom: .5em;} 453 | -------------------------------------------------------------------------------- /html/output/theme/css/pygment.css: -------------------------------------------------------------------------------- 1 | .hll { 2 | background-color:#eee; 3 | } 4 | .c { 5 | color:#408090; 6 | font-style:italic; 7 | } 8 | .err { 9 | border:1px solid #FF0000; 10 | } 11 | .k { 12 | color:#007020; 13 | font-weight:bold; 14 | } 15 | .o { 16 | color:#666666; 17 | } 18 | .cm { 19 | color:#408090; 20 | font-style:italic; 21 | } 22 | .cp { 23 | color:#007020; 24 | } 25 | .c1 { 26 | color:#408090; 27 | font-style:italic; 28 | } 29 | .cs { 30 | background-color:#FFF0F0; 31 | color:#408090; 32 | } 33 | .gd { 34 | color:#A00000; 35 | } 36 | .ge { 37 | font-style:italic; 38 | } 39 | .gr { 40 | color:#FF0000; 41 | } 42 | .gh { 43 | color:#000080; 44 | font-weight:bold; 45 | } 46 | .gi { 47 | color:#00A000; 48 | } 49 | .go { 50 | color:#303030; 51 | } 52 | .gp { 53 | color:#C65D09; 54 | font-weight:bold; 55 | } 56 | .gs 
{ 57 | font-weight:bold; 58 | } 59 | .gu { 60 | color:#800080; 61 | font-weight:bold; 62 | } 63 | .gt { 64 | color:#0040D0; 65 | } 66 | .kc { 67 | color:#007020; 68 | font-weight:bold; 69 | } 70 | .kd { 71 | color:#007020; 72 | font-weight:bold; 73 | } 74 | .kn { 75 | color:#007020; 76 | font-weight:bold; 77 | } 78 | .kp { 79 | color:#007020; 80 | } 81 | .kr { 82 | color:#007020; 83 | font-weight:bold; 84 | } 85 | .kt { 86 | color:#902000; 87 | } 88 | .m { 89 | color:#208050; 90 | } 91 | .s { 92 | color:#4070A0; 93 | } 94 | .na { 95 | color:#4070A0; 96 | } 97 | .nb { 98 | color:#007020; 99 | } 100 | .nc { 101 | color:#0E84B5; 102 | font-weight:bold; 103 | } 104 | .no { 105 | color:#60ADD5; 106 | } 107 | .nd { 108 | color:#555555; 109 | font-weight:bold; 110 | } 111 | .ni { 112 | color:#D55537; 113 | font-weight:bold; 114 | } 115 | .ne { 116 | color:#007020; 117 | } 118 | .nf { 119 | color:#06287E; 120 | } 121 | .nl { 122 | color:#002070; 123 | font-weight:bold; 124 | } 125 | .nn { 126 | color:#0E84B5; 127 | font-weight:bold; 128 | } 129 | .nt { 130 | color:#062873; 131 | font-weight:bold; 132 | } 133 | .nv { 134 | color:#BB60D5; 135 | } 136 | .ow { 137 | color:#007020; 138 | font-weight:bold; 139 | } 140 | .w { 141 | color:#BBBBBB; 142 | } 143 | .mf { 144 | color:#208050; 145 | } 146 | .mh { 147 | color:#208050; 148 | } 149 | .mi { 150 | color:#208050; 151 | } 152 | .mo { 153 | color:#208050; 154 | } 155 | .sb { 156 | color:#4070A0; 157 | } 158 | .sc { 159 | color:#4070A0; 160 | } 161 | .sd { 162 | color:#4070A0; 163 | font-style:italic; 164 | } 165 | .s2 { 166 | color:#4070A0; 167 | } 168 | .se { 169 | color:#4070A0; 170 | font-weight:bold; 171 | } 172 | .sh { 173 | color:#4070A0; 174 | } 175 | .si { 176 | color:#70A0D0; 177 | font-style:italic; 178 | } 179 | .sx { 180 | color:#C65D09; 181 | } 182 | .sr { 183 | color:#235388; 184 | } 185 | .s1 { 186 | color:#4070A0; 187 | } 188 | .ss { 189 | color:#517918; 190 | } 191 | .bp { 192 | color:#007020; 193 | } 194 | .vc { 195 | color:#BB60D5; 196 | } 197 | .vg { 198 | color:#BB60D5; 199 | } 200 | .vi { 201 | color:#BB60D5; 202 | } 203 | .il { 204 | color:#208050; 205 | } 206 | -------------------------------------------------------------------------------- /html/output/theme/css/reset.css: -------------------------------------------------------------------------------- 1 | /* 2 | Name: Reset Stylesheet 3 | Description: Resets browser's default CSS 4 | Author: Eric Meyer 5 | Author URI: http://meyerweb.com/eric/tools/css/reset/ 6 | */ 7 | 8 | /* v1.0 | 20080212 */ 9 | html, body, div, span, applet, object, iframe, 10 | h1, h2, h3, h4, h5, h6, p, blockquote, pre, 11 | a, abbr, acronym, address, big, cite, code, 12 | del, dfn, em, font, img, ins, kbd, q, s, samp, 13 | small, strike, strong, sub, sup, tt, var, 14 | b, u, i, center, 15 | dl, dt, dd, ol, ul, li, 16 | fieldset, form, label, legend, 17 | table, caption, tbody, tfoot, thead, tr, th, td { 18 | background: transparent; 19 | border: 0; 20 | font-size: 100%; 21 | margin: 0; 22 | outline: 0; 23 | padding: 0; 24 | vertical-align: baseline; 25 | } 26 | 27 | body {line-height: 1;} 28 | 29 | ol, ul {list-style: none;} 30 | 31 | blockquote, q {quotes: none;} 32 | 33 | blockquote:before, blockquote:after, 34 | q:before, q:after { 35 | content: ''; 36 | content: none; 37 | } 38 | 39 | /* remember to define focus styles! */ 40 | :focus { 41 | outline: 0; 42 | } 43 | 44 | /* remember to highlight inserts somehow! 
*/ 45 | ins {text-decoration: none;} 46 | del {text-decoration: line-through;} 47 | 48 | /* tables still need 'cellspacing="0"' in the markup */ 49 | table { 50 | border-collapse: collapse; 51 | border-spacing: 0; 52 | } -------------------------------------------------------------------------------- /html/output/theme/css/typogrify.css: -------------------------------------------------------------------------------- 1 | .caps {font-size:.92em;} 2 | .amp {color:#666; font-size:1.05em;font-family:"Warnock Pro", "Goudy Old Style","Palatino","Book Antiqua",serif; font-style:italic;} 3 | .dquo {margin-left:-.38em;} -------------------------------------------------------------------------------- /html/output/theme/css/wide.css: -------------------------------------------------------------------------------- 1 | @import url("main.css"); 2 | 3 | body { 4 | font: 1.3em/1.3 "Hoefler Text", Georgia, serif; 5 | } 6 | 7 | .post-info { 8 | display: none; 9 | } 10 | 11 | #banner nav { 12 | display: none; 13 | -moz-border-radius: 0px; 14 | margin-bottom: 20px; 15 | overflow: hidden; 16 | font-size: 1em; 17 | background: #F5F4EF; 18 | } 19 | 20 | #banner nav ul { 21 | padding-right: 50px; 22 | } 23 | 24 | #banner nav li { 25 | float: right; 26 | color: #000; 27 | } 28 | 29 | #banner nav li a { 30 | color: #000; 31 | } 32 | 33 | #banner h1 { 34 | margin-bottom: -18px; 35 | } 36 | 37 | #featured, #extras { 38 | padding: 50px; 39 | } 40 | 41 | #featured { 42 | padding-top: 20px; 43 | } 44 | 45 | #extras { 46 | padding-top: 0px; 47 | padding-bottom: 0px; 48 | } -------------------------------------------------------------------------------- /html/output/theme/images/icons/aboutme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/aboutme.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/bitbucket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/bitbucket.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/delicious.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/delicious.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/facebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/facebook.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/github.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/gitorious.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/gitorious.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/gittip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/gittip.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/google-groups.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/google-groups.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/google-plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/google-plus.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/hackernews.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/hackernews.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/lastfm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/lastfm.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/linkedin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/linkedin.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/reddit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/reddit.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/rss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/rss.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/slideshare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/slideshare.png 
-------------------------------------------------------------------------------- /html/output/theme/images/icons/speakerdeck.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/speakerdeck.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/stackoverflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/stackoverflow.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/twitter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/twitter.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/vimeo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/vimeo.png -------------------------------------------------------------------------------- /html/output/theme/images/icons/youtube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/output/theme/images/icons/youtube.png -------------------------------------------------------------------------------- /html/pelicanconf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- # 3 | from __future__ import unicode_literals 4 | 5 | AUTHOR = u'Dorian Goldman' 6 | SITENAME = u'Introduction to Data Science in Industry' 7 | SITEURL = '' 8 | 9 | PATH = 'content' 10 | 11 | TIMEZONE = 'America/New_York' 12 | 13 | DEFAULT_LANG = u'EN' 14 | 15 | # Feed generation is usually not desired when developing 16 | FEED_ALL_ATOM = None 17 | CATEGORY_FEED_ATOM = None 18 | TRANSLATION_FEED_ATOM = None 19 | AUTHOR_FEED_ATOM = None 20 | AUTHOR_FEED_RSS = None 21 | 22 | # Blogroll 23 | LINKS = (('Pelican', 'http://getpelican.com/'), 24 | ('Python.org', 'http://python.org/'), 25 | ('Jinja2', 'http://jinja.pocoo.org/'), 26 | ('You can modify those links in your config file', '#'),) 27 | 28 | # Social widget 29 | SOCIAL = (('You can add links in your config file', '#'), 30 | ('Another social link', '#'),) 31 | 32 | DEFAULT_PAGINATION = 10 33 | 34 | # Uncomment following line if you want document-relative URLs when developing 35 | #RELATIVE_URLS = True 36 | -------------------------------------------------------------------------------- /html/pelicanconf.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/html/pelicanconf.pyc -------------------------------------------------------------------------------- /html/publishconf.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | # -*- coding: utf-8 -*- # 3 | from __future__ import unicode_literals 4 | 5 | # This file is only used if you use `make publish` or 6 | # explicitly specify it as your config file. 7 | 8 | import os 9 | import sys 10 | sys.path.append(os.curdir) 11 | from pelicanconf import * 12 | 13 | SITEURL = '' 14 | RELATIVE_URLS = False 15 | 16 | FEED_ALL_ATOM = 'feeds/all.atom.xml' 17 | CATEGORY_FEED_ATOM = 'feeds/%s.atom.xml' 18 | 19 | DELETE_OUTPUT_DIRECTORY = True 20 | 21 | # Following items are often useful when publishing 22 | 23 | #DISQUS_SITENAME = "" 24 | #GOOGLE_ANALYTICS = "" 25 | -------------------------------------------------------------------------------- /img/levelsets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/img/levelsets.png -------------------------------------------------------------------------------- /img/overfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/img/overfitting.png -------------------------------------------------------------------------------- /img/regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/img/regression.png -------------------------------------------------------------------------------- /img/regressionexample: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/img/regressionexample -------------------------------------------------------------------------------- /img/regularization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/img/regularization.png -------------------------------------------------------------------------------- /lectures/Lecture 1. What is Data Science_.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/lectures/Lecture 1. What is Data Science_.pdf -------------------------------------------------------------------------------- /lectures/Lecture 2 - Mathematics Review.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/lectures/Lecture 2 - Mathematics Review.pdf -------------------------------------------------------------------------------- /lectures/Lecture 2. Introduction to Supervised Learning (1).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/lectures/Lecture 2. 
Introduction to Supervised Learning (1).pdf -------------------------------------------------------------------------------- /lectures/Lecture 3 - Model Selection, Evaluation and Regularization.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/lectures/Lecture 3 - Model Selection, Evaluation and Regularization.pdf -------------------------------------------------------------------------------- /lectures/Lecture 4 - Classification (1).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/lectures/Lecture 4 - Classification (1).pdf -------------------------------------------------------------------------------- /lectures/Lecture 5 - Decision Trees and Non-Parametric Models.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/lectures/Lecture 5 - Decision Trees and Non-Parametric Models.pdf -------------------------------------------------------------------------------- /notebooks/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/notebooks/.DS_Store -------------------------------------------------------------------------------- /notebooks/.ipynb_checkpoints/Cleaning and Featurizing Data-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /notebooks/.ipynb_checkpoints/Lecture 6 - Decision Trees-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /notebooks/.ipynb_checkpoints/PCA - MNIST example-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /notebooks/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 1 6 | } 7 | -------------------------------------------------------------------------------- /notebooks/Lecture 5 - Cleaning and Featurizing Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data" 12 | ] 13 | } 14 | ], 15 | "metadata": { 16 | "anaconda-cloud": {}, 17 | "kernelspec": { 18 | "display_name": "Python [conda root]", 19 | "language": "python", 20 | "name": "conda-root-py" 21 | }, 22 | 
"language_info": { 23 | "codemirror_mode": { 24 | "name": "ipython", 25 | "version": 2 26 | }, 27 | "file_extension": ".py", 28 | "mimetype": "text/x-python", 29 | "name": "python", 30 | "nbconvert_exporter": "python", 31 | "pygments_lexer": "ipython2", 32 | "version": "2.7.12" 33 | } 34 | }, 35 | "nbformat": 4, 36 | "nbformat_minor": 1 37 | } 38 | -------------------------------------------------------------------------------- /notebooks/Lecture 6 - Decision Trees.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "print(__doc__)\n", 12 | "\n", 13 | "# Import the necessary modules and libraries\n", 14 | "import numpy as np\n", 15 | "from sklearn.tree import DecisionTreeRegressor\n", 16 | "import matplotlib.pyplot as plt\n", 17 | "\n", 18 | "# Create a random dataset\n", 19 | "rng = np.random.RandomState(1)\n", 20 | "X = np.sort(5 * rng.rand(80, 1), axis=0)\n", 21 | "y = np.sin(X).ravel()\n", 22 | "y[::5] += 3 * (0.5 - rng.rand(16))\n", 23 | "\n", 24 | "# Fit regression model\n", 25 | "regr_0 = DecisionTreeRegressor(max_depth=1)\n", 26 | "regr_1 = DecisionTreeRegressor(max_depth=2)\n", 27 | "regr_2 = DecisionTreeRegressor(max_depth=5)\n", 28 | "regr_0.fit(X, y)\n", 29 | "regr_1.fit(X, y)\n", 30 | "regr_2.fit(X, y)\n", 31 | "\n", 32 | "# Predict\n", 33 | "X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]\n", 34 | "y_0 = regr_0.predict(X_test)\n", 35 | "y_1 = regr_1.predict(X_test)\n", 36 | "y_2 = regr_2.predict(X_test)\n", 37 | "\n", 38 | "# Plot the results\n", 39 | "plt.figure()\n", 40 | "plt.scatter(X, y, c=\"darkorange\", label=\"data\")\n", 41 | "#plt.plot(X_test, y_0, color=\"red\", label=\"max_depth=1\", linewidth=2)\n", 42 | "plt.plot(X_test, y_1, color=\"cornflowerblue\", label=\"max_depth=2\", linewidth=2)\n", 43 | "plt.plot(X_test, y_2, color=\"yellowgreen\", label=\"max_depth=5\", linewidth=2)\n", 44 | "plt.xlabel(\"data\")\n", 45 | "plt.ylabel(\"target\")\n", 46 | "plt.title(\"Decision Tree Regression\")\n", 47 | "plt.legend()\n", 48 | "plt.show()" 49 | ] 50 | } 51 | ], 52 | "metadata": { 53 | "anaconda-cloud": {}, 54 | "kernelspec": { 55 | "display_name": "Python [default]", 56 | "language": "python", 57 | "name": "python2" 58 | }, 59 | "language_info": { 60 | "codemirror_mode": { 61 | "name": "ipython", 62 | "version": 2 63 | }, 64 | "file_extension": ".py", 65 | "mimetype": "text/x-python", 66 | "name": "python", 67 | "nbconvert_exporter": "python", 68 | "pygments_lexer": "ipython2", 69 | "version": "2.7.12" 70 | } 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 1 74 | } 75 | -------------------------------------------------------------------------------- /notebooks/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | "//anaconda/lib/python2.7/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. 
This module will be removed in 0.20.\n", 15 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "# Necessary libraries\n", 21 | "import pandas as pd\n", 22 | "import statsmodels.api as sm\n", 23 | "from sklearn.cross_validation import KFold  # deprecated since sklearn 0.18 (see the warning above); sklearn.model_selection is the replacement\n", 24 | "from sklearn.metrics import confusion_matrix\n", 25 | "from sklearn.preprocessing import StandardScaler\n", 26 | "from sklearn.linear_model import LogisticRegression\n", 27 | "from sklearn.linear_model import Ridge\n", 28 | "from sklearn.svm import SVC\n", 29 | "from sklearn.ensemble import RandomForestClassifier as RF\n", 30 | "from sklearn.neighbors import KNeighborsClassifier as KNN\n", 31 | "import numpy as np\n", 32 | "import matplotlib.pyplot as plt\n", 33 | "from sklearn.metrics import roc_curve, auc\n", 34 | "from sklearn.utils import shuffle\n", 35 | "# from sklearn.metrics import roc_curve, auc  # duplicate of the import above\n", 36 | "import pylab\n", 37 | "from sklearn import svm\n", 38 | "# from sklearn.linear_model import LogisticRegression  # duplicate of the import above\n", 39 | "from sklearn.ensemble import RandomForestClassifier\n", 40 | "from mpl_toolkits.basemap import Basemap\n", 41 | "import re\n", 42 | "# import pylab as plt  # commented out: this rebinding shadowed matplotlib.pyplot above\n", 43 | "import seaborn\n", 44 | "from sklearn.linear_model import LinearRegression\n", 45 | "import numpy.random as nprnd\n", 46 | "import random\n", 47 | "pd.set_option('display.max_columns', 500)\n", 48 | "%matplotlib inline" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [ 58 | { 59 | "ename": "IOError", 60 | "evalue": "File expected_ts_pred.csv does not exist", 61 | "output_type": "error", 62 | "traceback": [ 63 | "---------------------------------------------------------------------------", 64 | "IOError                                   Traceback (most recent call last)", 65 | " in ()\n----> 1 df = pd.read_csv('expected_ts_pred.csv')\n", 66 | "//anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)\n    472                     skip_blank_lines=skip_blank_lines)\n    473 \n--> 474         return _read(filepath_or_buffer, kwds)\n    475 \n    476     parser_f.__name__ = name\n", 67 | "//anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)\n    248 \n    249     # Create the parser.\n--> 250     parser = TextFileReader(filepath_or_buffer, **kwds)\n    251 \n    252     if (nrows is not None) and (chunksize is not None):\n", 68 | "//anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)\n    564         self.options['has_index_names'] = kwds['has_index_names']\n    565 \n--> 566         self._make_engine(self.engine)\n    567 \n    568     def _get_options_with_defaults(self, engine):\n", 69 | "//anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _make_engine(self, engine)\n    703     def _make_engine(self, engine='c'):\n    704         if engine == 'c':\n--> 705             self._engine = CParserWrapper(self.f, **self.options)\n    706         else:\n    707             if engine == 'python':\n", 70 | "//anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, src, **kwds)\n   1070         kwds['allow_leading_cols'] = self.index_col is not False\n   1071 \n-> 1072         self._reader = _parser.TextReader(src, **kwds)\n   1073 \n   1074         # XXX\n", 71 | "pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3173)()\n", 72 | "pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:5912)()\n", 73 | "IOError: File expected_ts_pred.csv does not exist" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "df = pd.read_csv('expected_ts_pred.csv')  # note: fails as executed above; expected_ts_pred.csv is not in the repository" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [] 89 | } 90 | ], 91 | "metadata": { 92 | "kernelspec": { 93 | "display_name": "Python [conda root]", 94 | "language": "python", 95 | "name": "conda-root-py" 96 | }, 97 | "language_info": { 98 | "codemirror_mode": { 99 | "name": "ipython", 100 | "version": 2 101 | }, 102 | "file_extension": ".py", 103 | "mimetype": "text/x-python", 104 | "name": "python", 105 | "nbconvert_exporter": "python", 106 | "pygments_lexer": "ipython2", 107 | "version": "2.7.12" 108 | } 109 | }, 110 | "nbformat": 4, 111 | "nbformat_minor": 1 112 | } 113 | -------------------------------------------------------------------------------- /notebooks/Untitled1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# read data into a DataFrame\n", 12 | "import pandas as pd\n", 13 | "import pylab as plt\n", 14 | "import seaborn\n", 15 | "from sklearn.linear_model import LinearRegression\n", 16 | "import numpy.random as nprnd\n", 17 | "import random\n", 18 | "import json\n", 19 | "import numpy as np\n", 20 | "from sklearn.model_selection import train_test_split\n", 21 | "from scipy.spatial.distance import cosine\n", 22 | "from sklearn.metrics import pairwise_distances\n", 23 | "pd.set_option('display.max_columns', 500)\n", 24 | "%matplotlib inline" 25 | ] 26 | } 27 | ], 28 | "metadata": { 29 | "anaconda-cloud": {}, 30 | "kernelspec": { 31 | "display_name": "Python [conda root]", 32
| "language": "python", 33 | "name": "conda-root-py" 34 | } 35 | }, 36 | "nbformat": 4, 37 | "nbformat_minor": 2 38 | } 39 | -------------------------------------------------------------------------------- /notebooks/Untitled2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/plain": [ 13 | "array(['three', 'one', 'two'],\n", 14 | " dtype=' 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ; 6 | 2 [label="mse = 0.1919\nsamples = 11\nvalue = 0.0524"] ; 7 | 1 -> 2 ; 8 | 3 [label="mse = 0.1479\nsamples = 40\nvalue = 0.7138"] ; 9 | 1 -> 3 ; 10 | 4 [label="X[0] <= 3.8502\nmse = 0.1244\nsamples = 29\nvalue = -0.6675"] ; 11 | 0 -> 4 [labeldistance=2.5, labelangle=-45, headlabel="False"] ; 12 | 5 [label="mse = 0.1241\nsamples = 14\nvalue = -0.4519"] ; 13 | 4 -> 5 ; 14 | 6 [label="mse = 0.0407\nsamples = 15\nvalue = -0.8686"] ; 15 | 4 -> 6 ; 16 | } -------------------------------------------------------------------------------- /notebooks/dt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/notebooks/dt.png -------------------------------------------------------------------------------- /notebooks/mnist_pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/notebooks/mnist_pca.png -------------------------------------------------------------------------------- /pdfs/AllofStatistics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/AllofStatistics.pdf -------------------------------------------------------------------------------- /pdfs/Conditional-Probability.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/Conditional-Probability.pdf -------------------------------------------------------------------------------- /pdfs/Discrete-Probabilities.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/Discrete-Probabilities.pdf -------------------------------------------------------------------------------- /pdfs/Effective Computation in Physics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/Effective Computation in Physics.pdf -------------------------------------------------------------------------------- /pdfs/ISLR_First_Printing.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/ISLR_First_Printing.pdf -------------------------------------------------------------------------------- /pdfs/MachineLearningMethodsGraph.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/MachineLearningMethodsGraph.pdf -------------------------------------------------------------------------------- /pdfs/Scikit_Learn_Cheat_Sheet_Python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/Scikit_Learn_Cheat_Sheet_Python.pdf -------------------------------------------------------------------------------- /pdfs/[Mark Joshi]Quant Job Interview Questions And Answers (1).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/[Mark Joshi]Quant Job Interview Questions And Answers (1).pdf -------------------------------------------------------------------------------- /pdfs/coinbias.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/coinbias.pdf -------------------------------------------------------------------------------- /pdfs/eigenvaluenotes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/eigenvaluenotes.pdf -------------------------------------------------------------------------------- /pdfs/lagrangemultipliers.aux: -------------------------------------------------------------------------------- 1 | \relax 2 | \providecommand\hyper@newdestlabel[2]{} 3 | \providecommand*\new@tpo@label[2]{} 4 | \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} 5 | \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined 6 | \global\let\oldcontentsline\contentsline 7 | \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} 8 | \global\let\oldnewlabel\newlabel 9 | \gdef\newlabel#1#2{\newlabelxx{#1}#2} 10 | \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} 11 | \AtEndDocument{\ifx\hyper@anchor\@undefined 12 | \let\contentsline\oldcontentsline 13 | \let\newlabel\oldnewlabel 14 | \fi} 15 | \fi} 16 | \global\let\hyper@last\relax 17 | \gdef\HyperFirstAtBeginDocument#1{#1} 18 | \providecommand\HyField@AuxAddToFields[1]{} 19 | \providecommand\HyField@AuxAddToCoFields[2]{} 20 | \select@language{english} 21 | \@writefile{toc}{\select@language{english}} 22 | \@writefile{lof}{\select@language{english}} 23 | \@writefile{lot}{\select@language{english}} 24 | \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction to Constrained Optimization}{1}{section.1}} 25 | \@writefile{lof}{\contentsline {figure}{\numberline {1.1}{\ignorespaces $L^1$ and $L^2$ regularizaiton.}}{2}{figure.1.1}} 26 | \newlabel{reg}{{1.1}{2}{$L^1$ and $L^2$ regularizaiton}{figure.1.1}{}} 27 | \@writefile{toc}{\contentsline {section}{\numberline {2}Derivation of Lagrange Multipliers}{3}{section.2}} 28 | \@writefile{toc}{\contentsline {section}{\numberline {3}Interpreting Lasso and Ridge regression}{4}{section.3}} 29 | \@writefile{toc}{\contentsline {section}{\numberline {4}How much of a constraint do we use?}{4}{section.4}} 30 | -------------------------------------------------------------------------------- 
/pdfs/lagrangemultipliers.log: -------------------------------------------------------------------------------- 1 | This is pdfTeX, Version 3.14159265-2.6-1.40.17 (TeX Live 2016) (preloaded format=pdflatex 2016.5.22) 22 FEB 2017 10:02 2 | entering extended mode 3 | restricted \write18 enabled. 4 | file:line:error style messages enabled. 5 | %&-line parsing enabled. 6 | **lagrangemultipliers.tex 7 | (./lagrangemultipliers.tex 8 | LaTeX2e <2016/03/31> 9 | Babel <3.9r> and hyphenation patterns for 83 language(s) loaded. 10 | (/usr/local/texlive/2016/texmf-dist/tex/latex/koma-script/scrartcl.cls 11 | Document Class: scrartcl 2016/05/10 v3.20 KOMA-Script document class (article) 12 | (/usr/local/texlive/2016/texmf-dist/tex/latex/koma-script/scrkbase.sty 13 | Package: scrkbase 2016/05/10 v3.20 KOMA-Script package (KOMA-Script-dependent b 14 | asics and keyval usage) 15 | 16 | (/usr/local/texlive/2016/texmf-dist/tex/latex/koma-script/scrbase.sty 17 | Package: scrbase 2016/05/10 v3.20 KOMA-Script package (KOMA-Script-independent 18 | basics and keyval usage) 19 | 20 | (/usr/local/texlive/2016/texmf-dist/tex/latex/graphics/keyval.sty 21 | Package: keyval 2014/10/28 v1.15 key=value parser (DPC) 22 | \KV@toks@=\toks14 23 | ) 24 | (/usr/local/texlive/2016/texmf-dist/tex/latex/koma-script/scrlfile.sty 25 | Package: scrlfile 2016/05/10 v3.20 KOMA-Script package (loading files) 26 | 27 | Package scrlfile, 2016/05/10 v3.20 KOMA-Script package (loading files) 28 | Copyright (C) Markus Kohm 29 | 30 | ))) (/usr/local/texlive/2016/texmf-dist/tex/latex/koma-script/tocbasic.sty 31 | Package: tocbasic 2016/05/10 v3.20 KOMA-Script package (handling toc-files) 32 | \scr@dte@tocline@numberwidth=\skip41 33 | \scr@dte@tocline@numbox=\box26 34 | ) 35 | Package tocbasic Info: omitting babel extension for `toc' 36 | (tocbasic) because of feature `nobabel' available 37 | (tocbasic) for `toc' on input line 130. 38 | Package tocbasic Info: omitting babel extension for `lof' 39 | (tocbasic) because of feature `nobabel' available 40 | (tocbasic) for `lof' on input line 131. 41 | Package tocbasic Info: omitting babel extension for `lot' 42 | (tocbasic) because of feature `nobabel' available 43 | (tocbasic) for `lot' on input line 132. 44 | Class scrartcl Info: File `scrsize11pt.clo' used to setup font sizes on input l 45 | ine 2051. 
545 | PDF statistics: 546 | 123 PDF objects out of 1000 (max. 8388607) 547 | 98 compressed objects within 1 object stream 548 | 19 named destinations out of 1000 (max. 500000) 549 | 38 words of extra memory for PDF output out of 10000 (max. 10000000) 550 | 551 | -------------------------------------------------------------------------------- /pdfs/lagrangemultipliers.out: -------------------------------------------------------------------------------- 1 | \BOOKMARK [1][-]{section.1}{Introduction to Constrained Optimization}{}% 1 2 | \BOOKMARK [1][-]{section.2}{Derivation of Lagrange Multipliers}{}% 2 3 | \BOOKMARK [1][-]{section.3}{Interpreting Lasso and Ridge regression}{}% 3 4 | \BOOKMARK [1][-]{section.4}{How much of a constraint do we use?}{}% 4 5 | -------------------------------------------------------------------------------- /pdfs/lagrangemultipliers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/lagrangemultipliers.pdf -------------------------------------------------------------------------------- /pdfs/lagrangemultipliers.synctex.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/lagrangemultipliers.synctex.gz -------------------------------------------------------------------------------- /pdfs/lagrangemultipliers.tex: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % Short Sectioned Assignment 3 | % LaTeX Template 4 | % Version 1.0 (5/5/12) 5 | % 6 | % This template has been downloaded from: 7 | % http://www.LaTeXTemplates.com 8 | % 9 | % Original author: 10 | % Frits Wenneker (http://www.howtotex.com) 11 | % 12 | % License: 13 | % CC BY-NC-SA 3.0 (http://creativecommons.org/licenses/by-nc-sa/3.0/) 14 | % 15 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 | 17 | %---------------------------------------------------------------------------------------- 18 | % PACKAGES AND OTHER DOCUMENT CONFIGURATIONS 19 | %---------------------------------------------------------------------------------------- 20 | 21 | \documentclass[paper=a4, fontsize=11pt]{scrartcl} % A4 paper and 11pt font size 22 | 23 | \usepackage[T1]{fontenc} % Use 8-bit encoding that has 256 glyphs 24 | %\usepackage{fourier} % Use the Adobe Utopia font for the document - comment this line to return to the LaTeX default 25 | \usepackage[english]{babel} % English language/hyphenation 26 | \usepackage{amsmath,amsfonts,amsthm} % Math packages 27 | \usepackage{hyperref} 28 | \usepackage{lipsum} % Used for inserting dummy 'Lorem ipsum' text into the template 29 | \usepackage{graphicx} 30 | \usepackage{sectsty} % Allows customizing section commands 31 | \allsectionsfont{\centering \normalfont\scshape} % Make all sections centered, the default font and small caps 32 | 33 | \usepackage{fancyhdr} % Custom headers and footers 34 | \pagestyle{fancyplain} % Makes all pages in the document conform to the custom headers and footers 35 | \fancyhead{} % No page header - if you want one, create it in the same way as the footers below 36 | \fancyfoot[L]{} % Empty left footer 37 | \fancyfoot[C]{} % Empty center footer 38 | \fancyfoot[R]{\thepage} % Page numbering for right footer 39 | \renewcommand{\headrulewidth}{0pt} % Remove header underlines 40 | 
\renewcommand{\footrulewidth}{0pt} % Remove footer underlines
41 | \setlength{\headheight}{13.6pt} % Customize the height of the header
42 | 
43 | \numberwithin{equation}{section} % Number equations within sections (i.e. 1.1, 1.2, 2.1, 2.2 instead of 1, 2, 3, 4)
44 | \numberwithin{figure}{section} % Number figures within sections (i.e. 1.1, 1.2, 2.1, 2.2 instead of 1, 2, 3, 4)
45 | \numberwithin{table}{section} % Number tables within sections (i.e. 1.1, 1.2, 2.1, 2.2 instead of 1, 2, 3, 4)
46 | 
47 | \setlength\parindent{0pt} % Removes all indentation from paragraphs - comment this line for an assignment with lots of text
48 | 
49 | %----------------------------------------------------------------------------------------
50 | %	TITLE SECTION
51 | %----------------------------------------------------------------------------------------
52 | 
53 | \newcommand{\horrule}[1]{\rule{\linewidth}{#1}} % Create horizontal rule command with 1 argument of height
54 | 
55 | \title{
56 | \normalfont \normalsize
57 | \textsc{Introduction to Data Science in Industry} \\ [25pt] % Your university, school and/or department name(s)
58 | \horrule{0.5pt} \\[0.4cm] % Thin top horizontal rule
59 | \huge Lagrange Multipliers and Optimization % The assignment title
60 | \horrule{2pt} \\[0.5cm] % Thick bottom horizontal rule
61 | }
62 | 
63 | \author{Dorian Goldman} % Your name
64 | 
65 | \date{\normalsize\today} % Today's date or a custom date
66 | 
67 | \begin{document}
68 | 
69 | \maketitle % Print the title
70 | 
71 | \section{Introduction to Constrained Optimization}
72 | \begin{figure}
73 | \includegraphics[width=\linewidth]{../img/regularization.png}
74 | \caption{$L^1$ and $L^2$ regularization.}
75 | \label{reg}
76 | \end{figure}
77 | 
78 | Let $\beta = (\beta_1, \beta_2)$ be the desired coefficients in a linear regression so that we seek to minimize
79 | \begin{equation}
80 | \mathcal{L}(\beta) := \frac{1}{N} \sum_{i=1}^N (y_i - \beta_1 x_{i1} - \beta_2 x_{i2})^2.
81 | \end{equation}
82 | 
83 | Recall that we wish to penalize the size of the coefficients in order not to overfit our model, so we impose a constraint on the size of $\beta$. More precisely, we seek to solve the \textbf{constrained optimization problem:}
84 | \begin{align}
85 | \min_{\beta}\; &\mathcal{L} (\beta) \\
86 | |\beta_1|^p + |\beta_2|^p &\leq C,
87 | \end{align}
88 | 
89 | for $p=1, 2$. \\\\
90 | 
91 | \textbf{Common Question:} Why do we choose $p=1$ or $p=2$? Why not some other $p$?\\\\
92 | 
93 | \textbf{Answer:}
94 | \begin{itemize}
95 | \item The $L^2$ norm (ie. $p=2$) is very well behaved and is related to the equation for a sphere ($x^2+y^2 = r^2$). Recall that we have an exact solution to the linear regression problem, ie. (1.1), when we choose our norm
96 | to be $L^2$ (as we have above, known as ordinary least squares - there is no exact solution if we replace the exponent $2$ in (1.1) with a $1$). For $L^1$, many different points can achieve the same norm, ie. $|1/2| + |1/2| = 1$ and $|1|+|0|=1$, so the same problem can have many solutions.
97 | \item The constant surfaces of $L^2$ (level surfaces, ie. where $x^2 + y^2 = r^2$) are the surfaces of constant distance from the origin, meaning that distances are rotationally invariant (why would we want to count certain directions more than others?).
98 | \item Thinking of a convex function $f(x) = x^2$, this has a constant second derivative $f''(x) = 2$, so it is \textbf{uniformly convex} - meaning we generally have stability, uniqueness, etc. when minimizing with respect to this norm.
99 | \item $p=1$ is rather unique in that it provides a way of \emph{regularizing which results in sparse coefficients} (ie. many zero coefficients, allowing you to choose the most important features). This is explained in these notes below.
100 | \item There isn't really any advantage to using some other $L^p$ space for $p > 2$, yet there are many disadvantages (such as degeneracy).
101 | \end{itemize}
102 | 
103 | Let's define $g(\beta) = |\beta_1|^p + |\beta_2|^p$, and for now, focus on $p=2$. Referring to the figure on the right below, we seek to minimize (1.1) with some constraint
104 | \[ \beta_1^2 + \beta_2^2 \leq C.\]
105 | 
106 | The size of the constraint depends on how strong we want our regularization to be, and we choose the constant $C$ which gives the best performance on test data (as in lecture). More on this below.\\\\
107 | 
108 | 
109 | How do we minimize this? To fix ideas, imagine that $C=1$, so that $\beta_1^2 + \beta_2^2 \leq 1$, and we seek to solve
110 | \begin{align}
111 | \min_{\beta} \; &\mathcal{L} (\beta) \\
112 | \beta_1^2 + \beta_2^2 &\leq 1.
113 | \end{align}
114 | 
115 | \section{Derivation of Lagrange Multipliers}
116 | 
117 | The following facts will make the above clear:
118 | \begin{itemize}
119 | \item $ \beta \mapsto \mathcal{L}(\beta)$ is constant along \emph{level sets} (ie. where $\mathcal{L}(\beta) = \textrm{ constant } $) by definition.
120 | \item $ \beta \mapsto \mathcal{L}(\beta)$ changes only in the direction \textbf{orthogonal} to the level sets, and this is given by $\nabla_{\beta} \mathcal{L}(\beta)$. This is clear because
121 | in any direction along the surface, $\mathcal{L}$ is constant.
122 | 
123 | \item \textbf{Case 1:} If $\nabla \mathcal{L}(\beta_0) = 0$ for some $\beta_0$ in $\beta_1^2 + \beta_2^2 < 1$, then we solve as we do in ordinary calculus.
124 | \item \textbf{Case 2:} If $\nabla \mathcal{L}(\beta) \neq 0$ everywhere in $\beta_1^2 + \beta_2^2 < 1$, then the minimum occurs on the boundary $\beta_1^2 + \beta_2^2 = 1$. This is the Lagrange multiplier case.
125 | \item Recall the vector orthogonal to a level set is the gradient vector. So if $g(\beta_1,\beta_2) = \beta_1^2 + \beta_2^2$, then the orthogonal vector to the surface $\beta_1^2 + \beta_2^2 = 1$ is in the direction
126 | of $\nabla g = 2\langle \beta_1, \beta_2 \rangle$.
127 | \item \textbf{Main Point:} The minimum of $\mathcal{L}$ has to occur at a point where $\nabla \mathcal{L}$ is in the same direction as $\nabla g$. If it weren't, then we could move along the surface $\beta_1^2 + \beta_2^2 = 1$ a bit
128 | to decrease the value (try drawing a picture or looking at the figures), so it wouldn't be a minimum!
129 | \end{itemize}
130 | 
131 | From the above points, we conclude that the minimum occurs at some point $\langle \beta_1^*, \beta_2^* \rangle$ such that
132 | \[ \nabla \mathcal{L}(\beta_1^*, \beta_2^*) = \lambda \nabla g(\beta_1^*, \beta_2^*). \]
133 | 
134 | \section{Interpreting Lasso and Ridge regression}
135 | 
136 | Observing the figure on the right, when we have $\beta_1^2 + \beta_2^2 \leq C$, we see the minimum has an equal chance of hitting the level set of $\beta_1^2 + \beta_2^2 = C$ at any point. As a result,
137 | the errors are generally equally distributed amongst the coefficients $\beta_1$ and $\beta_2$.\\\\
138 | 
139 | On the other hand, when $|\beta_1| + |\beta_2| = C$, we see that the level set of $\mathcal{L}$ is most likely to be tangent to the level sets (diamonds) at a corner (ie.
where $\beta_1=0$ or $\beta_2=0$),
140 | \textbf{since the boundary of the diamond has only 4 flat faces on which both coefficients are non-zero, making it highly unlikely that a level set of $\mathcal{L}$ has a tangent parallel to any one of these faces.}\\\\
141 | 
142 | 
143 | \textbf{Conclusion:} As a result, Lasso tends to result in \emph{sparser} coefficients (ie. many zero coefficients), while Ridge generally distributes the error more evenly among the coefficients.
144 | 
145 | \section{How much of a constraint do we use?}
146 | 
147 | Recall from lecture that we want to train a model by solving the problem:
148 | 
149 | \begin{align}
150 | \min_{\beta}\; &\mathcal{L} (\beta \,|\, (x_i,y_i) = \textrm{ training data }) \\
151 | &|\beta_1|^p + |\beta_2|^p \leq C.
152 | \end{align}
153 | 
154 | Let's denote by $\beta_C$ the solution to the above constrained optimization problem (note it depends on $C$), so that our model is
155 | \[ f_C(x) = \beta_{C} \cdot x.\]
156 | 
157 | Then the optimal $C$, denoted $C^*$, is determined by minimizing the relative error over the held-out test data $(x_i, y_i)$:
158 | 
159 | \begin{equation}
160 | C^* = \textrm{argmin}_C \frac{\sum_{i=1}^N (y_i - f_C(x_i))^2}{\sum_{i=1}^N (y_i - \bar y)^2}.
161 | \end{equation}
162 | \end{document}
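The two claims in these notes - that the $L^1$ constraint yields sparse coefficients while $L^2$ spreads the error, and that the constraint level is chosen by performance on held-out data - can be checked in a few lines. A minimal sketch (not part of this repo; synthetic data, with sklearn's `alpha` playing the role of the multiplier $\lambda$ attached to a constraint level $C$):

```python
import numpy as np
from sklearn.linear_model import Lasso, Ridge

rng = np.random.RandomState(0)
X = rng.randn(200, 10)
true_beta = np.array([3.0, -2.0] + [0.0] * 8)   # only 2 of 10 features matter
y = X.dot(true_beta) + 0.5 * rng.randn(200)

X_train, y_train, X_val, y_val = X[:150], y[:150], X[150:], y[150:]

best = None
for Model in (Lasso, Ridge):
    for alpha in (0.01, 0.1, 1.0, 10.0):
        model = Model(alpha=alpha).fit(X_train, y_train)
        val_mse = np.mean((model.predict(X_val) - y_val) ** 2)
        # Lasso zeroes out coefficients; Ridge shrinks but almost never zeroes them.
        n_zero = int(np.sum(model.coef_ == 0))
        print('%-5s alpha=%-5s val_mse=%.3f zero_coefs=%d'
              % (Model.__name__, alpha, val_mse, n_zero))
        if best is None or val_mse < best[0]:
            best = (val_mse, Model.__name__, alpha)
print('chosen by validation error: %s' % (best,))
```

Running this, the Lasso fits report several exactly-zero coefficients at moderate `alpha`, while the Ridge fits report none, matching the corner-tangency argument in Section 3.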
-------------------------------------------------------------------------------- /pdfs/lecture4notes.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Columbia-Intro-Data-Science/APMAE4990-/3b85ce52a29355353f10d13c70b973388e41496d/pdfs/lecture4notes.pdf
-------------------------------------------------------------------------------- /recengine/.gitignore: --------------------------------------------------------------------------------
1 | # Compiled source #
2 | ###################
3 | *.com
4 | *.class
5 | *.dll
6 | *.exe
7 | *.o
8 | *.so
9 | 
10 | # Packages #
11 | ############
12 | # it's better to unpack these files and commit the raw source
13 | # git has its own built in compression methods
14 | *.7z
15 | *.dmg
16 | *.gz
17 | *.iso
18 | *.jar
19 | *.rar
20 | *.tar
21 | *.zip
22 | 
23 | # Logs and databases #
24 | ######################
25 | *.log
26 | *.sqlite
27 | 
28 | ## python specific
29 | *.pyc
30 | 
31 | ## generic files to ignore
32 | *~
33 | *.lock
34 | *.DS_Store
35 | *.swp
36 | *.out
37 | 
38 | ## virtualenv files
39 | venv/*
-------------------------------------------------------------------------------- /recengine/README.md: --------------------------------------------------------------------------------
1 | # Recommendation Engine for Resident Advisor
2 | 
3 | **Website:** `http://69.195.124.253/~speakit9/data/`
4 | 
5 | # php-files:
6 | 
7 | - Contains the php files which read/write data to the SQL database and present
8 | the interface to the user. This is stored on my remote server speakinimages.com
9 | - userid.php is the main php file which reads the data from the database and outputs it into tables on the site.
10 | - write_events.php is a php script which writes the data to my SQL database. This program is modified depending on the particular
11 | table and format of the event.
12 | 
13 | 
14 | # scraper-programs:
15 | 
16 | - Contains the program RAEventPageScraper.py which scrapes event listing pages on RA. The scraped listings are then written
17 | to files which are read by event-suggestors/`RA_scrapedoutput_reader.py`
18 | 
19 | # nearest-neighbors-generators:
20 | 
21 | Contains `RASparse_rowcol_generator.py` which generates the row/column matrix entries for userid/events. Then `RA_generate_neighbors.py`
22 | uses the output of this file and collaborative filtering to list the nearest neighbors of each userid.
23 | 
24 | # event-suggestors:
25 | 
26 | - `RA_history_event_suggestions.py` generates recommendations from the output of `RA_scrapedoutput_reader.py` and the user's
27 | event history (djs, promoters, venues).
28 | - `RA_scrapedoutput_reader.py` reads the raw output from `RAEventPageScraper.py` and generates a listing of each user/event with the djs/promoters output
29 | on one line.
30 | - `RA_neighbors_event_suggestions.py` generates event suggestions by finding events that the nearest neighbors found in `RA_generate_neighbors.py` are attending,
31 | and listing them (up to a maximum of 10).
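The heart of the pipeline described in this README is user-user collaborative filtering on a 0/1 user-by-event attendance matrix. A toy, self-contained sketch of the idea (hypothetical data, not drawn from this repo):

```python
import numpy as np

# Rows are users, columns are events; 1 = attended / attending.
attendance = np.array([[1, 1, 0, 0],    # user 0
                       [1, 1, 1, 0],    # user 1, similar taste to user 0
                       [0, 0, 1, 1]])   # user 2
norms = np.linalg.norm(attendance, axis=1, keepdims=True)
unit = attendance / norms
sim = unit.dot(unit.T)              # user-user cosine similarity
neighbor = np.argsort(sim[0])[-2]   # most similar user to user 0, excluding user 0 itself
recs = np.where((attendance[neighbor] == 1) & (attendance[0] == 0))[0]
print(recs)                         # -> [2]: suggest event 2 to user 0
```

The scripts below do exactly this, at the scale of roughly 76k users by 25k events, using a scipy sparse matrix instead of a dense array.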
-------------------------------------------------------------------------------- /recengine/event-suggestors/RA_history_event_suggestions.py: --------------------------------------------------------------------------------
1 | # This program loads in all of the previous event history and then for each userid, will output
2 | # userid, events_dj1-5, events_promoter1-5, events_venue1-5 where events_dj, events_promoter and events_venue
3 | # are future events which have djs, promoters and venues in userid's history.
4 | 
5 | import pandas as pd
6 | import numpy as np
7 | import itertools
8 | 
9 | # Load in the first three event history files and merge them.
10 | mylist = [pd.read_csv('RAevent_data%d.csv' % i, delim_whitespace=True, error_bad_lines=False) for i in range(1,4)]
11 | df_f = pd.concat(mylist).drop_duplicates()
12 | 
13 | # These are the future urls (assumed to live in a fourth data file) - must be modified depending on date.
14 | future_urls = pd.read_csv('RAevent_data4.csv', delim_whitespace=True, error_bad_lines=False).drop_duplicates()[-2400:]
15 | 
16 | # List all of the user's past djs.
17 | def get_djs(userid):
18 |     df_user = df_f[df_f['userid']==userid]
19 |     mylist = list(df_user['dj1']) + list(df_user['dj2']) + list(df_user['dj3'])
20 |     return mylist
21 | 
22 | # List all of the user's past promoters.
23 | def get_promoters(userid):
24 |     df_user = df_f[df_f['userid']==userid]
25 |     mylist = list(df_user['promoter1']) + list(df_user['promoter2']) + list(df_user['promoter3'])
26 |     return mylist
27 | 
28 | # List all of the user's past venues.
29 | def get_venues(userid):
30 |     df_user = df_f[df_f['userid']==userid]
31 |     mylist = list(df_user['venue'])
32 |     return mylist
33 | 
34 | 
35 | # Find all future events which have a dj in the user's dj list from get_djs().
36 | def future_dj(userid):
37 |     dj_list = list(get_djs(userid))
38 |     event_list = []
39 |     dj_favs = []
40 |     for dj in dj_list:
41 |         if dj != 'None':
42 |             events_dj1 = future_urls[future_urls['dj1'] == dj]
43 |             events_dj2 = future_urls[future_urls['dj2'] == dj]
44 |             events_dj3 = future_urls[future_urls['dj3'] == dj]
45 |             event_list.append(list(events_dj1['url'].drop_duplicates()))
46 |             event_list.append(list(events_dj2['url'].drop_duplicates()))
47 |             event_list.append(list(events_dj3['url'].drop_duplicates()))
48 |             if (len(events_dj1) > 0 or len(events_dj2) > 0 or len(events_dj3) > 0) and dj not in dj_favs:
49 |                 dj_favs.append(dj)
50 |     return list(itertools.chain(*event_list)), dj_favs, userid
51 | 
52 | # Find all future events which have a promoter in the user's promoter list from get_promoters().
53 | def future_promoters(userid):
54 |     promoter_list = list(get_promoters(userid))
55 |     event_list = []
56 |     promoter_favs = []
57 |     for promoter in promoter_list:
58 |         if promoter != 'None':
59 |             events_dj1 = future_urls[future_urls['promoter1'] == promoter]
60 |             events_dj2 = future_urls[future_urls['promoter2'] == promoter]
61 |             events_dj3 = future_urls[future_urls['promoter3'] == promoter]
62 |             event_list.append(list(events_dj1['url'].drop_duplicates()))
63 |             event_list.append(list(events_dj2['url'].drop_duplicates()))
64 |             event_list.append(list(events_dj3['url'].drop_duplicates()))
65 |             if (len(events_dj1) > 0 or len(events_dj2) > 0 or len(events_dj3) > 0) and promoter not in promoter_favs:
66 |                 promoter_favs.append(promoter)
67 |     return list(itertools.chain(*event_list)), promoter_favs, userid
68 | 
69 | # Find all future events with a venue the user has attended previously.
70 | 
71 | def future_venue(userid):
72 |     venue_list = list(get_venues(userid))
73 |     event_list = []
74 |     venue_favs = []
75 |     for venue in venue_list:
76 |         if venue != 'None':
77 |             events_dj1 = future_urls[future_urls['venue'] == venue]
78 |             event_list.append(list(events_dj1['url'].drop_duplicates()))
79 |             if len(events_dj1) > 0 and venue not in venue_favs:
80 |                 venue_favs.append(venue)
81 |     return list(itertools.chain(*event_list)), venue_favs, userid
82 | 
83 | # Next we need to find future events with these particular performers.
84 | 
85 | # Load three arrays, url_djs, url_promoters, url_venues with the above data, up to a maximum of 5 urls each.
86 | def generate_favs(userid):
87 |     urls1, djs, uid = future_dj(userid)
88 |     urls2, promoters, uid = future_promoters(userid)
89 |     urls3, venues, uid = future_venue(userid)
90 |     length1 = len(urls1)
91 |     length2 = len(urls2)
92 |     length3 = len(urls3)
93 |     url_djs = []
94 |     url_promoters = []
95 |     url_venues = []
96 | 
97 |     # Fill url_djs array with suggested urls.
98 |     for i in range(0, min(length1,5)):
99 |         url_djs.append(urls1[i])
100 | 
101 |     # Fill url_promoters with suggested urls.
102 |     for i in range(0, min(length2,5)):
103 |         url_promoters.append(urls2[i])
104 | 
105 |     # Fill url_venues with suggested urls.
106 |     for i in range(0, min(length3,5)):
107 |         url_venues.append(urls3[i])
108 | 
109 |     # If there aren't enough, simply put None as a placeholder for each.
110 |     if length1 < 5:
111 |         for i in range(length1, 5):
112 |             url_djs.append('None')
113 |     if length2 < 5:
114 |         for i in range(length2, 5):
115 |             url_promoters.append('None')
116 |     if length3 < 5:
117 |         for i in range(length3, 5):
118 |             url_venues.append('None')
119 |     return url_djs, url_promoters, url_venues
120 | 
121 | 
122 | 
123 | # Go through all users, and list suggestions as userid, event_djs, event_promoters, event_venues. This data will be
124 | # loaded into SQL after being written to file.
125 | 
126 | for user in list(df_f['userid'].drop_duplicates()):
127 |     result = generate_favs(user)
128 |     print user,
129 |     for i in range(0,3):
130 |         for j in range(0,5):
131 |             print result[i][j],
132 |     print ''
133 | 
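A note on the matching logic above: the per-dj Python loops in `future_dj()` can be collapsed into one vectorized pass with pandas `isin()`. A sketch under the same assumptions (frames shaped like `df_f` and `future_urls` above, with the string 'None' as the missing marker):

```python
import pandas as pd

def future_dj_events(df_history, df_future, userid):
    # Collect the user's past djs across the three dj columns.
    user_rows = df_history[df_history['userid'] == userid]
    djs = set(pd.concat([user_rows['dj1'], user_rows['dj2'], user_rows['dj3']])) - {'None'}
    # One boolean mask over the future events instead of one scan per dj.
    mask = (df_future['dj1'].isin(djs)
            | df_future['dj2'].isin(djs)
            | df_future['dj3'].isin(djs))
    return df_future.loc[mask, 'url'].drop_duplicates().tolist()
```

The same pattern applies to the promoter and venue matchers.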
-------------------------------------------------------------------------------- /recengine/event-suggestors/RA_neighbors_event_suggestions.py: --------------------------------------------------------------------------------
1 | # This program generates for each userid, a list of 10 (maximum) events which other users who are 'similar' to userid
2 | # are attending on RA.
3 | from scipy import sparse
4 | 
5 | from numpy.random import rand
6 | import pandas as pd
7 | from scipy.sparse import coo_matrix
8 | from sklearn.neighbors import NearestNeighbors
9 | from sklearn.decomposition import TruncatedSVD
10 | from sklearn.neighbors import KNeighborsClassifier
11 | from scipy.sparse.linalg import eigs
12 | import numpy as np
13 | 
14 | 
15 | # Load in all lists of userids/event pairs generated by RAEventMatrixGenerator.py
16 | 
17 | df_eventurl0 = pd.read_csv('../RAUseridEventurl.csv', delim_whitespace=True, error_bad_lines=False)
18 | df_eventurl1 = pd.read_csv('../RAUseridEventurl-1.csv', delim_whitespace=True, error_bad_lines=False)
19 | df_eventurl2 = pd.read_csv('../RAUseridEventurl-2.csv', delim_whitespace=True, error_bad_lines=False)
20 | df_eventurl3 = pd.read_csv('../RAUseridEventurl-3.csv', delim_whitespace=True, error_bad_lines=False)
21 | df_uidevent = pd.read_csv('../RA_row_cols_indexreset2.csv', delim_whitespace=True)
22 | 
23 | # List of rows/columns of user attendances.
24 | df_rowcols1 = pd.read_csv('RA_row_col_id_urlSept25_2.csv', delim_whitespace=True)
25 | df_rowcols2 = pd.read_csv('../RA_row_col_id_urlSept25_2Part2.csv', delim_whitespace=True)
26 | rowcols = [df_rowcols1,df_rowcols2]
27 | 
28 | # Concatenate the two files into one.
29 | df_rowcols = pd.concat(rowcols, ignore_index=True).drop_duplicates()
30 | 
31 | # Merge all of the data files which contain the userid/eventurl pairs.
32 | df_eventurl0 = df_eventurl0.drop_duplicates()
33 | df_eventurl1 = df_eventurl1.drop_duplicates()
34 | df_eventurl2 = df_eventurl2.drop_duplicates()
35 | df_eventurl3 = df_eventurl3.drop_duplicates()
36 | mylist = [df_eventurl0, df_eventurl1, df_eventurl2, df_eventurl3]
37 | df_eventurl = pd.concat(mylist, ignore_index=True).drop_duplicates()
38 | rows = df_uidevent['row']
39 | 
40 | 
41 | # Some data was corrupt - detect it by looking for "h" in the userid, and cut out the rows without a valid URL.
42 | 
43 | bad_index1 = df_eventurl['userid'].str.contains("h")
44 | df_eventurl = df_eventurl[bad_index1 == False]
45 | df_eventurl = df_eventurl[df_eventurl['url'].str.contains("http") == True]
46 | 
47 | 
48 | # Generate the sparse matrix of users/events.
49 | rows = np.array(df_rowcols['row'])
50 | columns = np.array(df_rowcols['column'])
51 | data = [1]*len(columns)
52 | X = coo_matrix((data, (rows,columns)), shape=(75988+1,25022+1))
53 | 
54 | 
55 | # Load the file of nearest neighbor rows for each userid.
56 | 
57 | df_nn = pd.read_csv('RA_nearest_neighbors_normalizedOct1.csv', delim_whitespace=True)
58 | 
59 | 
60 | # Get the future events of the user on user_row. Modify the -100 window depending on date.
61 | def get_events(user_row):
62 |     user_events = np.squeeze(np.asarray(X.getrow(user_row).todense()))[-100:]
63 |     nonzeroind = np.nonzero(user_events)[0]
64 |     nonzeroind = np.add(nonzeroind, 25022+1-100)  # the offset must match the -100 slice above
65 |     return nonzeroind
66 | 
67 | 
68 | # For each of the nearest neighbors, find the future events they are attending and append them to event_list.
69 | def print_top_events(user_row_list):
70 |     event_list = []
71 |     reversed_list = user_row_list[::-1]
72 |     for user_row in reversed_list[2:]:
73 |         events = get_events(user_row)
74 |         for event in events:
75 |             event_object = df_rowcols[df_rowcols['column'] == event]['url'].drop_duplicates()
76 |             event_object = str(event_object)[str(event_object).find('http:'):str(event_object).find('\n')]
77 |             event_list.append(event_object)
78 |     return event_list
79 | 
80 | 
81 | # Print the column headers at the top of the csv file so pandas can read it easily later.
82 | print 'userid',
83 | for i in range(1,11):
84 |     mystr = 'url' + str(i)
85 |     print mystr,
86 | print ''
87 | 
88 | # For each user, get their list of nearest neighbors, then print the userid and the top 10 suggested events.
89 | for i in range(0,len(df_nn)):
90 |     sample = np.squeeze(np.asarray(df_nn.ix[i]))
91 |     event_list = print_top_events(sample)
92 |     max_num=0
93 |     if len(event_list) > 0:
94 |         print str(df_rowcols[df_rowcols['row']==i]['userid'].drop_duplicates()).split()[1],
95 |         for event in event_list:
96 |             if max_num >= 10:
97 |                 break
98 |             else:
99 |                 print event,
100 |                 max_num += 1
101 |         print ''
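`get_events()` above densifies a full matrix row on every call. Since CSR rows expose their non-zero column indices directly, the same lookup can be done without `todense()`. A sketch reusing the names from the script above (the 100-column "future events" window is the same hard-coded assumption):

```python
import numpy as np

Xr = X.tocsr()  # convert the coo_matrix once; CSR supports row slicing

def get_events_sparse(user_row, n_cols=25022 + 1, n_future=100):
    cols = Xr[user_row].indices                     # non-zero columns for this user's row
    return np.sort(cols[cols >= n_cols - n_future]) # keep only the trailing "future" columns
```

This avoids allocating a 25k-entry dense vector per neighbor per user in the main loop.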
-------------------------------------------------------------------------------- /recengine/event-suggestors/RA_scrapedoutput_reader.py: --------------------------------------------------------------------------------
1 | import pandas as pd
2 | import urllib2
3 | from bs4 import BeautifulSoup
4 | import re
5 | from multiprocessing import Pool
6 | 
7 | import numpy as np
8 | 
9 | 
10 | # The prices are often listed as price + fee. This takes the string, adds those numbers, and returns the total as a float.
11 | def add_prices(price_str):
12 |     if price_str is None:
13 |         return None
14 |     price_str = re.sub("[\\\][x|a|c]",'',price_str)
15 |     price_str = re.sub("a",'', price_str)
16 |     if price_str.find('+') != -1:
17 |         if price_str.replace('+','').isdigit():
18 |             return np.float(price_str.split('+')[0]) + np.float(price_str.split('+')[1])
19 |         else:
20 |             return 0
21 |     else:
22 |         if price_str.isdigit() == True:
23 |             return np.float(price_str)
24 |         else:
25 |             return 0
26 | 
27 | # Given the raw scraper output generated by RAEventPageScraper.py, break up the information and return price, lineup, promoters, venue, url.
28 | def read_event_data():
29 |     # Note: this reads the module-level variable sample_data, which is set in the loop below.
30 |     tickets_available = re.sub("[(]",'',sample_data.split(',')[0])
31 |     release = re.sub("[(')]", '', sample_data.split(',')[1])
32 |     if re.sub("[ ']", '', sample_data.split(',')[1]) == 'None':
33 |         price = 0
34 |         istart = 3
35 |     else:
36 |         price = add_prices(re.sub("[u'$)( ]", '', sample_data.split(',')[2]))
37 |         istart = 4
38 |     split_data = sample_data.split(',')
39 |     lineup = []
40 |     done = False
41 | 
42 |     if re.sub("[ \['\]]",'',sample_data.split(',')[istart])[0:]=='':
43 |         lineupExists=False
44 |         istart += 1
45 |     else:
46 |         for i in range(istart,10):
47 |             lineup.append(re.sub("[ \['\]]",'',sample_data.split(',')[i])[0:])
48 |             if split_data[i][-2:] == ']]':
49 |                 done = True
50 |                 break
51 |             else:
52 |                 pass
53 |         istart = i+1
54 |     promoters = []
55 | 
56 |     if re.sub("[ \['\]]", '', sample_data.split(',')[istart]) == '':
57 |         promotersExist = False
58 |         istart += 1
59 |     else:
60 |         promotersExist = True
61 |         istart -=1
62 |     if promotersExist == True:
63 |         for j in range(istart+1,istart+5):
64 |             promoters.append(re.sub("[ \['\]]",'',sample_data.split(',')[j])[0:])
65 |             if split_data[j][-2:] == ']]':
66 |                 done = True
67 |                 break
68 |             else:
69 |                 pass
70 |         istart = j+1
71 |     venue = re.sub("[' \[\]]",'',split_data[istart])
72 |     url = re.sub(' ', '', sample_data[sample_data.find('http://www.residentadvisor.net/event.aspx?'):])
73 |     return price, lineup, promoters, venue, url
74 | 
75 | 
76 | user_id_list = []
77 | def get_user_ids():
78 |     id_index = [m.start() for m in re.finditer("Id", sample_data)]
79 |     user_ids = []
80 |     for i in id_index:
81 |         user_ids.append(re.sub("[^\\d]",'',sample_data[i+3:i+15]))
82 |     return user_ids
83 | 
84 | # This is the raw output file generated by RAEventPageScraper.py.
85 | file = open('RAScraperDumpSept22_1')
86 | lines = file.readlines()
87 | 
88 | # Format the columns so pandas gives column headers later.
89 | print 'userid cost dj1 dj2 dj3 promoter1 promoter2 promoter3 venue url'
90 | 
91 | 
92 | # For each line of raw output:
93 | for line in lines:
94 |     data = []
95 |     sample_data = line
96 |     start = sample_data.find('ai=8')
97 |     end = sample_data.find('dy=')
98 |     print sample_data[start:end+5]
99 |     user_list = get_user_ids()
100 |     for user_id in user_list:
101 |         event_data_list = read_event_data()
102 |         data.append(user_id)
103 |         data.append(event_data_list[0])
104 |         dj_list = event_data_list[1][0:min(3,len(event_data_list[1]))]
105 |         for dj in dj_list:
106 |             data.append(dj)
107 |         for i in range(0, max(3 - len(event_data_list[1]), 0)):
108 |             data.append('None')
109 |         promoter_list = event_data_list[2][0:min(3,len(event_data_list[2]))]
110 |         for promoter in promoter_list:
111 |             data.append(promoter)
112 |         for i in range(0, max(3 - len(event_data_list[2]), 0)):
113 |             data.append('None')
114 |         data.append(event_data_list[3])
115 |         data.append(event_data_list[-1])
116 |         # Print all fields for this user on one line.
117 |         for item in data:
118 |             print item,
119 |         print ''
120 |         data = []
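The price strings this script parses look like `$20` or `$20 + $5`. A more forgiving variant of `add_prices()` (a sketch with a hypothetical name, not a tested drop-in replacement) extracts every number with one regex and sums them, instead of stripping characters and relying on `isdigit()`:

```python
import re

def parse_price(price_str):
    # Sum every numeric token, e.g. "$20 + $5 booking fee" -> 25.0.
    if price_str is None:
        return None
    amounts = re.findall(r'\d+(?:\.\d+)?', price_str)
    return sum(float(a) for a in amounts) if amounts else 0.0
```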
-------------------------------------------------------------------------------- /recengine/nearest-neighbors-generators/RASparse_rowcol_generator.py: --------------------------------------------------------------------------------
1 | # The purpose of this program is to take each userid/url pair, convert it into a row/column, and output the data so we can later
2 | # generate a sparse matrix with RA_generate_neighbors.py
3 | 
4 | from scipy import sparse
5 | from numpy import linalg
6 | from numpy.random import rand
7 | import pandas as pd
8 | import numpy as np
9 | 
10 | # Load in all userid/event files and merge them into one dataframe.
11 | df_eventurl0 = pd.read_csv('../RAUseridEventurl.csv', delim_whitespace=True, error_bad_lines=False)
12 | df_eventurl1 = pd.read_csv('../RAUseridEventurl-1.csv', delim_whitespace=True, error_bad_lines=False)
13 | df_eventurl2 = pd.read_csv('../RAUseridEventurl-2.csv', delim_whitespace=True, error_bad_lines=False)
14 | df_eventurl3 = pd.read_csv('../RAUseridEventurl-3.csv', delim_whitespace=True, error_bad_lines=False)
15 | df_eventurl0 = df_eventurl0.drop_duplicates()
16 | df_eventurl1 = df_eventurl1.drop_duplicates()
17 | df_eventurl2 = df_eventurl2.drop_duplicates()
18 | df_eventurl3 = df_eventurl3.drop_duplicates()
19 | mylist = [df_eventurl0, df_eventurl1, df_eventurl2, df_eventurl3]
20 | df_eventurl = pd.concat(mylist).drop_duplicates()
21 | 
22 | # Simply gets the event number from the URL (the number after the ?).
23 | def get_event_num(event_url):
24 |     try:
25 |         pos = event_url.find('?')
26 |     except:
27 |         return 0
28 |     if pos != -1:  # str.find returns -1 when there is no '?', and -1 is truthy
29 |         return event_url[pos+1:]
30 |     else:
31 |         return 0
32 | row = np.array(df_eventurl['userid'])
33 | df_eventurl['url2'] = df_eventurl['url'].apply(get_event_num)
34 | col = np.array(df_eventurl['url'].apply(get_event_num))
35 | df_userids = df_eventurl['userid'].drop_duplicates()
36 | 
37 | 
38 | # Filter out the corrupt data in the dataframe.
39 | df_eventurl = df_eventurl[df_eventurl['url'].notnull()]
40 | df_eventurl = df_eventurl[df_eventurl['url'] != 'e']
41 | df_eventurl = df_eventurl[df_eventurl['userid'] != 'e']
42 | df_eventurl['url2'] = df_eventurl['url'].apply(get_event_num)
43 | 
44 | # Reset the index after the merge so we can associate rows/columns with userids/events.
45 | df_useridrow = df_eventurl['userid'].drop_duplicates().reset_index(drop=True)
46 | df_eventcolumn = df_eventurl['url'].drop_duplicates().reset_index(drop=True)
47 | 
48 | 
49 | # Print out the row, column, userid and url now - the output will then be used in RA_generate_neighbors.py
50 | 
51 | print 'row column userid url'
52 | 
53 | 
54 | # Get rid of the corrupt data which was in the file - some URLs were not valid.
55 | bad_index1 = df_eventurl['userid'].str.contains("h")
56 | df_eventurl = df_eventurl[bad_index1 == False]
57 | df_eventurl = df_eventurl[df_eventurl['url'].str.contains("http") == True]
58 | df_userids = df_eventurl['userid'].drop_duplicates().reset_index(drop=True)
59 | df_urls = df_eventurl['url'].drop_duplicates().reset_index(drop=True)
60 | 
61 | # Go through the entire list and print row/col, userid/event. This output will be used by RA_generate_neighbors.py to form the sparse matrix.
62 | for i in range(0, len(df_eventurl)):
63 |     try:
64 |         print df_userids[df_userids == df_eventurl['userid'][i]].index.tolist()[0], df_urls[df_urls == df_eventurl['url'][i]].index.tolist()[0], df_eventurl['userid'][i], df_eventurl['url'][i], i
65 |     except:
66 |         pass
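The final loop above rescans `df_userids` and `df_urls` once per row, which is quadratic in the number of pairs, and its bare `except` also silently skips index labels dropped by the filtering. `pd.factorize` assigns the same first-appearance integer ids in one vectorized pass. A sketch over the same `df_eventurl` frame (the codes should agree with the `drop_duplicates().reset_index()` numbering used above):

```python
import pandas as pd

user_codes, unique_users = pd.factorize(df_eventurl['userid'])  # row index per userid
url_codes, unique_urls = pd.factorize(df_eventurl['url'])       # column index per event url
out = pd.DataFrame({'row': user_codes, 'column': url_codes,
                    'userid': df_eventurl['userid'].values,
                    'url': df_eventurl['url'].values})
print(out.head())
```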
-------------------------------------------------------------------------------- /recengine/nearest-neighbors-generators/RA_generate_neighbors.py: --------------------------------------------------------------------------------
1 | # This program generates 5 nearest neighbors for each userid, using collaborative filtering. In particular, the sparse matrix
2 | # of userid/event is generated, then one takes the dot product of this matrix (normalized) with itself.
3 | # This program should load csv files generated by RASparse_rowcol_generator.py (there are 2 files below since I did this in two stages).
4 | 
5 | from scipy import sparse
6 | import pandas as pd
7 | from scipy.sparse import coo_matrix
8 | import numpy as np
9 | from sklearn.preprocessing import normalize
10 | 
11 | # Load in all row/column/id entries which will form our sparse matrix.
12 | df_rowcols1 = pd.read_csv('../RA_row_col_id_urlSept25_2.csv', delim_whitespace=True)
13 | df_rowcols2 = pd.read_csv('../RA_row_col_id_urlSept25_2Part2.csv', delim_whitespace=True)
14 | rowcols = [df_rowcols1,df_rowcols2]
15 | df_rowcols = pd.concat(rowcols, ignore_index=True).drop_duplicates()
16 | 
17 | 
18 | # Generate the sparse userid/event matrix.
19 | rows = np.array(df_rowcols['row'])
20 | columns = np.array(df_rowcols['column'])
21 | data = [1.0]*len(columns)
22 | X = coo_matrix((data, (rows,columns)), shape=(75988+1,25022+1))
23 | 
24 | # Normalize each row (user vector) to unit L2 length, so the dot products below are cosine similarities.
25 | X_n = normalize(X, norm='l2', axis=1)
26 | 
27 | # Take the dot product with the transpose to generate the matrix of user/user similarity.
28 | Y = X_n.dot(X_n.T)
29 | 
30 | # Output the 5 nearest neighbors for each userid by taking the top 5 entries from each row in Y (the largest entry is typically the user itself).
31 | print 'n1 n2 n3 n4 n5 row'
32 | for i in range(0, 75988+1):
33 |     row_nn = np.squeeze(np.asarray(Y.getrow(i).todense()))
34 |     nnarr = np.argsort(row_nn)[-5:]
35 |     print nnarr[0], nnarr[1], nnarr[2], nnarr[3], nnarr[4], i
36 | 
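Since the rows of `X_n` have unit length, `Y = X_n.dot(X_n.T)` is exactly the matrix of user/user cosine similarities, but materializing all ~76k x 76k entries is expensive. A sketch (same variable names as the script above) that scores one user at a time and selects the top k with `argpartition`:

```python
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

X_n = X_n.tocsr()  # row indexing requires CSR

def top_k_neighbors(user_row, k=5):
    sims = cosine_similarity(X_n[user_row], X_n).ravel()  # one user vs. everyone
    sims[user_row] = -1.0                                 # exclude the user themselves
    top = np.argpartition(sims, -k)[-k:]                  # O(n) selection of the k largest
    return top[np.argsort(sims[top])[::-1]]               # order by similarity, descending
```

Excluding the user's own row here is why the downstream consumer would no longer need to skip the self-match by hand.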
\n";
 90 |                 echo htmlspecialchars($_POST['content']);
 91 |                 echo " when tickets are available.";
 92 |                 echo "\n
"; 93 | 94 | } 95 | 96 | if($DisplayForm) { 97 | ?> 98 | 106 | Resident Advisor Ticket Advisor 107 |
Test
108 |

Please enter the full URL of the Resident Advisor event (eg. http://www.residentadvisor.net/event.aspx?558361) and your email address, then 109 | click Submit.

110 |
111 | 112 | 113 | 114 | 115 |
116 | 118 | 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /recengine/php-files/ratickets4b.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import urllib2 4 | import time 5 | import sys 6 | import smtplib 7 | import os.path 8 | import os 9 | import MySQLdb 10 | 11 | # Connect to MYSQL database 12 | db = MySQLdb.connect(host="localhost", # your host, usually localhost 13 | user="speakit9", # your username 14 | passwd="Dorianbassem@11", # your password 15 | db="speakit9_RA") # name of the data base 16 | 17 | # you must create a Cursor object. It will let 18 | # you execute all the queries you need 19 | cur = db.cursor() 20 | 21 | # Select all entries from ticket_advisor table. 22 | cur.execute("SELECT * FROM ticket_advisor") 23 | 24 | # print all the first cell of all the rows 25 | 26 | # Gmail account login information 27 | 28 | fromaddr = 'raticketapp2@gmail.com' 29 | msg='Tickets are now available on RA' 30 | username='raticketapp2@gmail.com' 31 | password='hntpcv01!!' 32 | 33 | fromaddr = 'speakit9@speakinimages.com' 34 | msg = 'Tickets are now available on RA' 35 | username='speakit9' 36 | password='Dorianbassem@11' 37 | # Go through every line in the RAEventFile 38 | 39 | 40 | for row in cur.fetchall(): #for lines in rafile: 41 | # If the event hasn't already sent out tickets, then access the email_list file for the event and send out emails to everyone 42 | if row[2] == 0: 43 | print "Haven't sent out tickets yet" 44 | soup = urllib2.urlopen(row[1]).read() 45 | if soup.find('Add to basket')==-1: 46 | print('None') 47 | else: 48 | server = smtplib.SMTP('localhost:25') 49 | #server.ehlo() 50 | server.starttls() 51 | server.login(username,password) 52 | sql_string = "UPDATE ticket_advisor SET sent=1 WHERE email='" + row[0] + "' and url='" + row[1] + "'" 53 | print sql_string 54 | cur.execute(sql_string); 55 | print 'Sending email' 56 | msg = "\r\n".join([ 57 | "From: RATicketAdvisor@gmail.com", 58 | "To: user@gmail.com", 59 | "Subject: Tickets for the event at " + row[1] + " are now available!", 60 | "", 61 | "Tickets for the event at " + row[1] + " are now available!" 62 | ]) 63 | server.sendmail('raticketapp2@gmail.com',row[0],msg) 64 | server.quit() 65 | print('Tickets available!') 66 | 67 | -------------------------------------------------------------------------------- /recengine/php-files/userid.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | Resident Advisor Event Advisor 8 | 9 | 10 | 18 |
19 | 20 |
21 |
22 |
23 |
24 |
25 |

Please enter your username to get event recommendations

26 |
27 | 30 | 31 | 32 | 33 |

34 |
35 | I want to know when tickets to a particular event will be available
36 | 45 |
46 |
47 | 48 | 49 | -------------------------------------------------------------------------------- /recengine/php-files/userid.php: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | Resident advisor event recommendations 9 | 10 | 11 | 172 | 173 | 174 | 206 | 207 | 208 | 209 |
210 | 211 | 212 | 213 |
214 | 215 | 230 | 231 | 232 | 233 | "; 244 | } 245 | 246 | // Get users id from SQL database 247 | 248 | $username = $_POST["users_id"]; 249 | $result_username = mysqli_query($con, "SELECT distinct * FROM userids WHERE username like '$username%'"); 250 | $row_userid = mysqli_fetch_array($result_username); 251 | 252 | // Find url which links to users profile image. 253 | 254 | $image = 'http://www.residentadvisor.net/images/user/av/' . $row_userid['userid'] . '-' . $username . '.jpg'; 255 | echo "







"; 256 | 257 | // Check to see if user image exists on RA website, and otherwise just display default message 258 | 259 | if (@getimagesize($image)) { 260 | echo "

"; 261 | echo ""; 265 | echo "
"; 262 | echo ""; 263 | echo "

Welcome " . $row_userid['username'] . "!


"; 264 | echo "
"; 266 | echo "
"; 267 | } else 268 | { 269 | echo "

Welcome " . $row_userid['username'] . "!


"; 270 | } 271 | echo ''; 272 | 273 | // Create column headers for event suggestions 274 | 275 | echo ''; 276 | echo ''; 277 | echo ''; 278 | echo ''; 279 | 280 | // Find user favorites from favorites table 281 | 282 | $id = $row_userid['userid']; 283 | $result = mysqli_query($con, "SELECT * FROM favorites WHERE userid = '$id'"); 284 | $row = mysqli_fetch_array($result); 285 | 286 | $result2 = mysqli_query($con, "SELECT * FROM favs WHERE uid = '$id'"); 287 | $row2 = mysqli_fetch_array($result2); 288 | 289 | 290 | // Initalize arrays 291 | 292 | $title_dj = array(); // Event titles related to dj history 293 | $title_promoter = array(); // Event titles related to promoter history 294 | $title_venue = array(); // Event titles related to venue history (currently not using) 295 | $tickets = array(); // Indicates if tickets are available for event or not 296 | $str_tickets = array(); // Added to title in the case tickets are not available 297 | $dates = array(); // Dates for the events 298 | $event_num = array(); // Obtain the event number to get the event logo image from site 299 | 300 | // For events based on djs, get top event suggetions from MySQL database and display them. 301 | 302 | for($i=1; $i <= 5; $i++) { 303 | $column = 'e' . $i; 304 | $url = $row2[$column]; 305 | 306 | // Get event name of the url listed. 307 | 308 | $result_url = mysqli_query($con, "SELECT * FROM event_names WHERE url like '%$url%'"); 309 | $row_url = mysqli_fetch_array($result_url); 310 | 311 | // Get event date of the url listed. 312 | 313 | $date_query = mysqli_query($con, "SELECT * FROM dates WHERE url like '%$url%'"); 314 | $row_dates = mysqli_fetch_array($date_query); 315 | $dates[$i] = $row_dates['date']; 316 | 317 | 318 | // Create the title, after some formatting to remove "RA Tickets:" 319 | 320 | $title_dj[$i] = $row_url['name']; 321 | $title_dj[$i] = str_replace(':','',strstr($title_dj[$i], ':')); 322 | 323 | // Indicates true/false for ticket availability 324 | 325 | $tickets[$i] = str_replace(' ', '', $row_url['tickets']); 326 | 327 | 328 | // Obtain event number from url to generate image url for logo 329 | 330 | $event_num[$i] = str_replace('?','',strstr($url, '?')); 331 | 332 | // This code finds the venue and places it on a new line in unbolded font 333 | 334 | $ename = strstr($title_dj[$i], " at ", true); 335 | $location = str_replace(" at ", "", strstr($title_dj[$i], " at ")); 336 | if(strlen($location) > 1) { 337 | $title_dj[$i] = "" . $ename . "" . "
" . $location; 338 | } else { 339 | $title_dj[$i] = "" . $title_dj[$i] . ""; 340 | } 341 | 342 | // If tickets are not available, add a button which connects to the php form which notifies users when tickets are available. 343 | 344 | if(strcmp($tickets[$i],'True') == 0) { 345 | $str_tickets[$i] = ''; 346 | } else { 347 | $str_tickets[$i] = ""; 348 | } 349 | } 350 | 351 | // For events based on promoters, get top event suggetions from MySQL database and display them. 352 | 353 | for($i=5; $i <= 10; $i++) { 354 | 355 | $column = 'e' . $i; 356 | $url = $row2[$column]; 357 | 358 | // Obtain event names from SQL database. 359 | 360 | $result_url = mysqli_query($con, "SELECT * FROM event_names WHERE url like '%$url%'"); 361 | $row_url = mysqli_fetch_array($result_url); 362 | 363 | 364 | // Obtain dates of events from SQL database. 365 | $date_query = mysqli_query($con, "SELECT * FROM dates WHERE url like '%$url%'"); 366 | $row_dates = mysqli_fetch_array($date_query); 367 | 368 | // Fetch the event title, and format it as before to have venue on the bottom in non-bold text. 369 | 370 | $title_promoter[$i-5] = $row_url['name']; 371 | $title_promoter[$i-5] = str_replace(':','',strstr($title_promoter[$i-5], ':')); 372 | $ename = strstr($title_promoter[$i-5], " at ", true); 373 | $location = str_replace(" at ", "", strstr($title_promoter[$i-5], " at ")); 374 | if(strlen($location) > 1) { 375 | $title_promoter[$i-5] = "" . $ename . "" . "
" . $location; 376 | } else { 377 | $title_promoter[$i-5] = "" . $title_promoter[$i-5] . ""; 378 | } 379 | 380 | // Fetch ticket availability information from SQL. 381 | $tickets[$i] = str_replace(' ', '', $row_url['tickets']); 382 | 383 | // Find event number from url to generate logo for event. 384 | 385 | $event_num[$i] = str_replace('?','',strstr($url, '?')); 386 | 387 | // Fetch even date from SQL. 388 | $dates[$i] = $row_dates['date']; 389 | 390 | 391 | // If tickets are not yet available, create button which when clicked informs users when tickets are available after supplying email. 392 | 393 | if(strcmp($tickets[$i],'True') == 0) { 394 | $str_tickets[$i] = ''; 395 | } else { 396 | 397 | $str_tickets[$i] = ""; 398 | } 399 | } 400 | // Code to generate suggestions based on venues, currently not in use but may use in future. 401 | 402 | /* 403 | for($i=10; $i <= 15; $i++) { 404 | $column = 'e' . $i; 405 | $url = $row2[$column]; 406 | $result_url = mysqli_query($con, "SELECT * FROM event_names WHERE url like '%$url%'"); 407 | $row_url = mysqli_fetch_array($result_url); 408 | $date_query = mysqli_query($con, "SELECT * FROM dates WHERE url like '%$url%'"); 409 | $row_dates = mysqli_fetch_array($date_query); 410 | $title_venue[$i-10] = $row_url['name']; 411 | $tickets[$i] = str_replace(' ', '', $row_url['tickets']); 412 | $title_venue[$i] = str_replace(':','',strstr($title_venue[$i], ':')); 413 | $event_num[$i] = str_replace('?','',strstr($url, '?')); 414 | $dates[$i] = $row_dates['date']; 415 | if(strcmp($tickets[$i],'True') == 0) { 416 | $str_tickets[$i] = ''; 417 | } else { 418 | $str_tickets[$i] = 'Notify me when tickets become available! (Coming soon)'; 419 | 420 | $str_tickets[$i] = " "; 421 | $str_tickets[$i] = "Notify me when tickets are available"; 422 | // $str_tickets[$i] = " 1) { 459 | $title2_s[$k] = $row_url['name']; 460 | 461 | // Remove the "RA:" part of the title. 462 | $title2_s[$k] = "" . str_replace(':','',strstr($title2_s[$k], ':')) . ""; 463 | 464 | // Seperate the name of the event and venue. 465 | $ename = strstr($title2_s[$k], " at ", true); 466 | $location = str_replace(" at ", "", strstr($title2_s[$k], " at ")); 467 | 468 | // If there is a venue, put it on a new line non-bolded, otherwise just set title to be the name. 469 | if(strlen($location) > 1) { 470 | $title2_s[$k] = "" . $ename . "" . "
" . $location; 471 | } else { 472 | $title2_s[$k] = "" . $title2_s[$k] . ""; 473 | } 474 | 475 | // See if tickets are available. 476 | $tickets_s[$k] = str_replace(' ', '', $row_url['tickets']); 477 | 478 | // If tickets are not available, add button which lets users be notified when they are. 479 | 480 | $event_num_s[$k] = str_replace('?','',strstr($url, '?')); 481 | if(strcmp($tickets_s[$k],'True') == 0) { 482 | $str_tickets_s[$k] = ''; 483 | } else { 484 | $str_tickets_s[$k] = 'Notify me when tickets become available! (Coming soon)'; 485 | 486 | $str_tickets_s[$k] = " "; 487 | } 488 | } 489 | } 490 | 491 | // Finally output the suggestions into the table. 492 | 493 | echo "

Based on your event history preferences we recommend the following events:

" . "
"; 494 | 495 | for($i =1; $i <= count($title_dj); $i++) { 496 | $column = 'e' . $i; 497 | $url = $row2[$column]; 498 | echo "
"; 499 | echo ""; 575 | } 576 | echo "

Favorite DJs

Favorite promoters

Users similar to you are attending

"; 500 | 501 | // Fron the date string, we generate the URL for the image logo. 502 | $date_str = explode("/", $dates[$i]); 503 | $day = sprintf("%02s", $date_str[0]); 504 | $month = sprintf("%02s", $date_str[1]); 505 | $image = "http://www.residentadvisor.net/images/events/flyer/2014/10/us-" . $month . $day . "-" . $event_num[$i] . "-list.jpg"; 506 | 507 | // Output the date on top left of event page. 508 | echo "" . $dates[$i] . "
"; 509 | echo ""; 510 | echo "
"; 511 | 512 | // If event logo exist, then show it, otherwise show the default image. 513 | if (@getimagesize($image)) { 514 | echo "
"; 515 | } else { 516 | echo "
"; 517 | } 518 | // Put the event title in the adjacent sub-table. 519 | 520 | echo "
"; 521 | echo "" . $title_dj[$i] . "" . $str_tickets[$i] . "
"; 522 | echo "
"; 523 | 524 | // Move to promoter column and repeat the above. 525 | 526 | $j = $i + 5; 527 | $column = 'e' . $j; 528 | $url = $row2[$column]; 529 | echo "
"; 530 | $date_str = explode("/", $dates[$j]); 531 | $day = sprintf("%02s", $date_str[0]); 532 | $month = sprintf("%02s", $date_str[1]); 533 | $image = "http://www.residentadvisor.net/images/events/flyer/2014/10/us-" . $month . $day . "-" . $event_num[$j] . "-list.jpg"; 534 | if(strlen($url) > 1) { 535 | echo "" . $dates[$i] . "
"; 536 | } 537 | echo ""; 538 | echo "
"; 539 | if (@getimagesize($image)) { 540 | echo "
"; 541 | } else { 542 | echo "
"; 543 | } 544 | echo "
"; 545 | echo "" . $title_promoter[$i] . "" . $str_tickets[$i+5] . "
"; 546 | echo "
"; 547 | 548 | 549 | // Move to 'similar users' column and repeat the above. 550 | 551 | $j = $i + 10; 552 | $column = 'e' . $j; 553 | $url = $row2[$column]; 554 | echo "
"; 555 | $date_str = explode("/", $dates_s[$j-10]); 556 | $day = sprintf("%02s", $date_str[0]); 557 | $month = sprintf("%02s", $date_str[1]); 558 | $image = "http://www.residentadvisor.net/images/events/flyer/2014/10/us-" . $month . $day . "-" . $event_num_s[$j-10] . "-list.jpg"; 559 | if(strlen($title2_s[$i]) > 1) { 560 | echo "" . $dates_s[$i] . "
"; 561 | } 562 | echo ""; 563 | echo "
"; 564 | if (strlen($title2_s[$i]) > 1) { 565 | if (@getimagesize($image)) { 566 | echo "
"; 567 | } else { 568 | echo "
"; 569 | } 570 | echo "
"; 571 | echo "" . $title2_s[$i] . "" . $str_tickets_s[$i] . "
"; 572 | } 573 | echo "
"; 574 | echo "
"; 577 | 578 | echo "

"; 579 | echo "
"; 580 | 581 | ?> 582 | -------------------------------------------------------------------------------- /recengine/php-files/write_events.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Home Page 8 | 9 | 10 | 11 |

Welcome !

12 |
13 | 14 | 15 | 16 | 18 | 19 | 20 | 21 | 23 | 24 | 25 | 28 |
26 | 27 |
29 |
<?php
// ...
// Output the message board posts stored in the `entry` table.
echo "Message board posts:" . "";
$result = mysqli_query($con, "SELECT * from entry");
while($row = mysqli_fetch_array($result)) {
    echo "" . $row['title'] . "";
    echo $row['text'] . "Author: " . $row['name'] . "";
    echo "";
}
?>
-------------------------------------------------------------------------------- /recengine/scraper-programs/RAEventPageScraper.py: --------------------------------------------------------------------------------
import urllib2
from bs4 import BeautifulSoup
import re
from multiprocessing import Pool
import pandas as pd
import numpy as np

# This is the raw list of URLs generated by generate_search_urls() below. Each page
# listing contains the list of events for that month/day/year in New York City.
df_urls = pd.read_csv('../RAUrlsFridaySept1920082014', header=None, lineterminator=',')


# Check whether the addToBasket tag is present - this determines if tickets are available.
def tickets_available(soup_obj):
    check = soup_obj.select('#addToBasket')
    return len(check) > 0

# Extract the ticket cost from the css object.
def event_cost(soup_obj):
    cost_text = soup_obj.find_all("li", class_="onsale but")
    soup2 = BeautifulSoup(str(cost_text))
    p = soup2.findAll('p')
    soup3 = BeautifulSoup(str(p))
    spans = soup3.find('span')
    if tickets_available(soup_obj):
        price = spans.text
        release = str(p)[4]
        return release, price
    else:
        cost_text = soup_obj.find_all("li", class_="closed")
        if cost_text:
            soup2 = BeautifulSoup(str(cost_text))
            p = soup2.findAll('p')
            soup3 = BeautifulSoup(str(p))
            spans = soup3.find('span')
            price = spans.text
            return 'F', price
        else:
            return None

# Return the date and city of the event, sliced out of the '#detail' block.
def event_date_city(soup_obj):
    detail_info = soup_obj.select('#detail')
    detail_info = str(detail_info)
    start_location = detail_info.find('events.aspx?') + 12
    end_location = detail_info.find('dy=') + 5
    return detail_info[start_location:end_location]
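
# Hypothetical illustration of event_date_city(): if the '#detail' block contains
# '...events.aspx?ai=8&v=day&mn=9&yr=2014&dy=19...', the slicing above returns
# 'ai=8&v=day&mn=9&yr=2014&dy=19' - the city id (ai=8 is New York) plus the date
# (assuming a two-digit day, since the slice ends 5 characters after 'dy=').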

# Find the venue of the event from the html source.
def venue(soup_obj):
    venue = soup_obj.find_all("li", class_="wide")
    venue_string = str(venue)
    strv = 'Venue /'
    start_position = venue_string.find(strv) + len(strv)
    strv2 = ''  # end-marker literal; the original markup string was lost from this copy
    end_position = venue_string[start_position:].find(strv2)
    venue_soup = BeautifulSoup(venue_string[start_position:])
    for link in venue_soup.findAll('a'):
        if len(str(link.contents)) < 100:
            return link.contents
        else:
            return venue_string[start_position:end_position + start_position]


# Return the lineup of djs performing at the event.
def lineup(soup_obj):
    djs = []
    lineups = soup_obj.find_all("p", class_="lineup")
    soup2 = BeautifulSoup(str(lineups))
    for link in soup2.findAll('a'):
        djs.append(link.contents)
    return djs

# Return the list of promoters of the event. RA's own 'RA Events' promoter link
# marks the end of the list, so stop once it is reached.
def promoter(soup_obj):
    links = soup_obj.findAll('a')
    promoters = []
    for link in links:
        if 'promoter' in str(link.attrs):
            if link.contents[0] != 'RA Events':
                promoters.append(link.contents)
            else:
                return promoters
    return promoters

# Return the event number from the URL.
def get_event_number(event_url):
    return event_url[event_url.find('?') + 1:]

# Query the Rollcall.ashx endpoint directly to get the list of users attending the
# event (this endpoint was found in the site's JavaScript code).
def members_attending(event_number):
    members_url = 'http://www.residentadvisor.net/WebServices/Rollcall.ashx?friends=false&eventId=' + str(event_number) + '&startRowNo=0&pageSize=10000'
    raw_page2 = urllib2.urlopen(members_url).read()
    soup_members = BeautifulSoup(raw_page2)
    return soup_members.prettify()


# Find the events first.
urls = []
search_urls = []

# Generate the search URLs - this gives all possible page listings for New York City (ai=8 represents NYC).
def generate_search_urls():
    for year in range(2008, 2015):
        for month in range(1, 13):
            for day in range(1, 29):
                search_url = "http://www.residentadvisor.net/events.aspx?ai=8&v=day&mn=" + str(month) + "&yr=" + str(year) + "&dy=" + str(day)
                search_urls.append(search_url)

# Given a page listing from generate_search_urls(), find all event listings on the page and store them in urls.
def get_event_urls(search_url):
    raw_page = urllib2.urlopen(search_url).read()
    soup = BeautifulSoup(raw_page)
    links = soup.findAll('a')
    for link in links:
        if '/event.aspx?' in str(link.attrs):
            if 'title' in link.attrs:
                url_str = 'http://www.residentadvisor.net' + str(link.attrs[u'href'])
                urls.append(url_str)
    return urls

# Given an event url, return all of the information about the event by calling the functions above.
def get_event_data(event_url):
    raw_page = urllib2.urlopen(event_url).read()
    soup = BeautifulSoup(raw_page)
    return soup.title, tickets_available(soup), event_cost(soup), event_date_city(soup), lineup(soup), promoter(soup), venue(soup), members_attending(get_event_number(event_url))


for i in range(0, len(df_urls)):
    ev_url = df_urls.ix[i, 0][2:-1]
    eventdata = get_event_data(ev_url)

    # Format the title of the event: strip the <title> tag and keep the 'RA ... York' part.
    title_name = str(eventdata[0])[str(eventdata[0]).find('<title>') + 8:]
    title_name = title_name[:title_name.find('</title>')]
    title_name = title_name[title_name.find('RA'):]
    title_name = title_name[:title_name.find('York') + 4]

    # Output the event data separated by ':::' for later analysis.
    print title_name, ':::', eventdata[1:], ':::', ev_url, i
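
# A minimal sketch of how the functions above compose end-to-end (commented out so
# the module does not fire network requests on import; the index choices are arbitrary):
#
#   generate_search_urls()                        # fill search_urls for NYC (ai=8), 2008-2014
#   event_urls = get_event_urls(search_urls[0])   # event links on one day's listing page
#   data = get_event_data(event_urls[0])          # (title, tickets?, cost, date+city, lineup,
#                                                 #  promoters, venue, attendees)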
-------------------------------------------------------------------------------- /src/project_proposal_bot.py: --------------------------------------------------------------------------------
import os
import pandas as pd
import random

df = pd.read_csv('data/student_presentations.csv')

# Keep only the students who have not presented yet.
df_not_presented = df[df['Presented'].isnull()]
df_not_presented.reset_index(inplace=True)

# random.randint is inclusive on both ends, so subtract 1 to stay in bounds.
student_num = random.randint(0, len(df_not_presented) - 1)

student = df_not_presented.loc[student_num]
name = student['Name']
uni = student['UNI']
print name
print uni

# Post the selection to the #projects Slack channel (the API token has been removed).
msg = name + " @" + uni + " - you have been selected to present!"
command = 'curl https://slack.com/api/chat.postMessage -X POST -d "channel=#projects" -d "text=' + msg + '" -d "username=project_proposals" -d "token={REMOVED}" -d "icon_emoji=:simple_smile:"'

os.system(command)
-------------------------------------------------------------------------------- /webapp/hello.py: --------------------------------------------------------------------------------
from flask import Flask
from flask import request
from flask import render_template

app = Flask(__name__)

# Serve the form on GET requests.
@app.route('/')
def my_form():
    return render_template("my-form.html")

# On POST, read the submitted text and return it uppercased.
@app.route('/', methods=['POST'])
def my_form_post():
    text = request.form['text']
    processed_text = text.upper()
    return processed_text

if __name__ == '__main__':
    app.run()
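
# A quick smoke test for the endpoints above, once the app is running via `python hello.py`.
# It assumes Flask's default address (http://127.0.0.1:5000) and uses the third-party
# `requests` library, which is not otherwise part of this repo:
#
#   import requests
#   r = requests.post('http://127.0.0.1:5000/', data={'text': 'hello'})
#   print r.text  # -> HELLO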
-------------------------------------------------------------------------------- /webapp/templates/my-form.html: --------------------------------------------------------------------------------
<!DOCTYPE html>
<html>
<body>

Enter some text

(it will be converted to uppercase)

<form method="POST">
    <input name="text">
    <input type="submit">
</form>

</body>
</html>
--------------------------------------------------------------------------------