"
810 | ],
811 | "text/plain": [
812 | " title \\\n",
813 | "0 An Exploration of R, Yelp, and the Search for ... \n",
814 | "1 Deep Advances in Generative Modeling \n",
815 | "2 Spark Pipelines: Elegant Yet Powerful \n",
816 | "3 Shit VCs Say \n",
817 | "4 Python, Machine Learning, and Language Wars \n",
818 | "\n",
819 | " date days \n",
820 | "0 5 points by Rogerh91 6 hours ago | discuss 1 \n",
821 | "1 7 points by gwulfs 15 hours ago | 1 comment 1 \n",
822 | "2 3 points by aouyang1 9 hours ago | discuss 1 \n",
823 | "3 3 points by Argentum01 10 hours ago | discuss 1 \n",
824 | "4 4 points by pmigdal 17 hours ago | discuss 1 "
825 | ]
826 | },
827 | "execution_count": 60,
828 | "metadata": {},
829 | "output_type": "execute_result"
830 | }
831 | ],
832 | "source": [
833 | "df.head()"
834 | ]
835 | },
836 | {
837 | "cell_type": "code",
838 | "execution_count": 61,
839 | "metadata": {
840 | "collapsed": false
841 | },
842 | "outputs": [
843 | {
844 | "data": {
845 | "text/html": [
846 | "
\n",
847 | "
\n",
848 | " \n",
849 | "
\n",
850 | "
\n",
851 | "
title
\n",
852 | "
date
\n",
853 | "
days
\n",
854 | "
\n",
855 | " \n",
856 | " \n",
857 | "
\n",
858 | "
175
\n",
859 | "
Getting Started with Statistics for Data Science
\n",
860 | "
3 points by nickhould 35 days ago | discuss
\n",
861 | "
35
\n",
862 | "
\n",
863 | "
\n",
864 | "
176
\n",
865 | "
Rodeo 1.3 - Tab-completion for docstrings
\n",
866 | "
3 points by glamp 35 days ago | discuss
\n",
867 | "
35
\n",
868 | "
\n",
869 | "
\n",
870 | "
177
\n",
871 | "
Teaching D3.js - links
\n",
872 | "
3 points by pmigdal 35 days ago | discuss
\n",
873 | "
35
\n",
874 | "
\n",
875 | "
\n",
876 | "
178
\n",
877 | "
Parallel scikit-learn on YARN
\n",
878 | "
5 points by stijntonk 39 days ago | discuss
\n",
879 | "
39
\n",
880 | "
\n",
881 | "
\n",
882 | "
179
\n",
883 | "
Meetup: Free Live Webinar on Prescriptive Anal...
\n",
884 | "
2 points by ann928 32 days ago | discuss
\n",
885 | "
32
\n",
886 | "
\n",
887 | " \n",
888 | "
\n",
889 | "
"
890 | ],
891 | "text/plain": [
892 | " title \\\n",
893 | "175 Getting Started with Statistics for Data Science \n",
894 | "176 Rodeo 1.3 - Tab-completion for docstrings \n",
895 | "177 Teaching D3.js - links \n",
896 | "178 Parallel scikit-learn on YARN \n",
897 | "179 Meetup: Free Live Webinar on Prescriptive Anal... \n",
898 | "\n",
899 | " date days \n",
900 | "175 3 points by nickhould 35 days ago | discuss 35 \n",
901 | "176 3 points by glamp 35 days ago | discuss 35 \n",
902 | "177 3 points by pmigdal 35 days ago | discuss 35 \n",
903 | "178 5 points by stijntonk 39 days ago | discuss 39 \n",
904 | "179 2 points by ann928 32 days ago | discuss 32 "
905 | ]
906 | },
907 | "execution_count": 61,
908 | "metadata": {},
909 | "output_type": "execute_result"
910 | }
911 | ],
912 | "source": [
913 | "df.tail()"
914 | ]
915 | },
916 | {
917 | "cell_type": "code",
918 | "execution_count": 62,
919 | "metadata": {
920 | "collapsed": true
921 | },
922 | "outputs": [],
923 | "source": [
924 | "# Let us save the dataframe to a CSV file\n",
925 | "df.to_csv('data_tau_days.csv', index=False)"
926 | ]
927 | }
928 | ],
929 | "metadata": {
930 | "kernelspec": {
931 | "display_name": "Python 3",
932 | "language": "python",
933 | "name": "python3"
934 | },
935 | "language_info": {
936 | "codemirror_mode": {
937 | "name": "ipython",
938 | "version": 3
939 | },
940 | "file_extension": ".py",
941 | "mimetype": "text/x-python",
942 | "name": "python",
943 | "nbconvert_exporter": "python",
944 | "pygments_lexer": "ipython3",
945 | "version": "3.5.1"
946 | }
947 | },
948 | "nbformat": 4,
949 | "nbformat_minor": 0
950 | }
951 |
--------------------------------------------------------------------------------
/text_mining/data_tau.csv:
--------------------------------------------------------------------------------
1 | title,date
2 | "An Exploration of R, Yelp, and the Search for Good Indian Food",5 points by Rogerh91 6 hours ago | discuss
3 | Deep Advances in Generative Modeling,7 points by gwulfs 15 hours ago | 1 comment
4 | Spark Pipelines: Elegant Yet Powerful,3 points by aouyang1 9 hours ago | discuss
5 | Shit VCs Say,3 points by Argentum01 10 hours ago | discuss
6 | "Python, Machine Learning, and Language Wars",4 points by pmigdal 17 hours ago | discuss
7 | A Neural Network in 11 lines of Python ,3 points by dekhtiar 14 hours ago | discuss
8 | Markov Chains Explained Visually,13 points by zeroviscosity 1 day ago | 1 comment
9 | Dplython: Dplyr for Python,13 points by thenaturalist 1 day ago | 3 comments
10 | Inferring causal impact using Bayesian structural time-series models,8 points by Homunculiheaded 1 day ago | 1 comment
11 | A Billion Taxi Rides on Amazon EMR running Spark,5 points by marklit 1 day ago | 1 comment
12 | Tutorial: Web scraping and mapping breweries with import.io and R,4 points by jasdumas 1 day ago | discuss
13 | The rise of greedy robots,4 points by yanir 2 days ago | discuss
14 | "Python for Data Structures, Algorithms, and Interviews",18 points by kokoubaby 4 days ago | discuss
15 | Extracting image metadata at scale,2 points by zachwill 1 day ago | discuss
16 | Lift charts - A data scientist's secret weapon,14 points by datenheini 4 days ago | 2 comments
17 | Data Science Side Project,7 points by yashpatel5400 2 days ago | 9 comments
18 | How To Become A Machine Learning Expert In One Simple Step,4 points by swanint 2 days ago | discuss
19 | Engineers Shouldn’t Write ETL: High Functioning Data Science Departments,10 points by legel 4 days ago | 3 comments
20 | Simple estimation of hierarchical events with petersburg,3 points by wdm0006 2 days ago | discuss
21 | Unsupervised Computer Vision: The Current State of the Art,6 points by carlosfaham 3 days ago | discuss
22 | Data Engineering at Slack: Twelve Mistakes I've Made In My First Three Months,14 points by gwulfs 6 days ago | 2 comments
23 | What data visualization tools do /r/DataIsBeautiful OC creators use?,3 points by pmigdal 2 days ago | discuss
24 | Reshaping in Pandas,6 points by carlosgg 4 days ago | discuss
25 | An unusual interactive machine learning challenge,4 points by gglumov 3 days ago | discuss
26 | Datumbox Machine Learning Framework 0.7.0 Released,4 points by datumbox 3 days ago | discuss
27 | Data science intro for math/phys background,14 points by pmigdal 7 days ago | discuss
28 | Neural Networks demystified,16 points by elyase 8 days ago | discuss
29 | What machines can learn from Apple Watch: detecting undiagnosed heart condition,9 points by koukouhappy 6 days ago | discuss
30 | Data Science Tools: The Biggest Winners and Losers,12 points by AnnaOnTheWeb 7 days ago | discuss
31 | 10 Years of Open Source Machine Learning,9 points by tstonez 6 days ago | 1 comment
32 | Has your conversion rate changed? Bayesian timeseries analysis with Python,12 points by yummyfajitas 8 days ago | discuss
33 | Do jobs run in families?,5 points by Anon84 5 days ago | 1 comment
34 | Introduction to Scikit Flow - Simplified Interface to TensorFlow,8 points by lefish 7 days ago | discuss
35 | "XGBoost4J: Portable Distributed XGboost in Spark, Flink and Dataflow",8 points by crowwork 8 days ago | discuss
36 | How to learn machine learning?,8 points by kiechu 8 days ago | 1 comment
37 | The Deep Roots of Javascript Fatigue,5 points by nikkielizdemere 6 days ago | 1 comment
38 | How do we make Data Tau work?,27 points by hal8 9 days ago | 18 comments
39 | "Machine Learning: An In-Depth, Non-Technical Guide — Part 4",7 points by innoarchitech 8 days ago | discuss
40 | Data Science Slack channel - Click for invite,7 points by jyotsna 8 days ago | discuss
41 | [Ask DT] What are some rookie mistakes in R?,3 points by HKtemp 3 days ago | discuss
42 | "Playing ""Moneyball"" on EA FIFA 16",16 points by aabb13 13 days ago | 3 comments
43 | Intellexer - Natural Language Processing and Text Mining REST API,16 points by j_downer 13 days ago | discuss
44 | Descriptive Statistics in SQL,5 points by nickhould 7 days ago | discuss
45 | Genomic Data Visualization using Python,2 points by RadhouaneAniba 4 days ago | discuss
46 | How to Use Cohort Data to Analyze User Behavior,2 points by clevertap 4 days ago | discuss
47 | Making transparent how variations in analytical choices affect results,4 points by rahmaniacc 7 days ago | discuss
48 | Show DT: Datasets.co - An easy way to share and discover ml datasets,2 points by mrborgen86 4 days ago | discuss
49 | Is Scala a better choice than Python for Apache Spark?,7 points by srinify 10 days ago | 1 comment
50 | Julia: A Fast Language for Numerical Computing,7 points by srinify 10 days ago | 1 comment
51 | "An Ode To The Rice Cooker, The Smartest Kitchen Appliance I’ve Ever Owned",2 points by tfturing 4 days ago | discuss
52 | Computing Classification Evaluation Metrics in R,4 points by lefish 7 days ago | discuss
53 | Analyzing Golden State Warriors' passing network using GraphFrames in Spark,3 points by yukiegosapporo 6 days ago | discuss
54 | Megaman: Manifold Learning with Millions of points,4 points by dperry 8 days ago | 3 comments
55 | How to Detect Outliers on Parametric and Non Parametric Methods,2 points by clevertap 5 days ago | discuss
56 | BallR: Interactive NBA Shot Charts with R and Shiny,12 points by carlosgg 14 days ago | discuss
57 | A Billion Taxi Rides on Amazon EMR Running Presto,4 points by marklit 8 days ago | discuss
58 | Minecraft to run artificial intelligence experiments,4 points by bsadeghi 8 days ago | discuss
59 | Deep Q-Learning (Space Invaders),4 points by pmigdal 8 days ago | discuss
60 | Theano Tutorial,2 points by pmigdal 5 days ago | discuss
61 | The Personality Space of Cartoon Characters,3 points by lefish 7 days ago | discuss
62 | Announcing Apache Flink 1.0.0,11 points by mxm 14 days ago | discuss
63 | "Telemetry with Collectd, Logstash, Elasticsearch and Grafana (ELG)",3 points by helloanand 7 days ago | discuss
64 | Statisticians Agree: It’s Time To Stop Misusing P-Value,10 points by jpiburn 15 days ago | 5 comments
65 | Bayesian Reasoning in The Twilight Zone!,2 points by Homunculiheaded 6 days ago | discuss
66 | Bayesian Estimation of G Train Wait Times,7 points by jamesdreiss 12 days ago | discuss
67 | XGBoost: A Scalable Tree Boosting System article,6 points by tfturing 12 days ago | discuss
68 | Some experiments into explaining complex black box ensemble predictions,2 points by lefish 6 days ago | discuss
69 | Creating a Hadoop Pseudo-Distributed Environment,2 points by lefish 6 days ago | discuss
70 | "Data Science Pop-Up in Austin, TX",2 points by AnnaOnTheWeb 6 days ago | discuss
71 | Train your own image classifier with Inception in TensorFlow,7 points by elyase 13 days ago | discuss
72 | Shiny app for running a Tensorflow demo,3 points by shinyman 9 days ago | discuss
73 | File details and owners with gitnoc and git-pandas,3 points by wdm0006 9 days ago | discuss
74 | 7 Big Data Technologies and When to Use Them that All Data Engineers Should Know,2 points by galvanize 7 days ago | discuss
75 | Topic clusters with TF-IDF vectorization with Spark and Scala,2 points by lefish 7 days ago | discuss
76 | Neural Doodles: Workflows for the Next Generation of Artists,5 points by pmigdal 12 days ago | discuss
77 | Graph Databases 101,5 points by carlosgg 12 days ago | discuss
78 | DataRadar.IO - Data Science RSS Feed - Do you have enough data about your data,2 points by dekhtiar 8 days ago | 3 comments
79 | International Women's Day: What #PledgeForParity Means To Us,5 points by ddrum001 14 days ago | discuss
80 | Top 50 Data Science thought leaders on Twitter,3 points by datawerq 11 days ago | 3 comments
81 | Ask DT: Who Is Hiring? (March 2016),27 points by whoishiring 21 days ago | 15 comments
82 | Deriving Better Insights From Time Series Data With Cycle Plots,3 points by clevertap 11 days ago | discuss
83 | Introducing GraphFrames,7 points by falaki 19 days ago | discuss
84 | SQL for Data Analysis,4 points by nickhould 14 days ago | 6 comments
85 | Stream processing and messaging systems for the IoT age,3 points by gradientflow 12 days ago | discuss
86 | Announcing R Tools for Visual Studio,3 points by brakmic 13 days ago | discuss
87 | A simpler way to merge data streams,3 points by apoverton 13 days ago | discuss
88 | Optimizing Notification Timing for One Signal,9 points by megandias 26 days ago | discuss
89 | Skizze - A high throughput probabilistic data structure service and storage,3 points by seiflotfy 14 days ago | discuss
90 | Question: What do you want to say about working with data?,2 points by emiller425 8 days ago | discuss
91 | Genomic Ranges - an Introduction to Working with Genomic Data,3 points by AnnaOnTheWeb 13 days ago | discuss
92 | TensorFlow for Poets,9 points by ebellm 21 days ago | 1 comment
93 | Unsupervised Learning with Even Less Supervision Using Bayesian Optimization,2 points by idewanck 11 days ago | discuss
94 | How to work with large JSON datasets using Python and Pandas,9 points by brian_spiering 21 days ago | discuss
95 | DrivenData Competition: Model/Visualize Fog Patterns in Morocco,4 points by bull 15 days ago | discuss
96 | Deep Learning: Nine Lectures at Collège de France by Yan LeCun,5 points by Anon84 17 days ago | discuss
97 | Optimizing Facebook Campaigns with R,2 points by AnnaOnTheWeb 12 days ago | 1 comment
98 | "Trump Tweets on a Globe (aka Fun with d3, socket.io, and the Twitter API)",8 points by joelgrus 21 days ago | discuss
99 | Why pandas users should be excited about Apache Arrow,17 points by pmigdal 29 days ago | discuss
100 | Histogram intersection for change detection,8 points by datadive 22 days ago | discuss
101 | Distributed TensorFlow just open-sourced,10 points by elyase 25 days ago | discuss
102 | D3.js Screencasts (1 in 3 are free),4 points by Veerle 18 days ago | discuss
103 | Regression and Classification with Examples in R,5 points by soates 20 days ago | discuss
104 | Free online course on statistical shape modelling,8 points by shapemean 25 days ago | discuss
105 | "Don't worry about deep learning, deepen your understanding of causality instead",22 points by yanir 37 days ago | discuss
106 | Work with private repositories and other updates of the FlyElephant platform,2 points by m31 15 days ago | discuss
107 | How to import XML to almost anywhere,4 points by Jammink 20 days ago | discuss
108 | Survival Analysis of Cricket Player Careers,8 points by keshav92 26 days ago | 6 comments
109 | Generate image analogies using neural matching and blending,2 points by pmigdal 15 days ago | discuss
110 | "Analyzing 1.8M tweets from Super Bowl 50 (Twython, Twitter API, AYLIEN)",4 points by mikewally 20 days ago | discuss
111 | Newly released sklearn compatible library of categorical encoders,7 points by wdm0006 25 days ago | discuss
112 | Watch Tiny Neural Nets Learn,4 points by swanint 21 days ago | discuss
113 | Four pitfalls of hill climbing: An animated look,5 points by csaid81 23 days ago | discuss
114 | "Decision Forests, Convolutional Networks and the Models in-Between",2 points by ebellm 16 days ago | discuss
115 | How a Math Genius Hacked OkCupid to Find True Love,15 points by roh_codeur 34 days ago | discuss
116 | No developers for PyLearn2,3 points by tfturing 19 days ago | discuss
117 | Density Estimation with Dirichlet Process Mixtures using PyMC3,6 points by MidsizeBlowfish 25 days ago | discuss
118 | Using survival analysis and git-pandas to estimate code quality,3 points by wdm0006 20 days ago | discuss
119 | An Analysis of the Flint Michigan Water Crisis: Part 1 Initial Corrosivity,3 points by JHorn 20 days ago | discuss
120 | An Analysis of Republican Twitter Follower Interests,6 points by michelangelo 26 days ago | discuss
121 | Introduction to ML talk,8 points by cjbayesian 29 days ago | discuss
122 | GloVe vs word2vec revisited,3 points by pmigdal 20 days ago | discuss
123 | Overoptimizing: a story about kaggle,4 points by wdm0006 30 days ago | discuss
124 | Undergrad Data Analysis/Science internships SF Bay?,3 points by tctctc 15 days ago | 5 comments
125 | The Role of Statistical Significance in Growth Hacking,6 points by rawls234 27 days ago | discuss
126 | Data Science Course @ Harvard,7 points by rahmaniacc 29 days ago | 2 comments
127 | Principal Component Projection Without Principal Component Analysis,6 points by genofon 27 days ago | discuss
128 | "Machine Learning: An In-Depth, Non-Technical Guide - Part 3",7 points by innoarchitech 29 days ago | discuss
129 | Stochastic Dummy Boosting,2 points by mikeskim 18 days ago | discuss
130 | Interactive Map: Hong-Kong through The Lense of Instagram,2 points by BrianN 19 days ago | discuss
131 | Data Science at Monsanto,3 points by doctorcroc 22 days ago | discuss
132 | Data Science at Instacart,11 points by jeremystan 34 days ago | 3 comments
133 | Building a Streaming Search Platform,6 points by ddrum001 28 days ago | discuss
134 | Kafka Producer Latency with Large Topic Counts,3 points by marklit 26 days ago | discuss
135 | A Sneak Peak of the Cloud: the 2 Minute Intro for Beginners,2 points by andymaheshw 20 days ago | discuss
136 | Win-Vector video courses: price/status changes,2 points by jmount 20 days ago | discuss
137 | 50+ Data Science and Machine Learning Cheat Sheets,20 points by elyase 42 days ago | 1 comment
138 | One More Reason Not To Be Scared of Deep Learning,2 points by amplifier_khan 21 days ago | discuss
139 | Visual Logic Authoring vs Code,2 points by AnnaOnTheWeb 21 days ago | discuss
140 | Data Science in Python online training with hands-on experience,2 points by Puneet 21 days ago | discuss
141 | Viewing the US Presidential Primary Through the Lens of Twitter,8 points by michelangelo 33 days ago | discuss
142 | Caffe on Spark open sourced,4 points by rahmaniacc 27 days ago | discuss
143 | The Ethical Data Scientist,5 points by tfturing 29 days ago | discuss
144 | Answers to Frequently Asked Questions in Machine Learning,3 points by rasbt 21 days ago | discuss
145 | Intro to A/B Testing and P-Values,2 points by randyzwitch 22 days ago | discuss
146 | Visualizing State Level Data With R and Statebins,2 points by usujason 22 days ago | discuss
147 | "Probabilistic Graphical Models slides & video lectures (Eric Xing, CMU)",4 points by ororm 28 days ago | discuss
148 | Sense2vec with spaCy and Gensim,9 points by elyase 36 days ago | 2 comments
149 | A Billion NYC Taxi and Uber Rides in AWS Redshift,3 points by marklit 31 days ago | discuss
150 | How to Code and Understand DeepMind's Neural Stack Machine (in Python),2 points by genofon 23 days ago | discuss
151 | How to make polished Jupyter presentations with optional code visibility,9 points by csaid81 36 days ago | discuss
152 | How to become a Bayesian in eight easy steps,17 points by EtzA 44 days ago | 1 comment
153 | Optimizing .*: Details of Vectorization and Metaprogramming in Julia,4 points by randyzwitch 29 days ago | discuss
154 | IBM certified Apache Spark Online Training,8 points by divya_jain 36 days ago | discuss
155 | Geographic Data Science course,2 points by rk 25 days ago | discuss
156 | "The Daily Mail Stole My Visualization, Twice",5 points by thehoff 32 days ago | 1 comment
157 | Ensemble Methods: Improved Machine Learning Results,9 points by PyBloggers 38 days ago | discuss
158 | Apache Spark and unsupervised learning in security,2 points by gradientflow 26 days ago | discuss
159 | MachineJS: Automated machine learning- just give it a data file!,2 points by dsernst 26 days ago | discuss
160 | The NSA’s SKYNET program may be killing thousands of innocent people,6 points by zlipp 35 days ago | discuss
161 | "Big Dimensions, and What You Can Do About It",2 points by ramsey 27 days ago | discuss
162 | Automate Your Oscars Pool with R,2 points by jamesdreiss 27 days ago | discuss
163 | Signal Processing with LIGO GW150914 data,9 points by tfturing 39 days ago | discuss
164 | Overview of DeZyre and Coursera Data Science Course,5 points by ann928 34 days ago | discuss
165 | Upcoming Datathon in NYC,2 points by VicTrey 28 days ago | discuss
166 | Summarizing Data in SQL,15 points by elisebreda 46 days ago | discuss
167 | A/B Testing for Scammers,2 points by sameermanek 28 days ago | discuss
168 | Highly interpretable classifiers for scikit learn using Bayesian decision rules,2 points by mcnulty 28 days ago | discuss
169 | Auto-scaling scikit-learn with Spark,11 points by falaki 43 days ago | discuss
170 | Where the f*** can I park?,2 points by manugarri 29 days ago | discuss
171 | "Machine Learning: An In-Depth, Non-Technical Guide - Part 2",5 points by innoarchitech 36 days ago | discuss
172 | Webhose.io now offers a historical data archive,7 points by databuffer 40 days ago | discuss
173 | Meetup: Introduction to Machine Learning Algorithms for Data Science.,4 points by ann928 36 days ago | discuss
174 | Exploring the Limits of Language Modeling,8 points by soates 42 days ago | discuss
175 | Text Mining South Park,7 points by pmigdal 41 days ago | discuss
176 | Finding the K in K-means by Parametric Bootstrap,7 points by jmount 42 days ago | 1 comment
177 | Getting Started with Statistics for Data Science,3 points by nickhould 35 days ago | discuss
178 | Rodeo 1.3 - Tab-completion for docstrings,3 points by glamp 35 days ago | discuss
179 | Teaching D3.js - links,3 points by pmigdal 35 days ago | discuss
180 | Parallel scikit-learn on YARN,5 points by stijntonk 39 days ago | discuss
181 | Meetup: Free Live Webinar on Prescriptive Analytics for Fun and Profit,2 points by ann928 32 days ago | discuss
182 |
--------------------------------------------------------------------------------
/text_mining/data_tau_days.csv:
--------------------------------------------------------------------------------
1 | title,date,days
2 | "An Exploration of R, Yelp, and the Search for Good Indian Food",5 points by Rogerh91 6 hours ago | discuss,1
3 | Deep Advances in Generative Modeling,7 points by gwulfs 15 hours ago | 1 comment,1
4 | Spark Pipelines: Elegant Yet Powerful,3 points by aouyang1 9 hours ago | discuss,1
5 | Shit VCs Say,3 points by Argentum01 10 hours ago | discuss,1
6 | "Python, Machine Learning, and Language Wars",4 points by pmigdal 17 hours ago | discuss,1
7 | A Neural Network in 11 lines of Python ,3 points by dekhtiar 14 hours ago | discuss,1
8 | Markov Chains Explained Visually,13 points by zeroviscosity 1 day ago | 1 comment,1
9 | Dplython: Dplyr for Python,13 points by thenaturalist 1 day ago | 3 comments,1
10 | Inferring causal impact using Bayesian structural time-series models,8 points by Homunculiheaded 1 day ago | 1 comment,1
11 | A Billion Taxi Rides on Amazon EMR running Spark,5 points by marklit 1 day ago | 1 comment,1
12 | Tutorial: Web scraping and mapping breweries with import.io and R,4 points by jasdumas 1 day ago | discuss,1
13 | The rise of greedy robots,4 points by yanir 2 days ago | discuss,2
14 | "Python for Data Structures, Algorithms, and Interviews",18 points by kokoubaby 4 days ago | discuss,4
15 | Extracting image metadata at scale,2 points by zachwill 1 day ago | discuss,1
16 | Lift charts - A data scientist's secret weapon,14 points by datenheini 4 days ago | 2 comments,4
17 | Data Science Side Project,7 points by yashpatel5400 2 days ago | 9 comments,2
18 | How To Become A Machine Learning Expert In One Simple Step,4 points by swanint 2 days ago | discuss,2
19 | Engineers Shouldn’t Write ETL: High Functioning Data Science Departments,10 points by legel 4 days ago | 3 comments,4
20 | Simple estimation of hierarchical events with petersburg,3 points by wdm0006 2 days ago | discuss,2
21 | Unsupervised Computer Vision: The Current State of the Art,6 points by carlosfaham 3 days ago | discuss,3
22 | Data Engineering at Slack: Twelve Mistakes I've Made In My First Three Months,14 points by gwulfs 6 days ago | 2 comments,6
23 | What data visualization tools do /r/DataIsBeautiful OC creators use?,3 points by pmigdal 2 days ago | discuss,2
24 | Reshaping in Pandas,6 points by carlosgg 4 days ago | discuss,4
25 | An unusual interactive machine learning challenge,4 points by gglumov 3 days ago | discuss,3
26 | Datumbox Machine Learning Framework 0.7.0 Released,4 points by datumbox 3 days ago | discuss,3
27 | Data science intro for math/phys background,14 points by pmigdal 7 days ago | discuss,7
28 | Neural Networks demystified,16 points by elyase 8 days ago | discuss,8
29 | What machines can learn from Apple Watch: detecting undiagnosed heart condition,9 points by koukouhappy 6 days ago | discuss,6
30 | Data Science Tools: The Biggest Winners and Losers,12 points by AnnaOnTheWeb 7 days ago | discuss,7
31 | 10 Years of Open Source Machine Learning,9 points by tstonez 6 days ago | 1 comment,6
32 | Has your conversion rate changed? Bayesian timeseries analysis with Python,12 points by yummyfajitas 8 days ago | discuss,8
33 | Do jobs run in families?,5 points by Anon84 5 days ago | 1 comment,5
34 | Introduction to Scikit Flow - Simplified Interface to TensorFlow,8 points by lefish 7 days ago | discuss,7
35 | "XGBoost4J: Portable Distributed XGboost in Spark, Flink and Dataflow",8 points by crowwork 8 days ago | discuss,8
36 | How to learn machine learning?,8 points by kiechu 8 days ago | 1 comment,8
37 | The Deep Roots of Javascript Fatigue,5 points by nikkielizdemere 6 days ago | 1 comment,6
38 | How do we make Data Tau work?,27 points by hal8 9 days ago | 18 comments,9
39 | "Machine Learning: An In-Depth, Non-Technical Guide — Part 4",7 points by innoarchitech 8 days ago | discuss,8
40 | Data Science Slack channel - Click for invite,7 points by jyotsna 8 days ago | discuss,8
41 | [Ask DT] What are some rookie mistakes in R?,3 points by HKtemp 3 days ago | discuss,3
42 | "Playing ""Moneyball"" on EA FIFA 16",16 points by aabb13 13 days ago | 3 comments,13
43 | Intellexer - Natural Language Processing and Text Mining REST API,16 points by j_downer 13 days ago | discuss,13
44 | Descriptive Statistics in SQL,5 points by nickhould 7 days ago | discuss,7
45 | Genomic Data Visualization using Python,2 points by RadhouaneAniba 4 days ago | discuss,4
46 | How to Use Cohort Data to Analyze User Behavior,2 points by clevertap 4 days ago | discuss,4
47 | Making transparent how variations in analytical choices affect results,4 points by rahmaniacc 7 days ago | discuss,7
48 | Show DT: Datasets.co - An easy way to share and discover ml datasets,2 points by mrborgen86 4 days ago | discuss,4
49 | Is Scala a better choice than Python for Apache Spark?,7 points by srinify 10 days ago | 1 comment,10
50 | Julia: A Fast Language for Numerical Computing,7 points by srinify 10 days ago | 1 comment,10
51 | "An Ode To The Rice Cooker, The Smartest Kitchen Appliance I’ve Ever Owned",2 points by tfturing 4 days ago | discuss,4
52 | Computing Classification Evaluation Metrics in R,4 points by lefish 7 days ago | discuss,7
53 | Analyzing Golden State Warriors' passing network using GraphFrames in Spark,3 points by yukiegosapporo 6 days ago | discuss,6
54 | Megaman: Manifold Learning with Millions of points,4 points by dperry 8 days ago | 3 comments,8
55 | How to Detect Outliers on Parametric and Non Parametric Methods,2 points by clevertap 5 days ago | discuss,5
56 | BallR: Interactive NBA Shot Charts with R and Shiny,12 points by carlosgg 14 days ago | discuss,14
57 | A Billion Taxi Rides on Amazon EMR Running Presto,4 points by marklit 8 days ago | discuss,8
58 | Minecraft to run artificial intelligence experiments,4 points by bsadeghi 8 days ago | discuss,8
59 | Deep Q-Learning (Space Invaders),4 points by pmigdal 8 days ago | discuss,8
60 | Theano Tutorial,2 points by pmigdal 5 days ago | discuss,5
61 | The Personality Space of Cartoon Characters,3 points by lefish 7 days ago | discuss,7
62 | Announcing Apache Flink 1.0.0,11 points by mxm 14 days ago | discuss,14
63 | "Telemetry with Collectd, Logstash, Elasticsearch and Grafana (ELG)",3 points by helloanand 7 days ago | discuss,7
64 | Statisticians Agree: It’s Time To Stop Misusing P-Value,10 points by jpiburn 15 days ago | 5 comments,15
65 | Bayesian Reasoning in The Twilight Zone!,2 points by Homunculiheaded 6 days ago | discuss,6
66 | Bayesian Estimation of G Train Wait Times,7 points by jamesdreiss 12 days ago | discuss,12
67 | XGBoost: A Scalable Tree Boosting System article,6 points by tfturing 12 days ago | discuss,12
68 | Some experiments into explaining complex black box ensemble predictions,2 points by lefish 6 days ago | discuss,6
69 | Creating a Hadoop Pseudo-Distributed Environment,2 points by lefish 6 days ago | discuss,6
70 | "Data Science Pop-Up in Austin, TX",2 points by AnnaOnTheWeb 6 days ago | discuss,6
71 | Train your own image classifier with Inception in TensorFlow,7 points by elyase 13 days ago | discuss,13
72 | Shiny app for running a Tensorflow demo,3 points by shinyman 9 days ago | discuss,9
73 | File details and owners with gitnoc and git-pandas,3 points by wdm0006 9 days ago | discuss,9
74 | 7 Big Data Technologies and When to Use Them that All Data Engineers Should Know,2 points by galvanize 7 days ago | discuss,7
75 | Topic clusters with TF-IDF vectorization with Spark and Scala,2 points by lefish 7 days ago | discuss,7
76 | Neural Doodles: Workflows for the Next Generation of Artists,5 points by pmigdal 12 days ago | discuss,12
77 | Graph Databases 101,5 points by carlosgg 12 days ago | discuss,12
78 | DataRadar.IO - Data Science RSS Feed - Do you have enough data about your data,2 points by dekhtiar 8 days ago | 3 comments,8
79 | International Women's Day: What #PledgeForParity Means To Us,5 points by ddrum001 14 days ago | discuss,14
80 | Top 50 Data Science thought leaders on Twitter,3 points by datawerq 11 days ago | 3 comments,11
81 | Ask DT: Who Is Hiring? (March 2016),27 points by whoishiring 21 days ago | 15 comments,21
82 | Deriving Better Insights From Time Series Data With Cycle Plots,3 points by clevertap 11 days ago | discuss,11
83 | Introducing GraphFrames,7 points by falaki 19 days ago | discuss,19
84 | SQL for Data Analysis,4 points by nickhould 14 days ago | 6 comments,14
85 | Stream processing and messaging systems for the IoT age,3 points by gradientflow 12 days ago | discuss,12
86 | Announcing R Tools for Visual Studio,3 points by brakmic 13 days ago | discuss,13
87 | A simpler way to merge data streams,3 points by apoverton 13 days ago | discuss,13
88 | Optimizing Notification Timing for One Signal,9 points by megandias 26 days ago | discuss,26
89 | Skizze - A high throughput probabilistic data structure service and storage,3 points by seiflotfy 14 days ago | discuss,14
90 | Question: What do you want to say about working with data?,2 points by emiller425 8 days ago | discuss,8
91 | Genomic Ranges - an Introduction to Working with Genomic Data,3 points by AnnaOnTheWeb 13 days ago | discuss,13
92 | TensorFlow for Poets,9 points by ebellm 21 days ago | 1 comment,21
93 | Unsupervised Learning with Even Less Supervision Using Bayesian Optimization,2 points by idewanck 11 days ago | discuss,11
94 | How to work with large JSON datasets using Python and Pandas,9 points by brian_spiering 21 days ago | discuss,21
95 | DrivenData Competition: Model/Visualize Fog Patterns in Morocco,4 points by bull 15 days ago | discuss,15
96 | Deep Learning: Nine Lectures at Collège de France by Yan LeCun,5 points by Anon84 17 days ago | discuss,17
97 | Optimizing Facebook Campaigns with R,2 points by AnnaOnTheWeb 12 days ago | 1 comment,12
98 | "Trump Tweets on a Globe (aka Fun with d3, socket.io, and the Twitter API)",8 points by joelgrus 21 days ago | discuss,21
99 | Why pandas users should be excited about Apache Arrow,17 points by pmigdal 29 days ago | discuss,29
100 | Histogram intersection for change detection,8 points by datadive 22 days ago | discuss,22
101 | Distributed TensorFlow just open-sourced,10 points by elyase 25 days ago | discuss,25
102 | D3.js Screencasts (1 in 3 are free),4 points by Veerle 18 days ago | discuss,18
103 | Regression and Classification with Examples in R,5 points by soates 20 days ago | discuss,20
104 | Free online course on statistical shape modelling,8 points by shapemean 25 days ago | discuss,25
105 | "Don't worry about deep learning, deepen your understanding of causality instead",22 points by yanir 37 days ago | discuss,37
106 | Work with private repositories and other updates of the FlyElephant platform,2 points by m31 15 days ago | discuss,15
107 | How to import XML to almost anywhere,4 points by Jammink 20 days ago | discuss,20
108 | Survival Analysis of Cricket Player Careers,8 points by keshav92 26 days ago | 6 comments,26
109 | Generate image analogies using neural matching and blending,2 points by pmigdal 15 days ago | discuss,15
110 | "Analyzing 1.8M tweets from Super Bowl 50 (Twython, Twitter API, AYLIEN)",4 points by mikewally 20 days ago | discuss,20
111 | Newly released sklearn compatible library of categorical encoders,7 points by wdm0006 25 days ago | discuss,25
112 | Watch Tiny Neural Nets Learn,4 points by swanint 21 days ago | discuss,21
113 | Four pitfalls of hill climbing: An animated look,5 points by csaid81 23 days ago | discuss,23
114 | "Decision Forests, Convolutional Networks and the Models in-Between",2 points by ebellm 16 days ago | discuss,16
115 | How a Math Genius Hacked OkCupid to Find True Love,15 points by roh_codeur 34 days ago | discuss,34
116 | No developers for PyLearn2,3 points by tfturing 19 days ago | discuss,19
117 | Density Estimation with Dirichlet Process Mixtures using PyMC3,6 points by MidsizeBlowfish 25 days ago | discuss,25
118 | Using survival analysis and git-pandas to estimate code quality,3 points by wdm0006 20 days ago | discuss,20
119 | An Analysis of the Flint Michigan Water Crisis: Part 1 Initial Corrosivity,3 points by JHorn 20 days ago | discuss,20
120 | An Analysis of Republican Twitter Follower Interests,6 points by michelangelo 26 days ago | discuss,26
121 | Introduction to ML talk,8 points by cjbayesian 29 days ago | discuss,29
122 | GloVe vs word2vec revisited,3 points by pmigdal 20 days ago | discuss,20
123 | Overoptimizing: a story about kaggle,4 points by wdm0006 30 days ago | discuss,30
124 | Undergrad Data Analysis/Science internships SF Bay?,3 points by tctctc 15 days ago | 5 comments,15
125 | The Role of Statistical Significance in Growth Hacking,6 points by rawls234 27 days ago | discuss,27
126 | Data Science Course @ Harvard,7 points by rahmaniacc 29 days ago | 2 comments,29
127 | Principal Component Projection Without Principal Component Analysis,6 points by genofon 27 days ago | discuss,27
128 | "Machine Learning: An In-Depth, Non-Technical Guide - Part 3",7 points by innoarchitech 29 days ago | discuss,29
129 | Stochastic Dummy Boosting,2 points by mikeskim 18 days ago | discuss,18
130 | Interactive Map: Hong-Kong through The Lense of Instagram,2 points by BrianN 19 days ago | discuss,19
131 | Data Science at Monsanto,3 points by doctorcroc 22 days ago | discuss,22
132 | Data Science at Instacart,11 points by jeremystan 34 days ago | 3 comments,34
133 | Building a Streaming Search Platform,6 points by ddrum001 28 days ago | discuss,28
134 | Kafka Producer Latency with Large Topic Counts,3 points by marklit 26 days ago | discuss,26
135 | A Sneak Peak of the Cloud: the 2 Minute Intro for Beginners,2 points by andymaheshw 20 days ago | discuss,20
136 | Win-Vector video courses: price/status changes,2 points by jmount 20 days ago | discuss,20
137 | 50+ Data Science and Machine Learning Cheat Sheets,20 points by elyase 42 days ago | 1 comment,42
138 | One More Reason Not To Be Scared of Deep Learning,2 points by amplifier_khan 21 days ago | discuss,21
139 | Visual Logic Authoring vs Code,2 points by AnnaOnTheWeb 21 days ago | discuss,21
140 | Data Science in Python online training with hands-on experience,2 points by Puneet 21 days ago | discuss,21
141 | Viewing the US Presidential Primary Through the Lens of Twitter,8 points by michelangelo 33 days ago | discuss,33
142 | Caffe on Spark open sourced,4 points by rahmaniacc 27 days ago | discuss,27
143 | The Ethical Data Scientist,5 points by tfturing 29 days ago | discuss,29
144 | Answers to Frequently Asked Questions in Machine Learning,3 points by rasbt 21 days ago | discuss,21
145 | Intro to A/B Testing and P-Values,2 points by randyzwitch 22 days ago | discuss,22
146 | Visualizing State Level Data With R and Statebins,2 points by usujason 22 days ago | discuss,22
147 | "Probabilistic Graphical Models slides & video lectures (Eric Xing, CMU)",4 points by ororm 28 days ago | discuss,28
148 | Sense2vec with spaCy and Gensim,9 points by elyase 36 days ago | 2 comments,36
149 | A Billion NYC Taxi and Uber Rides in AWS Redshift,3 points by marklit 31 days ago | discuss,31
150 | How to Code and Understand DeepMind's Neural Stack Machine (in Python),2 points by genofon 23 days ago | discuss,23
151 | How to make polished Jupyter presentations with optional code visibility,9 points by csaid81 36 days ago | discuss,36
152 | How to become a Bayesian in eight easy steps,17 points by EtzA 44 days ago | 1 comment,44
153 | Optimizing .*: Details of Vectorization and Metaprogramming in Julia,4 points by randyzwitch 29 days ago | discuss,29
154 | IBM certified Apache Spark Online Training,8 points by divya_jain 36 days ago | discuss,36
155 | Geographic Data Science course,2 points by rk 25 days ago | discuss,25
156 | "The Daily Mail Stole My Visualization, Twice",5 points by thehoff 32 days ago | 1 comment,32
157 | Ensemble Methods: Improved Machine Learning Results,9 points by PyBloggers 38 days ago | discuss,38
158 | Apache Spark and unsupervised learning in security,2 points by gradientflow 26 days ago | discuss,26
159 | MachineJS: Automated machine learning- just give it a data file!,2 points by dsernst 26 days ago | discuss,26
160 | The NSA?s SKYNET program may be killing thousands of innocent people,6 points by zlipp 35 days ago | discuss,35
161 | "Big Dimensions, and What You Can Do About It",2 points by ramsey 27 days ago | discuss,27
162 | Automate Your Oscars Pool with R,2 points by jamesdreiss 27 days ago | discuss,27
163 | Signal Processing with LIGO GW150914 data,9 points by tfturing 39 days ago | discuss,39
164 | Overview of DeZyre and Coursera Data Science Course,5 points by ann928 34 days ago | discuss,34
165 | Upcoming Datathon in NYC,2 points by VicTrey 28 days ago | discuss,28
166 | Summarizing Data in SQL,15 points by elisebreda 46 days ago | discuss,46
167 | A/B Testing for Scammers,2 points by sameermanek 28 days ago | discuss,28
168 | Highly interpretable classifiers for scikit learn using Bayesian decision rules,2 points by mcnulty 28 days ago | discuss,28
169 | Auto-scaling scikit-learn with Spark,11 points by falaki 43 days ago | discuss,43
170 | Where the f*** can I park?,2 points by manugarri 29 days ago | discuss,29
171 | "Machine Learning: An In-Depth, Non-Technical Guide - Part 2",5 points by innoarchitech 36 days ago | discuss,36
172 | Webhose.io now offers a historical data archive,7 points by databuffer 40 days ago | discuss,40
173 | Meetup: Introduction to Machine Learning Algorithms for Data Science.,4 points by ann928 36 days ago | discuss,36
174 | Exploring the Limits of Language Modeling,8 points by soates 42 days ago | discuss,42
175 | Text Mining South Park,7 points by pmigdal 41 days ago | discuss,41
176 | Finding the K in K-means by Parametric Bootstrap,7 points by jmount 42 days ago | 1 comment,42
177 | Getting Started with Statistics for Data Science,3 points by nickhould 35 days ago | discuss,35
178 | Rodeo 1.3 - Tab-completion for docstrings,3 points by glamp 35 days ago | discuss,35
179 | Teaching D3.js - links,3 points by pmigdal 35 days ago | discuss,35
180 | Parallel scikit-learn on YARN,5 points by stijntonk 39 days ago | discuss,39
181 | Meetup: Free Live Webinar on Prescriptive Analytics for Fun and Profit,2 points by ann928 32 days ago | discuss,32
182 |
--------------------------------------------------------------------------------
/text_mining/img/chunk-segmentation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/text_mining/img/chunk-segmentation.png
--------------------------------------------------------------------------------
/text_mining/img/datatau.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/text_mining/img/datatau.png
--------------------------------------------------------------------------------
/text_mining/img/date.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/text_mining/img/date.png
--------------------------------------------------------------------------------
/text_mining/img/entity_extraction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/text_mining/img/entity_extraction.png
--------------------------------------------------------------------------------
/text_mining/img/gutenberg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/text_mining/img/gutenberg.png
--------------------------------------------------------------------------------
/text_mining/img/punkt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/text_mining/img/punkt.png
--------------------------------------------------------------------------------
/text_mining/img/title.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/text_mining/img/title.png
--------------------------------------------------------------------------------
/text_mining/nltk_data.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/text_mining/nltk_data.zip
--------------------------------------------------------------------------------
/time_series/1-Frame.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 1. Frame the Problem\n",
8 | "\n",
9 | "In late 2010, onion prices shot through the roof, causing a grave crisis. Apparently the crisis was caused by a lack of rainfall in the major onion-producing regions - Maharashtra and Karnataka - and led to large-scale hoarding by traders. The crisis caused political tension in the country and was described as \"a grave concern\" by then Prime Minister Manmohan Singh.\n",
10 | "\n",
11 | "\n",
12 | "- BBC Article in Dec 2010 - [Stink over onion crisis is enough to make you cry](http://www.bbc.co.uk/blogs/thereporters/soutikbiswas/2010/12/indias_onion_crisis.html)\n",
13 | "- Hindu OpEd in Dec 2010 - [The political price of onions](http://www.thehindu.com/opinion/editorial/article977100.ece)\n",
14 | "\n",
15 | "\n",
16 | "\n",
17 | "So what are the types of questions about onion prices that you would like to ask?\n",
18 | "\n",
19 | "\n",
20 | "## Types of Question\n",
21 | "\n",
22 | "> \"Doing data analysis requires quite a bit of thinking and we believe that when you’ve completed a good data analysis, you’ve spent more time thinking than doing.\" - Roger Peng\n",
23 | "\n",
24 | "1. **Descriptive** - \"seeks to summarize a characteristic of a set of data\"\n",
25 | "2. **Exploratory** - \"analyze the data to see if there are patterns, trends, or relationships between variables\" (hypothesis generating) \n",
26 | "3. **Inferential** - \"a restatement of this proposed hypothesis as a question and would be answered by analyzing a different set of data\" (hypothesis testing)\n",
27 | "4. **Predictive** - \"determine the impact on one factor based on other factor in a population - to make a prediction\"\n",
28 | "5. **Causal** - \"asks whether changing one factor will change another factor in a population - to establish a causal link\" \n",
29 | "6. **Mechanistic** - \"establish *how* the change in one factor results in change in another factor in a population - to determine the exact mechanism\"\n",
30 | "\n",
31 | "\n",
32 | "### Descriptive \n",
33 | "- Which states have the highest onion production and sales?\n",
34 | "- Which cities (mandis) have the highest sales?\n",
35 | "- What is the average price for Onion across a year in Bangalore?\n",
36 | "- ...\n",
37 | "\n",
38 | "### Exploratory & Inferential \n",
39 | "- Is there a large difference between High and Low prices of Onion in a day?\n",
40 | "- What is the trend of onion price across days or months in Bangalore?\n",
41 | "- How is the price of onion correlated with the volume of onion?\n",
42 | "- How is the export volume of onion correlated to domestic production volume?\n",
43 | "- ...\n",
44 | "\n",
45 | "### Predictive \n",
46 | "- What is the price of onion likely to be next day?\n",
47 | "- What is the price of onion likely to be next month?\n",
48 | "- What will be the sales quantity of onion tomorrow in Delhi?\n",
49 | "- ...\n",
50 | "\n",
51 | "### Causal\n",
52 | "- Does the change in production of onion have an impact on the onion prices? \n",
53 | "- Does the change in rainfall in monsoon have an impact on onion prices?\n",
54 | "- ...\n",
55 | "\n",
56 | "### Mechanistic\n",
57 | "- How does change in onion production impact the price of onion?\n",
58 | "- How do onion export volumes impact the prices of onion in local markets in India?\n",
59 | "- ...\n",
60 | "\n",
61 | "\n",
62 | "## Questions we will attempt\n",
63 | "\n",
64 | "### 1. Descriptive: How big is the Bangalore onion market compared to other cities in India?\n",
65 | "\n",
66 | "### 2. Exploratory / Inferential: Has the variation in onion prices in Bangalore really gone up over the years?\n",
67 | "\n",
68 | "### 3. Predictive: Can we predict the price of onion in Bangalore?"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {
75 | "collapsed": true
76 | },
77 | "outputs": [],
78 | "source": []
79 | }
80 | ],
81 | "metadata": {
82 | "kernelspec": {
83 | "display_name": "Python 3",
84 | "language": "python",
85 | "name": "python3"
86 | },
87 | "language_info": {
88 | "codemirror_mode": {
89 | "name": "ipython",
90 | "version": 3
91 | },
92 | "file_extension": ".py",
93 | "mimetype": "text/x-python",
94 | "name": "python",
95 | "nbconvert_exporter": "python",
96 | "pygments_lexer": "ipython3",
97 | "version": "3.5.1"
98 | }
99 | },
100 | "nbformat": 4,
101 | "nbformat_minor": 0
102 | }
103 |
--------------------------------------------------------------------------------
/time_series/city_geocode.csv:
--------------------------------------------------------------------------------
1 | city,lon,lat
2 | GUWAHATI,91.7362365,26.1445169
3 | KOLKATA,88.363895,22.572646
4 | SRIRAMPUR,88.3385053,23.4033393
5 | SHEROAPHULY,88.3215014,22.7690032
6 | BURDWAN,87.8614793,23.2324214
7 | MIDNAPUR,87.3214908,22.4308892
8 | PURULIA,86.365208,23.3320779
9 | DHULIA,86.0618818,22.0347727
10 | BHUBNESWER,85.8245398,20.2960587
11 | BIHARSHARIF,85.5148735,25.1982147
12 | RANCHI,85.309562,23.3440997
13 | PATNA,85.1375645,25.5940947
14 | BALLIA,84.1487319,25.7584381
15 | DEORIA,83.7838214,26.4862373
16 | GORAKHPUR,83.3731675,26.7605545
17 | VARANASI,82.9739144,25.3176452
18 | RAJAHMUNDRY,81.8040345,17.0005383
19 | RAIPUR,81.6296413,21.2513844
20 | DINDORI,81.0768455,22.9417931
21 | LUCKNOW,80.946166,26.8466937
22 | KANPUR,80.3318736,26.449923
23 | CHENNAI,80.2707184,13.0826802
24 | HALDWANI,79.5129767,29.2182644
25 | BAREILLY,79.4304381,28.3670355
26 | NAGPUR,79.0881546,21.1458004
27 | ETAWAH,79.0046898,26.8117116
28 | SAGAR,78.7378068,23.838805
29 | SAIKHEDA,78.5831181,22.962215
30 | HYDERABAD,78.486671,17.385044
31 | KOLAR,78.1325611,13.1357446
32 | MADURAI,78.1197754,9.9252007
33 | ALIGARH,78.0880129,27.8973944
34 | KURNOOL,78.0372792,15.8281257
35 | DEHRADOON,78.0321918,30.3164945
36 | AGRA,78.0080745,27.1766701
37 | DINDIGUL,77.9802906,10.3673123
38 | CHICKBALLAPUR,77.7280396,13.432366
39 | MEERUT,77.7064137,28.9844618
40 | BANGALORE,77.5945627,12.9715987
41 | BHOPAL,77.412615,23.2599333
42 | RAICHUR,77.3439283,16.2120031
43 | DELHI,77.2090212,28.6139391
44 | SHIMLA,77.1734033,31.1048145
45 | KARNAL,76.9904825,29.6856929
46 | COIMBATORE,76.9558321,11.0168445
47 | PALAYAM,76.9513432,8.5027684
48 | TRIVENDRUM,76.9366376,8.5241391
49 | CHANDIGARH,76.7794179,30.7333148
50 | CHALLAKERE,76.6528225,14.313395
51 | ALWAR,76.6345735,27.5529907
52 | PATIALA,76.3868797,30.3397809
53 | DEVALA,76.3820088,11.4725502
54 | KHANNA,76.2112286,30.697852
55 | HASSAN,76.0995519,13.0068142
56 | DEWAS,76.0507949,22.9622672
57 | DHAVANGERE,75.9238397,14.4663438
58 | HOSHIARPUR,75.911483,31.5143178
59 | SOLAPUR,75.9063906,17.6599188
60 | KOTA,75.8647527,25.2138156
61 | INDORE,75.8577258,22.7195687
62 | LUDHIANA,75.8572758,30.900965
63 | JAIPUR,75.7872709,26.9124336
64 | UJJAIN,75.7849097,23.1793013
65 | BIJAPUR,75.710031,16.8301708
66 | JALANDHAR,75.5761829,31.3260152
67 | JALGAON,75.5626039,21.0076578
68 | HUBLI,75.1239547,15.3647083
69 | MANDSOUR,75.0692952,24.076836
70 | BHATINDA,74.9454745,30.210994
71 | SRINAGAR,74.9442585,34.1255413
72 | NEWASA,74.9281063,19.5511772
73 | AMRITSAR,74.8722642,31.6339793
74 | NEEMUCH,74.8624092,24.4763852
75 | JAMMU,74.8576539,32.7217819
76 | AHMEDNAGAR,74.7495916,19.0952075
77 | SHRIRAMPUR,74.6576091,19.6222323
78 | RAHURI,74.6488264,19.392678
79 | AJMER,74.6399163,26.4498954
80 | SANGALI,74.5814773,16.8523973
81 | MALEGAON,74.5100291,20.5547497
82 | BELGAUM,74.4976741,15.8496953
83 | RAHATA,74.483335,19.7127021
84 | YEOLA,74.4818698,20.0471229
85 | KOPERGAON,74.4790898,19.8916791
86 | MANMAD,74.4366016,20.2511789
87 | PHALTAN ,74.4360424,17.9844507
88 | CHANDVAD,74.2472779,20.3271277
89 | KOLHAPUR,74.2432527,16.7049873
90 | LASALGAON,74.2326058,20.1491422
91 | SANGAMNER,74.2079648,19.5771387
92 | SATANA,74.2032581,20.598224
93 | ABOHAR,74.1993043,30.1452928
94 | LONAND,74.1861821,18.041706
95 | NIPHAD,74.1093141,20.0799646
96 | SINNAR,74.0006328,19.8530593
97 | PIMPALGAON,73.9873787,20.1699678
98 | SRIGANGANAGAR,73.8771901,29.9038399
99 | JUNNAR,73.87425,19.2031842
100 | CHAKAN,73.8630346,18.7602664
101 | PUNE,73.8567437,18.5204303
102 | NASIK,73.7898023,19.9974533
103 | UDAIPUR,73.712479,24.585445
104 | BIKANER,73.3119159,28.0229348
105 | JODHPUR,73.0243094,26.2389469
106 | NANDGAON,72.9276008,18.3855337
107 | MUMBAI,72.8776559,19.0759837
108 | SURAT,72.8310607,21.1702401
109 | AHMEDABAD,72.5713621,23.022505
110 | DEESA,72.1906721,24.2585031
111 | BHAVNAGAR,72.1519304,21.7644725
112 | MAHUVA,71.7563169,21.0902193
113 | RAJKOT,70.8021599,22.3038945
114 | GONDAL,70.792297,21.9619463
115 | JAMNAGAR,70.05773,22.4707019
116 | KALVAN,73.13054,19.24033
117 | VANI,73.89189,20.33749
118 | BOMBORI,72.87766,19.07598
--------------------------------------------------------------------------------
/time_series/img/Cov_nonstationary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/time_series/img/Cov_nonstationary.png
--------------------------------------------------------------------------------
/time_series/img/Mean_nonstationary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/time_series/img/Mean_nonstationary.png
--------------------------------------------------------------------------------
/time_series/img/Var_nonstationary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/time_series/img/Var_nonstationary.png
--------------------------------------------------------------------------------
/time_series/img/left_merge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/time_series/img/left_merge.png
--------------------------------------------------------------------------------
/time_series/img/onion_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/time_series/img/onion_small.png
--------------------------------------------------------------------------------
/time_series/img/onion_tables.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/time_series/img/onion_tables.png
--------------------------------------------------------------------------------
/time_series/img/peeling_the_onion_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/time_series/img/peeling_the_onion_small.png
--------------------------------------------------------------------------------
/time_series/img/pivot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/time_series/img/pivot.png
--------------------------------------------------------------------------------
/time_series/img/splitapplycombine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/time_series/img/splitapplycombine.png
--------------------------------------------------------------------------------
/time_series/img/subsetcolumns.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/time_series/img/subsetcolumns.png
--------------------------------------------------------------------------------
/time_series/img/subsetrows.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amitkaps/machine-learning/3b15a08198e3e151719b75f1d58e9f2ff157b324/time_series/img/subsetrows.png
--------------------------------------------------------------------------------
/time_series/state_geocode.csv:
--------------------------------------------------------------------------------
1 | "state","name","lon","lat"
2 | "MS","Maharashtra",75.7138884,19.7514798
3 | "GUJ","Gujarat",71.1923805,22.258652
4 | "MP","Madhya pradesh",78.6568942,22.9734229
5 | "TN","Tamil Nadu",78.6568942,11.1271225
6 | "KNT","Karnataka",75.7138884,15.3172775
7 | "DEL","Delhi",77.2090212,28.6139391
8 | "HR","Haryana",76.085601,29.0587757
9 | "RAJ","Rajasthan",74.2179326,27.0238036
10 | "AP","Andhra Pradesh",79.7399875,15.9128998
11 | "UP","Uttar Pradesh",80.9461592,26.8467088
12 | "JK","Jammu & Kashmir",74.8576539,32.7217819
13 | "BHR","Bihar",85.3131194,25.0960742
14 | "WB","West Bengal",87.8549755,22.9867569
15 | "HP","Himachal Pradesh",77.1733901,31.1048294
16 | "ASM","Assam",92.9375739,26.2006043
17 | "KEL","Kerala",76.2710833,10.8505159
18 | "JH","Jharkhand",85.2799354,23.6101808
19 | "OR","Orissa",85.0985236,20.9516658
20 | "PB","Punjab",75.3412179,31.1471305
21 | "KER","Kerala",76.2710833,10.8505159
22 | "CH","Chandigarh",76.7794179,30.7333148
23 |
--------------------------------------------------------------------------------