├── .gitignore ├── LICENSE ├── README.md ├── check_env.py ├── data ├── DataTau.html ├── data_tau.csv ├── data_tau_days.csv ├── data_tau_ta.csv ├── flatlands.txt ├── negative_words.txt └── postive_words.txt ├── img ├── chunk-segmentation.png ├── datatau.png ├── date.png ├── entity_extraction.png ├── gutenberg.png ├── ldaformula.png ├── nb.png ├── punkt.png └── title.png └── notebook ├── data-tau ├── Acquire.ipynb ├── Explore.ipynb ├── Model.ipynb └── Refine.ipynb └── twitter ├── Acquire.ipynb ├── Explore.ipynb ├── Model.ipynb ├── Refine.ipynb ├── demonetization.csv └── tweets.csv /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/macos,python 3 | 4 | ### macOS ### 5 | *.DS_Store 6 | .AppleDouble 7 | .LSOverride 8 | 9 | # Icon must end with two \r 10 | Icon 11 | # Thumbnails 12 | ._* 13 | # Files that might appear in the root of a volume 14 | .DocumentRevisions-V100 15 | .fseventsd 16 | .Spotlight-V100 17 | .TemporaryItems 18 | .Trashes 19 | .VolumeIcon.icns 20 | .com.apple.timemachine.donotpresent 21 | # Directories potentially created on remote AFP share 22 | .AppleDB 23 | .AppleDesktop 24 | Network Trash Folder 25 | Temporary Items 26 | .apdisk 27 | 28 | 29 | ### Python ### 30 | # Byte-compiled / optimized / DLL files 31 | __pycache__/ 32 | *.py[cod] 33 | *$py.class 34 | 35 | # C extensions 36 | *.so 37 | 38 | # Distribution / packaging 39 | .Python 40 | env/ 41 | build/ 42 | develop-eggs/ 43 | dist/ 44 | downloads/ 45 | eggs/ 46 | .eggs/ 47 | lib/ 48 | lib64/ 49 | parts/ 50 | sdist/ 51 | var/ 52 | *.egg-info/ 53 | .installed.cfg 54 | *.egg 55 | 56 | # PyInstaller 57 | # Usually these files are written by a python script from a template 58 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
59 | *.manifest 60 | *.spec 61 | 62 | # Installer logs 63 | pip-log.txt 64 | pip-delete-this-directory.txt 65 | 66 | # Unit test / coverage reports 67 | htmlcov/ 68 | .tox/ 69 | .coverage 70 | .coverage.* 71 | .cache 72 | nosetests.xml 73 | coverage.xml 74 | *,cover 75 | .hypothesis/ 76 | 77 | # Translations 78 | *.mo 79 | *.pot 80 | 81 | # Django stuff: 82 | *.log 83 | local_settings.py 84 | 85 | # Flask stuff: 86 | instance/ 87 | .webassets-cache 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # Jupyter Notebook 99 | .ipynb_checkpoints 100 | 101 | # pyenv 102 | .python-version 103 | 104 | # celery beat schedule file 105 | celerybeat-schedule 106 | 107 | # dotenv 108 | .env 109 | 110 | # virtualenv 111 | .venv/ 112 | venv/ 113 | ENV/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Amit Kapoor 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
"""
Script to check if the required packages for the workshop are installed

Run it directly (``python check_env.py``).  One line is printed for every
required package that cannot be imported or is older than the minimum
listed in ``has``; the process exit status is the number of such problems
(0 means the environment is ready).

Author: Bargava Subramanian

"""
import importlib
import re
import sys

# requirements: importable module name -> minimum version
has = dict(
    gensim='0.12.4',
    IPython='5.0.0',
    lda='1.0.4',
    matplotlib='1.5.1',
    numpy='1.11.0',
    pandas='0.19',
    requests='2.11.1',
    scipy='0.17.0',
    seaborn='0.7',
    sklearn='0.18',
    spacy='1.2.0',
    tweepy='3.5.0'
    )

# Extras that are only reported when they cannot be imported; they never
# change the exit status.  Each entry is (module to import, message printed
# when the import fails).
optional = (
    ('PIL.Image',
     "Image not found. Please install pillow package or Image package"),
    ('wordcloud', "wordcloud package not found"),
)

# Leading dotted run of integers in a version string,
# e.g. the "1.11.0" in "1.11.0rc1".
_RELEASE = re.compile(r'\d+(?:\.\d+)*')


def _say(message):
    """Print one report line to stdout."""
    print(message)


def _release_tuple(version):
    """Return the leading numeric part of *version* as a tuple of ints.

    Returns None when the text does not start with a digit.
    """
    found = _RELEASE.match(str(version).strip())
    if found is None:
        return None
    return tuple(int(part) for part in found.group().split('.'))


def version_satisfied(installed, required):
    """Return True when *installed* is at least *required*.

    Only the leading dotted-integer part of each version is compared, and
    components are compared as numbers (so "10.0" is newer than "9.5").
    The shorter version is padded with zeros, so "1" satisfies "1.0".
    A version that does not start with a digit cannot be compared and is
    treated as satisfying the requirement.
    """
    have = _release_tuple(installed)
    need = _release_tuple(required)
    if have is None or need is None:
        return True
    width = max(len(have), len(need))
    have += (0,) * (width - len(have))
    need += (0,) * (width - len(need))
    return have >= need


def check_requirements(requirements, importer=importlib.import_module,
                       report=None):
    """Import every required module and compare its version to the minimum.

    requirements -- mapping of module name to minimum version string.
    importer     -- callable taking a module name and returning the module;
                    it must raise ImportError when the module is missing.
    report       -- callable receiving each message line (default: print).

    Returns the number of problems found (failed imports plus versions that
    are too old).  Import problems are all reported before version problems.
    """
    if report is None:
        report = _say
    failures = 0
    loaded = {}

    # check installed packages
    for module in requirements:
        try:
            # Only the text after the last '-' in a key is used as the
            # import name.
            loaded[module] = importer(module.split('-')[-1])
        except ImportError as err:
            report("%s:: %s" % (module, err))
            failures += 1

    # check required versions
    for module, version in requirements.items():
        if module not in loaded:
            continue  # failed import, already reported above
        installed = getattr(loaded[module], '__version__', None)
        if installed is None:
            continue  # can't version-check non-standard packages...
        # An explicit test rather than `assert`, which is removed under -O.
        if not version_satisfied(installed, version):
            report("%s:: Version >= %s is required" % (module, version))
            failures += 1

    return failures


def check_optional(extras, importer=importlib.import_module, report=None):
    """Try to import each optional module and report the ones that fail.

    extras   -- iterable of (module name, message) pairs.
    importer -- callable taking a module name and returning the module.
    report   -- callable receiving each message line (default: print).

    Returns how many of the extras could not be imported.
    """
    if report is None:
        report = _say
    missing = 0
    for module, message in extras:
        try:
            importer(module)
        except Exception:
            # Any import failure means the extra is unusable.  Unlike a bare
            # `except:`, this still lets Ctrl-C and SystemExit propagate.
            report(message)
            missing += 1
    return missing


def main():
    """Run all checks, print the final report and return the exit status."""
    returns = check_requirements(has)
    check_optional(optional)

    # final report
    if not returns:
        print('-' * 50)
        print('OK. All required items installed.')

    return returns


if __name__ == '__main__':
    sys.exit(main())
DataTaunew | comments | leaders | submitlogin
1.
An Exploration of R, Yelp, and the Search for Good Indian Food (springboard.com)
5 points by Rogerh91 4 hours ago | discuss
2.
Spark Pipelines: Elegant Yet Powerful (insightdatalabs.com)
3 points by aouyang1 7 hours ago | discuss
3.
Deep Advances in Generative Modeling (youtube.com)
7 points by gwulfs 13 hours ago | 1 comment
4.
Shit VCs Say (buzzfeed.com)
3 points by Argentum01 8 hours ago | discuss
5.
Python, Machine Learning, and Language Wars (sebastianraschka.com)
4 points by pmigdal 15 hours ago | discuss
6.
A Neural Network in 11 lines of Python (github.io)
3 points by dekhtiar 13 hours ago | discuss
7.
Markov Chains Explained Visually (setosa.io)
13 points by zeroviscosity 1 day ago | 1 comment
8.
Dplython: Dplyr for Python (github.com)
13 points by thenaturalist 1 day ago | 3 comments
9.
Inferring causal impact using Bayesian structural time-series models (google.com)
8 points by Homunculiheaded 1 day ago | 1 comment
10.
A Billion Taxi Rides on Amazon EMR running Spark (marksblogg.com)
5 points by marklit 1 day ago | 1 comment
11.
Tutorial: Web scraping and mapping breweries with import.io and R (trendct.org)
4 points by jasdumas 1 day ago | discuss
12.
The rise of greedy robots (yanirseroussi.com)
4 points by yanir 2 days ago | discuss
13.
Python for Data Structures, Algorithms, and Interviews (github.com)
18 points by kokoubaby 4 days ago | discuss
14.
Extracting image metadata at scale (netflix.com)
2 points by zachwill 1 day ago | discuss
15.
Lift charts - A data scientist's secret weapon (datalifebalance.com)
14 points by datenheini 4 days ago | 2 comments
16.
How To Become A Machine Learning Expert In One Simple Step (swanintelligence.com)
4 points by swanint 2 days ago | discuss
17.
Engineers Shouldn’t Write ETL: High Functioning Data Science Departments (stitchfix.com)
10 points by legel 4 days ago | 3 comments
18.
Simple estimation of hierarchical events with petersburg (willmcginnis.com)
3 points by wdm0006 2 days ago | discuss
19.
Data Science Side Project
6 points by yashpatel5400 2 days ago | 8 comments
20.
Unsupervised Computer Vision: The Current State of the Art (stitchfix.com)
6 points by carlosfaham 3 days ago | discuss
21.
Data Engineering at Slack: Twelve Mistakes I've Made In My First Three Months (google.com)
14 points by gwulfs 6 days ago | 2 comments
22.
What data visualization tools do /r/DataIsBeautiful OC creators use? (randalolson.com)
3 points by pmigdal 2 days ago | discuss
23.
Reshaping in Pandas (nikolaygrozev.wordpress.com)
6 points by carlosgg 4 days ago | discuss
24.
An unusual interactive machine learning challenge (blackboxchallenge.com)
4 points by gglumov 3 days ago | discuss
25.
Datumbox Machine Learning Framework 0.7.0 Released (datumbox.com)
4 points by datumbox 3 days ago | discuss
26.
Data science intro for math/phys background (p.migdal.pl)
14 points by pmigdal 7 days ago | discuss
27.
Neural Networks demystified (lumiverse.io)
16 points by elyase 8 days ago | discuss
28.
What machines can learn from Apple Watch: detecting undiagnosed heart condition (insighthealthdata.com)
9 points by koukouhappy 6 days ago | discuss
29.
Data Science Tools: The Biggest Winners and Losers (dominodatalab.com)
12 points by AnnaOnTheWeb 7 days ago | discuss
30.
10 Years of Open Source Machine Learning (medium.com)
9 points by tstonez 6 days ago | 1 comment
More

37 |
RSS 38 | | Announcements 39 |
-------------------------------------------------------------------------------- /data/data_tau.csv: -------------------------------------------------------------------------------- 1 | title,date 2 | "An Exploration of R, Yelp, and the Search for Good Indian Food",5 points by Rogerh91 6 hours ago | discuss 3 | Deep Advances in Generative Modeling,7 points by gwulfs 15 hours ago | 1 comment 4 | Spark Pipelines: Elegant Yet Powerful,3 points by aouyang1 9 hours ago | discuss 5 | Shit VCs Say,3 points by Argentum01 10 hours ago | discuss 6 | "Python, Machine Learning, and Language Wars",4 points by pmigdal 17 hours ago | discuss 7 | A Neural Network in 11 lines of Python ,3 points by dekhtiar 14 hours ago | discuss 8 | Markov Chains Explained Visually,13 points by zeroviscosity 1 day ago | 1 comment 9 | Dplython: Dplyr for Python,13 points by thenaturalist 1 day ago | 3 comments 10 | Inferring causal impact using Bayesian structural time-series models,8 points by Homunculiheaded 1 day ago | 1 comment 11 | A Billion Taxi Rides on Amazon EMR running Spark,5 points by marklit 1 day ago | 1 comment 12 | Tutorial: Web scraping and mapping breweries with import.io and R,4 points by jasdumas 1 day ago | discuss 13 | The rise of greedy robots,4 points by yanir 2 days ago | discuss 14 | "Python for Data Structures, Algorithms, and Interviews",18 points by kokoubaby 4 days ago | discuss 15 | Extracting image metadata at scale,2 points by zachwill 1 day ago | discuss 16 | Lift charts - A data scientist's secret weapon,14 points by datenheini 4 days ago | 2 comments 17 | Data Science Side Project,7 points by yashpatel5400 2 days ago | 9 comments 18 | How To Become A Machine Learning Expert In One Simple Step,4 points by swanint 2 days ago | discuss 19 | Engineers Shouldn’t Write ETL: High Functioning Data Science Departments,10 points by legel 4 days ago | 3 comments 20 | Simple estimation of hierarchical events with petersburg,3 points by wdm0006 2 days ago | discuss 21 | 
Unsupervised Computer Vision: The Current State of the Art,6 points by carlosfaham 3 days ago | discuss 22 | Data Engineering at Slack: Twelve Mistakes I've Made In My First Three Months,14 points by gwulfs 6 days ago | 2 comments 23 | What data visualization tools do /r/DataIsBeautiful OC creators use?,3 points by pmigdal 2 days ago | discuss 24 | Reshaping in Pandas,6 points by carlosgg 4 days ago | discuss 25 | An unusual interactive machine learning challenge,4 points by gglumov 3 days ago | discuss 26 | Datumbox Machine Learning Framework 0.7.0 Released,4 points by datumbox 3 days ago | discuss 27 | Data science intro for math/phys background,14 points by pmigdal 7 days ago | discuss 28 | Neural Networks demystified,16 points by elyase 8 days ago | discuss 29 | What machines can learn from Apple Watch: detecting undiagnosed heart condition,9 points by koukouhappy 6 days ago | discuss 30 | Data Science Tools: The Biggest Winners and Losers,12 points by AnnaOnTheWeb 7 days ago | discuss 31 | 10 Years of Open Source Machine Learning,9 points by tstonez 6 days ago | 1 comment 32 | Has your conversion rate changed? 
Bayesian timeseries analysis with Python,12 points by yummyfajitas 8 days ago | discuss 33 | Do jobs run in families?,5 points by Anon84 5 days ago | 1 comment 34 | Introduction to Scikit Flow - Simplified Interface to TensorFlow,8 points by lefish 7 days ago | discuss 35 | "XGBoost4J: Portable Distributed XGboost in Spark, Flink and Dataflow",8 points by crowwork 8 days ago | discuss 36 | How to learn machine learning?,8 points by kiechu 8 days ago | 1 comment 37 | The Deep Roots of Javascript Fatigue,5 points by nikkielizdemere 6 days ago | 1 comment 38 | How do we make Data Tau work?,27 points by hal8 9 days ago | 18 comments 39 | "Machine Learning: An In-Depth, Non-Technical Guide — Part 4",7 points by innoarchitech 8 days ago | discuss 40 | Data Science Slack channel - Click for invite,7 points by jyotsna 8 days ago | discuss 41 | [Ask DT] What are some rookie mistakes in R?,3 points by HKtemp 3 days ago | discuss 42 | "Playing ""Moneyball"" on EA FIFA 16",16 points by aabb13 13 days ago | 3 comments 43 | Intellexer - Natural Language Processing and Text Mining REST API,16 points by j_downer 13 days ago | discuss 44 | Descriptive Statistics in SQL,5 points by nickhould 7 days ago | discuss 45 | Genomic Data Visualization using Python,2 points by RadhouaneAniba 4 days ago | discuss 46 | How to Use Cohort Data to Analyze User Behavior,2 points by clevertap 4 days ago | discuss 47 | Making transparent how variations in analytical choices affect results,4 points by rahmaniacc 7 days ago | discuss 48 | Show DT: Datasets.co - An easy way to share and discover ml datasets,2 points by mrborgen86 4 days ago | discuss 49 | Is Scala a better choice than Python for Apache Spark?,7 points by srinify 10 days ago | 1 comment 50 | Julia: A Fast Language for Numerical Computing,7 points by srinify 10 days ago | 1 comment 51 | "An Ode To The Rice Cooker, The Smartest Kitchen Appliance I’ve Ever Owned",2 points by tfturing 4 days ago | discuss 52 | Computing Classification 
Evaluation Metrics in R,4 points by lefish 7 days ago | discuss 53 | Analyzing Golden State Warriors' passing network using GraphFrames in Spark,3 points by yukiegosapporo 6 days ago | discuss 54 | Megaman: Manifold Learning with Millions of points,4 points by dperry 8 days ago | 3 comments 55 | How to Detect Outliers on Parametric and Non Parametric Methods,2 points by clevertap 5 days ago | discuss 56 | BallR: Interactive NBA Shot Charts with R and Shiny,12 points by carlosgg 14 days ago | discuss 57 | A Billion Taxi Rides on Amazon EMR Running Presto,4 points by marklit 8 days ago | discuss 58 | Minecraft to run artificial intelligence experiments,4 points by bsadeghi 8 days ago | discuss 59 | Deep Q-Learning (Space Invaders),4 points by pmigdal 8 days ago | discuss 60 | Theano Tutorial,2 points by pmigdal 5 days ago | discuss 61 | The Personality Space of Cartoon Characters,3 points by lefish 7 days ago | discuss 62 | Announcing Apache Flink 1.0.0,11 points by mxm 14 days ago | discuss 63 | "Telemetry with Collectd, Logstash, Elasticsearch and Grafana (ELG)",3 points by helloanand 7 days ago | discuss 64 | Statisticians Agree: It’s Time To Stop Misusing P-Value,10 points by jpiburn 15 days ago | 5 comments 65 | Bayesian Reasoning in The Twilight Zone!,2 points by Homunculiheaded 6 days ago | discuss 66 | Bayesian Estimation of G Train Wait Times,7 points by jamesdreiss 12 days ago | discuss 67 | XGBoost: A Scalable Tree Boosting System article,6 points by tfturing 12 days ago | discuss 68 | Some experiments into explaining complex black box ensemble predictions,2 points by lefish 6 days ago | discuss 69 | Creating a Hadoop Pseudo-Distributed Environment,2 points by lefish 6 days ago | discuss 70 | "Data Science Pop-Up in Austin, TX",2 points by AnnaOnTheWeb 6 days ago | discuss 71 | Train your own image classifier with Inception in TensorFlow,7 points by elyase 13 days ago | discuss 72 | Shiny app for running a Tensorflow demo,3 points by shinyman 9 days ago | 
discuss 73 | File details and owners with gitnoc and git-pandas,3 points by wdm0006 9 days ago | discuss 74 | 7 Big Data Technologies and When to Use Them that All Data Engineers Should Know,2 points by galvanize 7 days ago | discuss 75 | Topic clusters with TF-IDF vectorization with Spark and Scala,2 points by lefish 7 days ago | discuss 76 | Neural Doodles: Workflows for the Next Generation of Artists,5 points by pmigdal 12 days ago | discuss 77 | Graph Databases 101,5 points by carlosgg 12 days ago | discuss 78 | DataRadar.IO - Data Science RSS Feed - Do you have enough data about your data,2 points by dekhtiar 8 days ago | 3 comments 79 | International Women's Day: What #PledgeForParity Means To Us,5 points by ddrum001 14 days ago | discuss 80 | Top 50 Data Science thought leaders on Twitter,3 points by datawerq 11 days ago | 3 comments 81 | Ask DT: Who Is Hiring? (March 2016),27 points by whoishiring 21 days ago | 15 comments 82 | Deriving Better Insights From Time Series Data With Cycle Plots,3 points by clevertap 11 days ago | discuss 83 | Introducing GraphFrames,7 points by falaki 19 days ago | discuss 84 | SQL for Data Analysis,4 points by nickhould 14 days ago | 6 comments 85 | Stream processing and messaging systems for the IoT age,3 points by gradientflow 12 days ago | discuss 86 | Announcing R Tools for Visual Studio,3 points by brakmic 13 days ago | discuss 87 | A simpler way to merge data streams,3 points by apoverton 13 days ago | discuss 88 | Optimizing Notification Timing for One Signal,9 points by megandias 26 days ago | discuss 89 | Skizze - A high throughput probabilistic data structure service and storage,3 points by seiflotfy 14 days ago | discuss 90 | Question: What do you want to say about working with data?,2 points by emiller425 8 days ago | discuss 91 | Genomic Ranges - an Introduction to Working with Genomic Data,3 points by AnnaOnTheWeb 13 days ago | discuss 92 | TensorFlow for Poets,9 points by ebellm 21 days ago | 1 comment 93 | 
Unsupervised Learning with Even Less Supervision Using Bayesian Optimization,2 points by idewanck 11 days ago | discuss 94 | How to work with large JSON datasets using Python and Pandas,9 points by brian_spiering 21 days ago | discuss 95 | DrivenData Competition: Model/Visualize Fog Patterns in Morocco,4 points by bull 15 days ago | discuss 96 | Deep Learning: Nine Lectures at Collège de France by Yan LeCun,5 points by Anon84 17 days ago | discuss 97 | Optimizing Facebook Campaigns with R,2 points by AnnaOnTheWeb 12 days ago | 1 comment 98 | "Trump Tweets on a Globe (aka Fun with d3, socket.io, and the Twitter API)",8 points by joelgrus 21 days ago | discuss 99 | Why pandas users should be excited about Apache Arrow,17 points by pmigdal 29 days ago | discuss 100 | Histogram intersection for change detection,8 points by datadive 22 days ago | discuss 101 | Distributed TensorFlow just open-sourced,10 points by elyase 25 days ago | discuss 102 | D3.js Screencasts (1 in 3 are free),4 points by Veerle 18 days ago | discuss 103 | Regression and Classification with Examples in R,5 points by soates 20 days ago | discuss 104 | Free online course on statistical shape modelling,8 points by shapemean 25 days ago | discuss 105 | "Don't worry about deep learning, deepen your understanding of causality instead",22 points by yanir 37 days ago | discuss 106 | Work with private repositories and other updates of the FlyElephant platform,2 points by m31 15 days ago | discuss 107 | How to import XML to almost anywhere,4 points by Jammink 20 days ago | discuss 108 | Survival Analysis of Cricket Player Careers,8 points by keshav92 26 days ago | 6 comments 109 | Generate image analogies using neural matching and blending,2 points by pmigdal 15 days ago | discuss 110 | "Analyzing 1.8M tweets from Super Bowl 50 (Twython, Twitter API, AYLIEN)",4 points by mikewally 20 days ago | discuss 111 | Newly released sklearn compatible library of categorical encoders,7 points by wdm0006 25 days ago | 
discuss 112 | Watch Tiny Neural Nets Learn,4 points by swanint 21 days ago | discuss 113 | Four pitfalls of hill climbing: An animated look,5 points by csaid81 23 days ago | discuss 114 | "Decision Forests, Convolutional Networks and the Models in-Between",2 points by ebellm 16 days ago | discuss 115 | How a Math Genius Hacked OkCupid to Find True Love,15 points by roh_codeur 34 days ago | discuss 116 | No developers for PyLearn2,3 points by tfturing 19 days ago | discuss 117 | Density Estimation with Dirichlet Process Mixtures using PyMC3,6 points by MidsizeBlowfish 25 days ago | discuss 118 | Using survival analysis and git-pandas to estimate code quality,3 points by wdm0006 20 days ago | discuss 119 | An Analysis of the Flint Michigan Water Crisis: Part 1 Initial Corrosivity,3 points by JHorn 20 days ago | discuss 120 | An Analysis of Republican Twitter Follower Interests,6 points by michelangelo 26 days ago | discuss 121 | Introduction to ML talk,8 points by cjbayesian 29 days ago | discuss 122 | GloVe vs word2vec revisited,3 points by pmigdal 20 days ago | discuss 123 | Overoptimizing: a story about kaggle,4 points by wdm0006 30 days ago | discuss 124 | Undergrad Data Analysis/Science internships SF Bay?,3 points by tctctc 15 days ago | 5 comments 125 | The Role of Statistical Significance in Growth Hacking,6 points by rawls234 27 days ago | discuss 126 | Data Science Course @ Harvard,7 points by rahmaniacc 29 days ago | 2 comments 127 | Principal Component Projection Without Principal Component Analysis,6 points by genofon 27 days ago | discuss 128 | "Machine Learning: An In-Depth, Non-Technical Guide - Part 3",7 points by innoarchitech 29 days ago | discuss 129 | Stochastic Dummy Boosting,2 points by mikeskim 18 days ago | discuss 130 | Interactive Map: Hong-Kong through The Lense of Instagram,2 points by BrianN 19 days ago | discuss 131 | Data Science at Monsanto,3 points by doctorcroc 22 days ago | discuss 132 | Data Science at Instacart,11 points by 
jeremystan 34 days ago | 3 comments 133 | Building a Streaming Search Platform,6 points by ddrum001 28 days ago | discuss 134 | Kafka Producer Latency with Large Topic Counts,3 points by marklit 26 days ago | discuss 135 | A Sneak Peak of the Cloud: the 2 Minute Intro for Beginners,2 points by andymaheshw 20 days ago | discuss 136 | Win-Vector video courses: price/status changes,2 points by jmount 20 days ago | discuss 137 | 50+ Data Science and Machine Learning Cheat Sheets,20 points by elyase 42 days ago | 1 comment 138 | One More Reason Not To Be Scared of Deep Learning,2 points by amplifier_khan 21 days ago | discuss 139 | Visual Logic Authoring vs Code,2 points by AnnaOnTheWeb 21 days ago | discuss 140 | Data Science in Python online training with hands-on experience,2 points by Puneet 21 days ago | discuss 141 | Viewing the US Presidential Primary Through the Lens of Twitter,8 points by michelangelo 33 days ago | discuss 142 | Caffe on Spark open sourced,4 points by rahmaniacc 27 days ago | discuss 143 | The Ethical Data Scientist,5 points by tfturing 29 days ago | discuss 144 | Answers to Frequently Asked Questions in Machine Learning,3 points by rasbt 21 days ago | discuss 145 | Intro to A/B Testing and P-Values,2 points by randyzwitch 22 days ago | discuss 146 | Visualizing State Level Data With R and Statebins,2 points by usujason 22 days ago | discuss 147 | "Probabilistic Graphical Models slides & video lectures (Eric Xing, CMU)",4 points by ororm 28 days ago | discuss 148 | Sense2vec with spaCy and Gensim,9 points by elyase 36 days ago | 2 comments 149 | A Billion NYC Taxi and Uber Rides in AWS Redshift,3 points by marklit 31 days ago | discuss 150 | How to Code and Understand DeepMind's Neural Stack Machine (in Python),2 points by genofon 23 days ago | discuss 151 | How to make polished Jupyter presentations with optional code visibility,9 points by csaid81 36 days ago | discuss 152 | How to become a Bayesian in eight easy steps,17 points by EtzA 44 
days ago | 1 comment 153 | Optimizing .*: Details of Vectorization and Metaprogramming in Julia,4 points by randyzwitch 29 days ago | discuss 154 | IBM certified Apache Spark Online Training,8 points by divya_jain 36 days ago | discuss 155 | Geographic Data Science course,2 points by rk 25 days ago | discuss 156 | "The Daily Mail Stole My Visualization, Twice",5 points by thehoff 32 days ago | 1 comment 157 | Ensemble Methods: Improved Machine Learning Results,9 points by PyBloggers 38 days ago | discuss 158 | Apache Spark and unsupervised learning in security,2 points by gradientflow 26 days ago | discuss 159 | MachineJS: Automated machine learning- just give it a data file!,2 points by dsernst 26 days ago | discuss 160 | The NSA’s SKYNET program may be killing thousands of innocent people,6 points by zlipp 35 days ago | discuss 161 | "Big Dimensions, and What You Can Do About It",2 points by ramsey 27 days ago | discuss 162 | Automate Your Oscars Pool with R,2 points by jamesdreiss 27 days ago | discuss 163 | Signal Processing with LIGO GW150914 data,9 points by tfturing 39 days ago | discuss 164 | Overview of DeZyre and Coursera Data Science Course,5 points by ann928 34 days ago | discuss 165 | Upcoming Datathon in NYC,2 points by VicTrey 28 days ago | discuss 166 | Summarizing Data in SQL,15 points by elisebreda 46 days ago | discuss 167 | A/B Testing for Scammers,2 points by sameermanek 28 days ago | discuss 168 | Highly interpretable classifiers for scikit learn using Bayesian decision rules,2 points by mcnulty 28 days ago | discuss 169 | Auto-scaling scikit-learn with Spark,11 points by falaki 43 days ago | discuss 170 | Where the f*** can I park?,2 points by manugarri 29 days ago | discuss 171 | "Machine Learning: An In-Depth, Non-Technical Guide - Part 2",5 points by innoarchitech 36 days ago | discuss 172 | Webhose.io now offers a historical data archive,7 points by databuffer 40 days ago | discuss 173 | Meetup: Introduction to Machine Learning Algorithms 
for Data Science.,4 points by ann928 36 days ago | discuss 174 | Exploring the Limits of Language Modeling,8 points by soates 42 days ago | discuss 175 | Text Mining South Park,7 points by pmigdal 41 days ago | discuss 176 | Finding the K in K-means by Parametric Bootstrap,7 points by jmount 42 days ago | 1 comment 177 | Getting Started with Statistics for Data Science,3 points by nickhould 35 days ago | discuss 178 | Rodeo 1.3 - Tab-completion for docstrings,3 points by glamp 35 days ago | discuss 179 | Teaching D3.js - links,3 points by pmigdal 35 days ago | discuss 180 | Parallel scikit-learn on YARN,5 points by stijntonk 39 days ago | discuss 181 | Meetup: Free Live Webinar on Prescriptive Analytics for Fun and Profit,2 points by ann928 32 days ago | discuss 182 | -------------------------------------------------------------------------------- /data/data_tau_days.csv: -------------------------------------------------------------------------------- 1 | title,date,days 2 | "An Exploration of R, Yelp, and the Search for Good Indian Food",5 points by Rogerh91 6 hours ago | discuss,1 3 | Deep Advances in Generative Modeling,7 points by gwulfs 15 hours ago | 1 comment,1 4 | Spark Pipelines: Elegant Yet Powerful,3 points by aouyang1 9 hours ago | discuss,1 5 | Shit VCs Say,3 points by Argentum01 10 hours ago | discuss,1 6 | "Python, Machine Learning, and Language Wars",4 points by pmigdal 17 hours ago | discuss,1 7 | A Neural Network in 11 lines of Python ,3 points by dekhtiar 14 hours ago | discuss,1 8 | Markov Chains Explained Visually,13 points by zeroviscosity 1 day ago | 1 comment,1 9 | Dplython: Dplyr for Python,13 points by thenaturalist 1 day ago | 3 comments,1 10 | Inferring causal impact using Bayesian structural time-series models,8 points by Homunculiheaded 1 day ago | 1 comment,1 11 | A Billion Taxi Rides on Amazon EMR running Spark,5 points by marklit 1 day ago | 1 comment,1 12 | Tutorial: Web scraping and mapping breweries with import.io and R,4 points 
by jasdumas 1 day ago | discuss,1 13 | The rise of greedy robots,4 points by yanir 2 days ago | discuss,2 14 | "Python for Data Structures, Algorithms, and Interviews",18 points by kokoubaby 4 days ago | discuss,4 15 | Extracting image metadata at scale,2 points by zachwill 1 day ago | discuss,1 16 | Lift charts - A data scientist's secret weapon,14 points by datenheini 4 days ago | 2 comments,4 17 | Data Science Side Project,7 points by yashpatel5400 2 days ago | 9 comments,2 18 | How To Become A Machine Learning Expert In One Simple Step,4 points by swanint 2 days ago | discuss,2 19 | Engineers Shouldn?t Write ETL: High Functioning Data Science Departments,10 points by legel 4 days ago | 3 comments,4 20 | Simple estimation of hierarchical events with petersburg,3 points by wdm0006 2 days ago | discuss,2 21 | Unsupervised Computer Vision: The Current State of the Art,6 points by carlosfaham 3 days ago | discuss,3 22 | Data Engineering at Slack: Twelve Mistakes I've Made In My First Three Months,14 points by gwulfs 6 days ago | 2 comments,6 23 | What data visualization tools do /r/DataIsBeautiful OC creators use?,3 points by pmigdal 2 days ago | discuss,2 24 | Reshaping in Pandas,6 points by carlosgg 4 days ago | discuss,4 25 | An unusual interactive machine learning challenge,4 points by gglumov 3 days ago | discuss,3 26 | Datumbox Machine Learning Framework 0.7.0 Released,4 points by datumbox 3 days ago | discuss,3 27 | Data science intro for math/phys background,14 points by pmigdal 7 days ago | discuss,7 28 | Neural Networks demystified,16 points by elyase 8 days ago | discuss,8 29 | What machines can learn from Apple Watch: detecting undiagnosed heart condition,9 points by koukouhappy 6 days ago | discuss,6 30 | Data Science Tools: The Biggest Winners and Losers,12 points by AnnaOnTheWeb 7 days ago | discuss,7 31 | 10 Years of Open Source Machine Learning,9 points by tstonez 6 days ago | 1 comment,6 32 | Has your conversion rate changed? 
Bayesian timeseries analysis with Python,12 points by yummyfajitas 8 days ago | discuss,8 33 | Do jobs run in families?,5 points by Anon84 5 days ago | 1 comment,5 34 | Introduction to Scikit Flow - Simplified Interface to TensorFlow,8 points by lefish 7 days ago | discuss,7 35 | "XGBoost4J: Portable Distributed XGboost in Spark, Flink and Dataflow",8 points by crowwork 8 days ago | discuss,8 36 | How to learn machine learning?,8 points by kiechu 8 days ago | 1 comment,8 37 | The Deep Roots of Javascript Fatigue,5 points by nikkielizdemere 6 days ago | 1 comment,6 38 | How do we make Data Tau work?,27 points by hal8 9 days ago | 18 comments,9 39 | "Machine Learning: An In-Depth, Non-Technical Guide???Part 4",7 points by innoarchitech 8 days ago | discuss,8 40 | Data Science Slack channel - Click for invite,7 points by jyotsna 8 days ago | discuss,8 41 | [Ask DT] What are some rookie mistakes in R?,3 points by HKtemp 3 days ago | discuss,3 42 | "Playing ""Moneyball"" on EA FIFA 16",16 points by aabb13 13 days ago | 3 comments,13 43 | Intellexer - Natural Language Processing and Text Mining REST API,16 points by j_downer 13 days ago | discuss,13 44 | Descriptive Statistics in SQL,5 points by nickhould 7 days ago | discuss,7 45 | Genomic Data Visualization using Python,2 points by RadhouaneAniba 4 days ago | discuss,4 46 | How to Use Cohort Data to Analyze User Behavior,2 points by clevertap 4 days ago | discuss,4 47 | Making transparent how variations in analytical choices affect results,4 points by rahmaniacc 7 days ago | discuss,7 48 | Show DT: Datasets.co - An easy way to share and discover ml datasets,2 points by mrborgen86 4 days ago | discuss,4 49 | Is Scala a better choice than Python for Apache Spark?,7 points by srinify 10 days ago | 1 comment,10 50 | Julia: A Fast Language for Numerical Computing,7 points by srinify 10 days ago | 1 comment,10 51 | "An Ode To The Rice Cooker, The Smartest Kitchen Appliance I?ve Ever Owned",2 points by tfturing 4 days ago | 
discuss,4 52 | Computing Classification Evaluation Metrics in R,4 points by lefish 7 days ago | discuss,7 53 | Analyzing Golden State Warriors' passing network using GraphFrames in Spark,3 points by yukiegosapporo 6 days ago | discuss,6 54 | Megaman: Manifold Learning with Millions of points,4 points by dperry 8 days ago | 3 comments,8 55 | How to Detect Outliers on Parametric and Non Parametric Methods,2 points by clevertap 5 days ago | discuss,5 56 | BallR: Interactive NBA Shot Charts with R and Shiny,12 points by carlosgg 14 days ago | discuss,14 57 | A Billion Taxi Rides on Amazon EMR Running Presto,4 points by marklit 8 days ago | discuss,8 58 | Minecraft to run artificial intelligence experiments,4 points by bsadeghi 8 days ago | discuss,8 59 | Deep Q-Learning (Space Invaders),4 points by pmigdal 8 days ago | discuss,8 60 | Theano Tutorial,2 points by pmigdal 5 days ago | discuss,5 61 | The Personality Space of Cartoon Characters,3 points by lefish 7 days ago | discuss,7 62 | Announcing Apache Flink 1.0.0,11 points by mxm 14 days ago | discuss,14 63 | "Telemetry with Collectd, Logstash, Elasticsearch and Grafana (ELG)",3 points by helloanand 7 days ago | discuss,7 64 | Statisticians Agree: It?s Time To Stop Misusing P-Value,10 points by jpiburn 15 days ago | 5 comments,15 65 | Bayesian Reasoning in The Twilight Zone!,2 points by Homunculiheaded 6 days ago | discuss,6 66 | Bayesian Estimation of G Train Wait Times,7 points by jamesdreiss 12 days ago | discuss,12 67 | XGBoost: A Scalable Tree Boosting System article,6 points by tfturing 12 days ago | discuss,12 68 | Some experiments into explaining complex black box ensemble predictions,2 points by lefish 6 days ago | discuss,6 69 | Creating a Hadoop Pseudo-Distributed Environment,2 points by lefish 6 days ago | discuss,6 70 | "Data Science Pop-Up in Austin, TX",2 points by AnnaOnTheWeb 6 days ago | discuss,6 71 | Train your own image classifier with Inception in TensorFlow,7 points by elyase 13 days ago | 
discuss,13 72 | Shiny app for running a Tensorflow demo,3 points by shinyman 9 days ago | discuss,9 73 | File details and owners with gitnoc and git-pandas,3 points by wdm0006 9 days ago | discuss,9 74 | 7 Big Data Technologies and When to Use Them that All Data Engineers Should Know,2 points by galvanize 7 days ago | discuss,7 75 | Topic clusters with TF-IDF vectorization with Spark and Scala,2 points by lefish 7 days ago | discuss,7 76 | Neural Doodles: Workflows for the Next Generation of Artists,5 points by pmigdal 12 days ago | discuss,12 77 | Graph Databases 101,5 points by carlosgg 12 days ago | discuss,12 78 | DataRadar.IO - Data Science RSS Feed - Do you have enough data about your data,2 points by dekhtiar 8 days ago | 3 comments,8 79 | International Women's Day: What #PledgeForParity Means To Us,5 points by ddrum001 14 days ago | discuss,14 80 | Top 50 Data Science thought leaders on Twitter,3 points by datawerq 11 days ago | 3 comments,11 81 | Ask DT: Who Is Hiring? (March 2016),27 points by whoishiring 21 days ago | 15 comments,21 82 | Deriving Better Insights From Time Series Data With Cycle Plots,3 points by clevertap 11 days ago | discuss,11 83 | Introducing GraphFrames,7 points by falaki 19 days ago | discuss,19 84 | SQL for Data Analysis,4 points by nickhould 14 days ago | 6 comments,14 85 | Stream processing and messaging systems for the IoT age,3 points by gradientflow 12 days ago | discuss,12 86 | Announcing R Tools for Visual Studio,3 points by brakmic 13 days ago | discuss,13 87 | A simpler way to merge data streams,3 points by apoverton 13 days ago | discuss,13 88 | Optimizing Notification Timing for One Signal,9 points by megandias 26 days ago | discuss,26 89 | Skizze - A high throughput probabilistic data structure service and storage,3 points by seiflotfy 14 days ago | discuss,14 90 | Question: What do you want to say about working with data?,2 points by emiller425 8 days ago | discuss,8 91 | Genomic Ranges - an Introduction to Working 
with Genomic Data,3 points by AnnaOnTheWeb 13 days ago | discuss,13 92 | TensorFlow for Poets,9 points by ebellm 21 days ago | 1 comment,21 93 | Unsupervised Learning with Even Less Supervision Using Bayesian Optimization,2 points by idewanck 11 days ago | discuss,11 94 | How to work with large JSON datasets using Python and Pandas,9 points by brian_spiering 21 days ago | discuss,21 95 | DrivenData Competition: Model/Visualize Fog Patterns in Morocco,4 points by bull 15 days ago | discuss,15 96 | Deep Learning: Nine Lectures at Coll?ge de France by Yan LeCun,5 points by Anon84 17 days ago | discuss,17 97 | Optimizing Facebook Campaigns with R,2 points by AnnaOnTheWeb 12 days ago | 1 comment,12 98 | "Trump Tweets on a Globe (aka Fun with d3, socket.io, and the Twitter API)",8 points by joelgrus 21 days ago | discuss,21 99 | Why pandas users should be excited about Apache Arrow,17 points by pmigdal 29 days ago | discuss,29 100 | Histogram intersection for change detection,8 points by datadive 22 days ago | discuss,22 101 | Distributed TensorFlow just open-sourced,10 points by elyase 25 days ago | discuss,25 102 | D3.js Screencasts (1 in 3 are free),4 points by Veerle 18 days ago | discuss,18 103 | Regression and Classification with Examples in R,5 points by soates 20 days ago | discuss,20 104 | Free online course on statistical shape modelling,8 points by shapemean 25 days ago | discuss,25 105 | "Don't worry about deep learning, deepen your understanding of causality instead",22 points by yanir 37 days ago | discuss,37 106 | Work with private repositories and other updates of the FlyElephant platform,2 points by m31 15 days ago | discuss,15 107 | How to import XML to almost anywhere,4 points by Jammink 20 days ago | discuss,20 108 | Survival Analysis of Cricket Player Careers,8 points by keshav92 26 days ago | 6 comments,26 109 | Generate image analogies using neural matching and blending,2 points by pmigdal 15 days ago | discuss,15 110 | "Analyzing 1.8M tweets from 
Super Bowl 50 (Twython, Twitter API, AYLIEN)",4 points by mikewally 20 days ago | discuss,20 111 | Newly released sklearn compatible library of categorical encoders,7 points by wdm0006 25 days ago | discuss,25 112 | Watch Tiny Neural Nets Learn,4 points by swanint 21 days ago | discuss,21 113 | Four pitfalls of hill climbing: An animated look,5 points by csaid81 23 days ago | discuss,23 114 | "Decision Forests, Convolutional Networks and the Models in-Between",2 points by ebellm 16 days ago | discuss,16 115 | How a Math Genius Hacked OkCupid to Find True Love,15 points by roh_codeur 34 days ago | discuss,34 116 | No developers for PyLearn2,3 points by tfturing 19 days ago | discuss,19 117 | Density Estimation with Dirichlet Process Mixtures using PyMC3,6 points by MidsizeBlowfish 25 days ago | discuss,25 118 | Using survival analysis and git-pandas to estimate code quality,3 points by wdm0006 20 days ago | discuss,20 119 | An Analysis of the Flint Michigan Water Crisis: Part 1 Initial Corrosivity,3 points by JHorn 20 days ago | discuss,20 120 | An Analysis of Republican Twitter Follower Interests,6 points by michelangelo 26 days ago | discuss,26 121 | Introduction to ML talk,8 points by cjbayesian 29 days ago | discuss,29 122 | GloVe vs word2vec revisited,3 points by pmigdal 20 days ago | discuss,20 123 | Overoptimizing: a story about kaggle,4 points by wdm0006 30 days ago | discuss,30 124 | Undergrad Data Analysis/Science internships SF Bay?,3 points by tctctc 15 days ago | 5 comments,15 125 | The Role of Statistical Significance in Growth Hacking,6 points by rawls234 27 days ago | discuss,27 126 | Data Science Course @ Harvard,7 points by rahmaniacc 29 days ago | 2 comments,29 127 | Principal Component Projection Without Principal Component Analysis,6 points by genofon 27 days ago | discuss,27 128 | "Machine Learning: An In-Depth, Non-Technical Guide - Part 3",7 points by innoarchitech 29 days ago | discuss,29 129 | Stochastic Dummy Boosting,2 points by mikeskim 
18 days ago | discuss,18 130 | Interactive Map: Hong-Kong through The Lense of Instagram,2 points by BrianN 19 days ago | discuss,19 131 | Data Science at Monsanto,3 points by doctorcroc 22 days ago | discuss,22 132 | Data Science at Instacart,11 points by jeremystan 34 days ago | 3 comments,34 133 | Building a Streaming Search Platform,6 points by ddrum001 28 days ago | discuss,28 134 | Kafka Producer Latency with Large Topic Counts,3 points by marklit 26 days ago | discuss,26 135 | A Sneak Peak of the Cloud: the 2 Minute Intro for Beginners,2 points by andymaheshw 20 days ago | discuss,20 136 | Win-Vector video courses: price/status changes,2 points by jmount 20 days ago | discuss,20 137 | 50+ Data Science and Machine Learning Cheat Sheets,20 points by elyase 42 days ago | 1 comment,42 138 | One More Reason Not To Be Scared of Deep Learning,2 points by amplifier_khan 21 days ago | discuss,21 139 | Visual Logic Authoring vs Code,2 points by AnnaOnTheWeb 21 days ago | discuss,21 140 | Data Science in Python online training with hands-on experience,2 points by Puneet 21 days ago | discuss,21 141 | Viewing the US Presidential Primary Through the Lens of Twitter,8 points by michelangelo 33 days ago | discuss,33 142 | Caffe on Spark open sourced,4 points by rahmaniacc 27 days ago | discuss,27 143 | The Ethical Data Scientist,5 points by tfturing 29 days ago | discuss,29 144 | Answers to Frequently Asked Questions in Machine Learning,3 points by rasbt 21 days ago | discuss,21 145 | Intro to A/B Testing and P-Values,2 points by randyzwitch 22 days ago | discuss,22 146 | Visualizing State Level Data With R and Statebins,2 points by usujason 22 days ago | discuss,22 147 | "Probabilistic Graphical Models slides & video lectures (Eric Xing, CMU)",4 points by ororm 28 days ago | discuss,28 148 | Sense2vec with spaCy and Gensim,9 points by elyase 36 days ago | 2 comments,36 149 | A Billion NYC Taxi and Uber Rides in AWS Redshift,3 points by marklit 31 days ago | discuss,31 150 
| How to Code and Understand DeepMind's Neural Stack Machine (in Python),2 points by genofon 23 days ago | discuss,23 151 | How to make polished Jupyter presentations with optional code visibility,9 points by csaid81 36 days ago | discuss,36 152 | How to become a Bayesian in eight easy steps,17 points by EtzA 44 days ago | 1 comment,44 153 | Optimizing .*: Details of Vectorization and Metaprogramming in Julia,4 points by randyzwitch 29 days ago | discuss,29 154 | IBM certified Apache Spark Online Training,8 points by divya_jain 36 days ago | discuss,36 155 | Geographic Data Science course,2 points by rk 25 days ago | discuss,25 156 | "The Daily Mail Stole My Visualization, Twice",5 points by thehoff 32 days ago | 1 comment,32 157 | Ensemble Methods: Improved Machine Learning Results,9 points by PyBloggers 38 days ago | discuss,38 158 | Apache Spark and unsupervised learning in security,2 points by gradientflow 26 days ago | discuss,26 159 | MachineJS: Automated machine learning- just give it a data file!,2 points by dsernst 26 days ago | discuss,26 160 | The NSA?s SKYNET program may be killing thousands of innocent people,6 points by zlipp 35 days ago | discuss,35 161 | "Big Dimensions, and What You Can Do About It",2 points by ramsey 27 days ago | discuss,27 162 | Automate Your Oscars Pool with R,2 points by jamesdreiss 27 days ago | discuss,27 163 | Signal Processing with LIGO GW150914 data,9 points by tfturing 39 days ago | discuss,39 164 | Overview of DeZyre and Coursera Data Science Course,5 points by ann928 34 days ago | discuss,34 165 | Upcoming Datathon in NYC,2 points by VicTrey 28 days ago | discuss,28 166 | Summarizing Data in SQL,15 points by elisebreda 46 days ago | discuss,46 167 | A/B Testing for Scammers,2 points by sameermanek 28 days ago | discuss,28 168 | Highly interpretable classifiers for scikit learn using Bayesian decision rules,2 points by mcnulty 28 days ago | discuss,28 169 | Auto-scaling scikit-learn with Spark,11 points by falaki 43 
days ago | discuss,43 170 | Where the f*** can I park?,2 points by manugarri 29 days ago | discuss,29 171 | "Machine Learning: An In-Depth, Non-Technical Guide - Part 2",5 points by innoarchitech 36 days ago | discuss,36 172 | Webhose.io now offers a historical data archive,7 points by databuffer 40 days ago | discuss,40 173 | Meetup: Introduction to Machine Learning Algorithms for Data Science.,4 points by ann928 36 days ago | discuss,36 174 | Exploring the Limits of Language Modeling,8 points by soates 42 days ago | discuss,42 175 | Text Mining South Park,7 points by pmigdal 41 days ago | discuss,41 176 | Finding the K in K-means by Parametric Bootstrap,7 points by jmount 42 days ago | 1 comment,42 177 | Getting Started with Statistics for Data Science,3 points by nickhould 35 days ago | discuss,35 178 | Rodeo 1.3 - Tab-completion for docstrings,3 points by glamp 35 days ago | discuss,35 179 | Teaching D3.js - links,3 points by pmigdal 35 days ago | discuss,35 180 | Parallel scikit-learn on YARN,5 points by stijntonk 39 days ago | discuss,39 181 | Meetup: Free Live Webinar on Prescriptive Analytics for Fun and Profit,2 points by ann928 32 days ago | discuss,32 182 | -------------------------------------------------------------------------------- /data/data_tau_ta.csv: -------------------------------------------------------------------------------- 1 | title,date,days,tokens,stem,lemma,pos_tags,named_entities 2 | Deep Advances in Generative Modeling,6 points by gwulfs 5 hours ago | discuss,1,"deep,advances,generative,modeling",Deep Advances in Generative Model,Deep Advances in Generative Modeling,"[('Deep', 'JJ'), ('Advances', 'NNS'), ('in', 'IN'), ('Generative', 'NNP'), ('Modeling', 'NNP')]",['Generative Modeling'] 3 | A Neural Network in 11 lines of Python ,2 points by dekhtiar 5 hours ago | discuss,1,"neural,network,11,lines,python",A Neural Network in 11 lines of Python ,A Neural Network in 11 lines of Python ,"[('A', 'DT'), ('Neural', 'NNP'), ('Network', 
'NNP'), ('in', 'IN'), ('11', 'CD'), ('lines', 'NNS'), ('of', 'IN'), ('Python', 'NNP')]",['Python'] 4 | "Python, Machine Learning, and Language Wars",3 points by pmigdal 7 hours ago | discuss,1,"python,machine,learning,language,wars","Python, Machine Learning, and Language War","Python, Machine Learning, and Language Wars","[('Python', 'NNP'), (',', ','), ('Machine', 'NNP'), ('Learning', 'NNP'), (',', ','), ('and', 'CC'), ('Language', 'NNP'), ('Wars', 'NNP')]","['Python', 'Machine Learning', 'Language Wars']" 5 | Markov Chains Explained Visually,11 points by zeroviscosity 1 day ago | 1 comment,1,"markov,chains,explained,visually",Markov Chains Explained Visu,Markov Chains Explained Visually,"[('Markov', 'NNP'), ('Chains', 'NNP'), ('Explained', 'VBD'), ('Visually', 'NNP')]","['Markov Chains', 'Visually']" 6 | Dplython: Dplyr for Python,10 points by thenaturalist 1 day ago | 3 comments,1,"dplython,dplyr,python",Dplython: Dplyr for Python,Dplython: Dplyr for Python,"[('Dplython', 'NN'), (':', ':'), ('Dplyr', 'NNP'), ('for', 'IN'), ('Python', 'NNP')]","['Dplython', 'Python']" 7 | Inferring causal impact using Bayesian structural time-series models,7 points by Homunculiheaded 1 day ago | 1 comment,1,"inferring,causal,impact,using,bayesian,structural,time,series,models",Inferring causal impact using Bayesian structural time-series model,Inferring causal impact using Bayesian structural time-series models,"[('Inferring', 'VBG'), ('causal', 'JJ'), ('impact', 'NN'), ('using', 'VBG'), ('Bayesian', 'JJ'), ('structural', 'JJ'), ('time', 'NN'), ('-', ':'), ('series', 'NN'), ('models', 'NNS')]",['Bayesian'] 8 | Tutorial: Web scraping and mapping breweries with import.io and R,4 points by jasdumas 1 day ago | discuss,1,"tutorial,web,scraping,mapping,breweries,import,io,r",Tutorial: Web scraping and mapping breweries with import.io and R,Tutorial: Web scraping and mapping breweries with import.io and R,"[('Tutorial', 'JJ'), (':', ':'), ('Web', 'JJ'), ('scraping', 'NN'), ('and', 
'CC'), ('mapping', 'NN'), ('breweries', 'NNS'), ('with', 'IN'), ('import', 'NN'), ('.', '.'), ('io', 'NN'), ('and', 'CC'), ('R', 'NN')]",[] 9 | A Billion Taxi Rides on Amazon EMR running Spark,3 points by marklit 1 day ago | 1 comment,1,"billion,taxi,rides,amazon,emr,running,spark",A Billion Taxi Rides on Amazon EMR running Spark,A Billion Taxi Rides on Amazon EMR running Spark,"[('A', 'DT'), ('Billion', 'NNP'), ('Taxi', 'NNP'), ('Rides', 'NNP'), ('on', 'IN'), ('Amazon', 'NNP'), ('EMR', 'NNP'), ('running', 'VBG'), ('Spark', 'NNP')]","['Amazon', 'Spark']" 10 | The rise of greedy robots,4 points by yanir 1 day ago | discuss,1,"rise,greedy,robots",The rise of greedy robot,The rise of greedy robots,"[('The', 'DT'), ('rise', 'NN'), ('of', 'IN'), ('greedy', 'NN'), ('robots', 'NNS')]",[] 11 | Extracting image metadata at scale,2 points by zachwill 1 day ago | discuss,1,"extracting,image,metadata,scale",Extracting image metadata at scal,Extracting image metadata at scale,"[('Extracting', 'VBG'), ('image', 'NN'), ('metadata', 'NN'), ('at', 'IN'), ('scale', 'NN')]",[] 12 | "Python for Data Structures, Algorithms, and Interviews",17 points by kokoubaby 4 days ago | discuss,4,"python,data,structures,algorithms,interviews","Python for Data Structures, Algorithms, and Interview","Python for Data Structures, Algorithms, and Interviews","[('Python', 'NNP'), ('for', 'IN'), ('Data', 'NNP'), ('Structures', 'NNP'), (',', ','), ('Algorithms', 'NNP'), (',', ','), ('and', 'CC'), ('Interviews', 'NNS')]","['Python', 'Data Structures', 'Algorithms']" 13 | Lift charts - A data scientist's secret weapon,14 points by datenheini 4 days ago | 2 comments,4,"lift,charts,data,scientist,secret,weapon",Lift charts - A data scientist's secret weapon,Lift charts - A data scientist's secret weapon,"[('Lift', 'NNP'), ('charts', 'VBZ'), ('-', ':'), ('A', 'DT'), ('data', 'JJ'), ('scientist', 'NN'), (""'"", 'POS'), ('s', 'NN'), ('secret', 'VBZ'), ('weapon', 'NN')]",['Lift'] 14 | How To Become A Machine 
Learning Expert In One Simple Step,4 points by swanint 2 days ago | discuss,2,"become,machine,learning,expert,one,simple,step",How To Become A Machine Learning Expert In One Simple Step,How To Become A Machine Learning Expert In One Simple Step,"[('How', 'WRB'), ('To', 'TO'), ('Become', 'VB'), ('A', 'NNP'), ('Machine', 'NNP'), ('Learning', 'NNP'), ('Expert', 'NNP'), ('In', 'IN'), ('One', 'CD'), ('Simple', 'JJ'), ('Step', 'NN')]",[] 15 | Data Science Side Project,6 points by yashpatel5400 1 day ago | 8 comments,1,"data,science,side,project",Data Science Side Project,Data Science Side Project,"[('Data', 'NNP'), ('Science', 'NNP'), ('Side', 'NNP'), ('Project', 'NNP')]",['Data Science Side'] 16 | Simple estimation of hierarchical events with petersburg,3 points by wdm0006 1 day ago | discuss,1,"simple,estimation,hierarchical,events,petersburg",Simple estimation of hierarchical events with petersburg,Simple estimation of hierarchical events with petersburg,"[('Simple', 'JJ'), ('estimation', 'NN'), ('of', 'IN'), ('hierarchical', 'JJ'), ('events', 'NNS'), ('with', 'IN'), ('petersburg', 'NN')]",['Simple'] 17 | Engineers Shouldn?t Write ETL: High Functioning Data Science Departments,9 points by legel 4 days ago | 3 comments,4,"engineers,write,etl,high,functioning,data,science,departments",Engineers Shouldn?t Write ETL: High Functioning Data Science Depart,Engineers Shouldn?t Write ETL: High Functioning Data Science Departments,"[('Engineers', 'NNS'), ('Shouldn', 'NNP'), ('?', '.'), ('t', 'NN'), ('Write', 'NNP'), ('ETL', 'NNP'), (':', ':'), ('High', 'JJ'), ('Functioning', 'NNP'), ('Data', 'NNP'), ('Science', 'NNP'), ('Departments', 'NNP')]",['Write'] 18 | Unsupervised Computer Vision: The Current State of the Art,6 points by carlosfaham 3 days ago | discuss,3,"unsupervised,computer,vision,current,state,art",Unsupervised Computer Vision: The Current State of the Art,Unsupervised Computer Vision: The Current State of the Art,"[('Unsupervised', 'VBN'), ('Computer', 'NNP'), 
('Vision', 'NNP'), (':', ':'), ('The', 'DT'), ('Current', 'NNP'), ('State', 'NNP'), ('of', 'IN'), ('the', 'DT'), ('Art', 'NN')]",['Computer Vision'] 19 | What data visualization tools do /r/DataIsBeautiful OC creators use?,3 points by pmigdal 2 days ago | discuss,2,"data,visualization,tools,r,dataisbeautiful,oc,creators,use",What data visualization tools do /r/DataIsBeautiful OC creators use?,What data visualization tools do /r/DataIsBeautiful OC creators use?,"[('What', 'WP'), ('data', 'VBZ'), ('visualization', 'NN'), ('tools', 'NNS'), ('do', 'VBP'), ('/', 'RB'), ('r', 'VB'), ('/', 'NNP'), ('DataIsBeautiful', 'NNP'), ('OC', 'NNP'), ('creators', 'NNS'), ('use', 'VBP'), ('?', '.')]",[] 20 | Data Engineering at Slack: Twelve Mistakes I've Made In My First Three Months,13 points by gwulfs 5 days ago | 2 comments,5,"data,engineering,slack,twelve,mistakes,made,first,three,months",Data Engineering at Slack: Twelve Mistakes I've Made In My First Three Month,Data Engineering at Slack: Twelve Mistakes I've Made In My First Three Months,"[('Data', 'NNP'), ('Engineering', 'NNP'), ('at', 'IN'), ('Slack', 'NNP'), (':', ':'), ('Twelve', 'NNP'), ('Mistakes', 'NNP'), ('I', 'PRP'), (""'"", ""''""), ('ve', 'NN'), ('Made', 'VBN'), ('In', 'IN'), ('My', 'NNP'), ('First', 'NNP'), ('Three', 'CD'), ('Months', 'NNP')]","['Data Engineering', 'Slack']" 21 | An unusual interactive machine learning challenge,4 points by gglumov 3 days ago | discuss,3,"unusual,interactive,machine,learning,challenge",An unusual interactive machine learning challeng,An unusual interactive machine learning challenge,"[('An', 'DT'), ('unusual', 'JJ'), ('interactive', 'JJ'), ('machine', 'NN'), ('learning', 'NN'), ('challenge', 'NN')]",[] 22 | Datumbox Machine Learning Framework 0.7.0 Released,4 points by datumbox 3 days ago | discuss,3,"datumbox,machine,learning,framework,0,7,0,released",Datumbox Machine Learning Framework 0.7.0 Releas,Datumbox Machine Learning Framework 0.7.0 Released,"[('Datumbox', 'NNP'), 
('Machine', 'NNP'), ('Learning', 'NNP'), ('Framework', 'NNP'), ('0', 'CD'), ('.', '.'), ('7', 'CD'), ('.', '.'), ('0', 'CD'), ('Released', 'VBD')]",['Datumbox Machine'] 23 | Reshaping in Pandas,5 points by carlosgg 3 days ago | discuss,3,"reshaping,pandas",Reshaping in Panda,Reshaping in Pandas,"[('Reshaping', 'VBG'), ('in', 'IN'), ('Pandas', 'NNP')]",['Pandas'] 24 | Data science intro for math/phys background,14 points by pmigdal 6 days ago | discuss,6,"data,science,intro,math,phys,background",Data science intro for math/phys background,Data science intro for math/phys background,"[('Data', 'NNP'), ('science', 'NN'), ('intro', 'NN'), ('for', 'IN'), ('math', 'NN'), ('/', 'NNP'), ('phys', 'NN'), ('background', 'NN')]",['Data'] 25 | Neural Networks demystified,16 points by elyase 7 days ago | discuss,7,"neural,networks,demystified",Neural Networks demystifi,Neural Networks demystified,"[('Neural', 'JJ'), ('Networks', 'NNP'), ('demystified', 'VBD')]",['Neural Networks'] 26 | What machines can learn from Apple Watch: detecting undiagnosed heart condition,9 points by koukouhappy 5 days ago | discuss,5,"machines,learn,apple,watch,detecting,undiagnosed,heart,condition",What machines can learn from Apple Watch: detecting undiagnosed heart condit,What machines can learn from Apple Watch: detecting undiagnosed heart condition,"[('What', 'WP'), ('machines', 'NNS'), ('can', 'MD'), ('learn', 'VB'), ('from', 'IN'), ('Apple', 'NNP'), ('Watch', 'NNP'), (':', ':'), ('detecting', 'NN'), ('undiagnosed', 'JJ'), ('heart', 'NN'), ('condition', 'NN')]",['Apple Watch'] 27 | Data Science Tools: The Biggest Winners and Losers,12 points by AnnaOnTheWeb 7 days ago | discuss,7,"data,science,tools,biggest,winners,losers",Data Science Tools: The Biggest Winners and Los,Data Science Tools: The Biggest Winners and Losers,"[('Data', 'NNP'), ('Science', 'NNP'), ('Tools', 'NNP'), (':', ':'), ('The', 'DT'), ('Biggest', 'NNP'), ('Winners', 'NNPS'), ('and', 'CC'), ('Losers', 'NNS')]","['Data Science 
Tools', 'Biggest Winners']" 28 | 10 Years of Open Source Machine Learning,9 points by tstonez 6 days ago | 1 comment,6,"10,years,open,source,machine,learning",10 Years of Open Source Machine Learn,10 Years of Open Source Machine Learning,"[('10', 'CD'), ('Years', 'NNS'), ('of', 'IN'), ('Open', 'NNP'), ('Source', 'NNP'), ('Machine', 'NNP'), ('Learning', 'NNP')]",['Open Source Machine'] 29 | Do jobs run in families?,5 points by Anon84 5 days ago | 1 comment,5,"jobs,run,families",Do jobs run in families?,Do jobs run in families?,"[('Do', 'NNP'), ('jobs', 'NNS'), ('run', 'VB'), ('in', 'IN'), ('families', 'NNS'), ('?', '.')]",[] 30 | Has your conversion rate changed? Bayesian timeseries analysis with Python,12 points by yummyfajitas 8 days ago | discuss,8,"conversion,rate,changed,bayesian,timeseries,analysis,python",Has your conversion rate changed? Bayesian timeseries analysis with Python,Has your conversion rate changed? Bayesian timeseries analysis with Python,"[('Has', 'NNP'), ('your', 'PRP$'), ('conversion', 'NN'), ('rate', 'NN'), ('changed', 'VBN'), ('?', '.'), ('Bayesian', 'JJ'), ('timeseries', 'NNS'), ('analysis', 'NN'), ('with', 'IN'), ('Python', 'NNP')]",['Python'] 31 | "XGBoost4J: Portable Distributed XGboost in Spark, Flink and Dataflow",8 points by crowwork 7 days ago | discuss,7,"xgboost4j,portable,distributed,xgboost,spark,flink,dataflow","XGBoost4J: Portable Distributed XGboost in Spark, Flink and Dataflow","XGBoost4J: Portable Distributed XGboost in Spark, Flink and Dataflow","[('XGBoost4J', 'NN'), (':', ':'), ('Portable', 'JJ'), ('Distributed', 'NNP'), ('XGboost', 'NN'), ('in', 'IN'), ('Spark', 'NNP'), (',', ','), ('Flink', 'NNP'), ('and', 'CC'), ('Dataflow', 'NNP')]","['XGBoost4J', 'Spark', 'Flink', 'Dataflow']" 32 | Introduction to Scikit Flow - Simplified Interface to TensorFlow,7 points by lefish 7 days ago | discuss,7,"introduction,scikit,flow,simplified,interface,tensorflow",Introduction to Scikit Flow - Simplified Interface to 
TensorFlow,Introduction to Scikit Flow - Simplified Interface to TensorFlow,"[('Introduction', 'NN'), ('to', 'TO'), ('Scikit', 'NNP'), ('Flow', 'NNP'), ('-', ':'), ('Simplified', 'VBD'), ('Interface', 'NNP'), ('to', 'TO'), ('TensorFlow', 'VB')]",['Scikit Flow'] 33 | How to learn machine learning?,8 points by kiechu 7 days ago | 1 comment,7,"learn,machine,learning",How to learn machine learning?,How to learn machine learning?,"[('How', 'WRB'), ('to', 'TO'), ('learn', 'VB'), ('machine', 'NN'), ('learning', 'NN'), ('?', '.')]",[] 34 | The Deep Roots of Javascript Fatigue,5 points by nikkielizdemere 6 days ago | 1 comment,6,"deep,roots,javascript,fatigue",The Deep Roots of Javascript Fatigu,The Deep Roots of Javascript Fatigue,"[('The', 'DT'), ('Deep', 'NNP'), ('Roots', 'NNP'), ('of', 'IN'), ('Javascript', 'NNP'), ('Fatigue', 'NNP')]","['Deep Roots', 'Javascript Fatigue']" 35 | How do we make Data Tau work?,27 points by hal8 8 days ago | 18 comments,8,"make,data,tau,work",How do we make Data Tau work?,How do we make Data Tau work?,"[('How', 'WRB'), ('do', 'VBP'), ('we', 'PRP'), ('make', 'VB'), ('Data', 'NNP'), ('Tau', 'NNP'), ('work', 'NN'), ('?', '.')]",['Data Tau'] 36 | "Machine Learning: An In-Depth, Non-Technical Guide???Part 4",7 points by innoarchitech 8 days ago | discuss,8,"machine,learning,depth,non,technical,guide,???,part,4","Machine Learning: An In-Depth, Non-Technical Guide???Part 4","Machine Learning: An In-Depth, Non-Technical Guide???Part 4","[('Machine', 'NN'), ('Learning', 'NNP'), (':', ':'), ('An', 'DT'), ('In', 'IN'), ('-', ':'), ('Depth', 'NN'), (',', ','), ('Non', 'NNP'), ('-', ':'), ('Technical', 'NNP'), ('Guide', 'NNP'), ('???', 'NNP'), ('Part', 'NNP'), ('4', 'CD')]","['Machine Learning', 'Non', 'Technical Guide']" 37 | Data Science Slack channel - Click for invite,7 points by jyotsna 8 days ago | discuss,8,"data,science,slack,channel,click,invite",Data Science Slack channel - Click for invit,Data Science Slack channel - Click for 
invite,"[('Data', 'NNP'), ('Science', 'NNP'), ('Slack', 'NNP'), ('channel', 'NN'), ('-', ':'), ('Click', 'NN'), ('for', 'IN'), ('invite', 'NN')]","['Data Science Slack', 'Click']" 38 | Genomic Data Visualization using Python,2 points by RadhouaneAniba 3 days ago | discuss,3,"genomic,data,visualization,using,python",Genomic Data Visualization using Python,Genomic Data Visualization using Python,"[('Genomic', 'NNP'), ('Data', 'NNP'), ('Visualization', 'NNP'), ('using', 'VBG'), ('Python', 'NNP')]","['Genomic Data', 'Python']" 39 | Descriptive Statistics in SQL,5 points by nickhould 7 days ago | discuss,7,"descriptive,statistics,sql",Descriptive Statistics in SQL,Descriptive Statistics in SQL,"[('Descriptive', 'JJ'), ('Statistics', 'NNS'), ('in', 'IN'), ('SQL', 'NNP')]",['SQL'] 40 | "Playing ""Moneyball"" on EA FIFA 16",16 points by aabb13 13 days ago | 3 comments,13,"playing,moneyball,ea,fifa,16","Playing ""Moneyball"" on EA FIFA 16","Playing ""Moneyball"" on EA FIFA 16","[('Playing', 'VBG'), ('""', 'NNP'), ('Moneyball', 'NNP'), ('""', 'NNP'), ('on', 'IN'), ('EA', 'NNP'), ('FIFA', 'NNP'), ('16', 'CD')]",[] 41 | Intellexer - Natural Language Processing and Text Mining REST API,16 points by j_downer 13 days ago | discuss,13,"intellexer,natural,language,processing,text,mining,rest,api",Intellexer - Natural Language Processing and Text Mining REST API,Intellexer - Natural Language Processing and Text Mining REST API,"[('Intellexer', 'NNP'), ('-', ':'), ('Natural', 'JJ'), ('Language', 'NNP'), ('Processing', 'NNP'), ('and', 'CC'), ('Text', 'NNP'), ('Mining', 'NNP'), ('REST', 'NNP'), ('API', 'NNP')]","['Intellexer', 'Natural Language', 'Text Mining']" 42 | How to Use Cohort Data to Analyze User Behavior,2 points by clevertap 3 days ago | discuss,3,"use,cohort,data,analyze,user,behavior",How to Use Cohort Data to Analyze User Behavior,How to Use Cohort Data to Analyze User Behavior,"[('How', 'WRB'), ('to', 'TO'), ('Use', 'VB'), ('Cohort', 'NNP'), ('Data', 'NNP'), ('to', 
'TO'), ('Analyze', 'NNP'), ('User', 'NNP'), ('Behavior', 'NNP')]","['Cohort Data', 'Analyze User Behavior']" 43 | Show DT: Datasets.co - An easy way to share and discover ml datasets,2 points by mrborgen86 4 days ago | discuss,4,"show,dt,datasets,co,easy,way,share,discover,ml,datasets",Show DT: Datasets.co - An easy way to share and discover ml dataset,Show DT: Datasets.co - An easy way to share and discover ml datasets,"[('Show', 'NNP'), ('DT', 'NNP'), (':', ':'), ('Datasets', 'NNS'), ('.', '.'), ('co', 'SYM'), ('-', ':'), ('An', 'DT'), ('easy', 'JJ'), ('way', 'NN'), ('to', 'TO'), ('share', 'NN'), ('and', 'CC'), ('discover', 'NN'), ('ml', 'NN'), ('datasets', 'NNS')]",['Show'] 44 | "An Ode To The Rice Cooker, The Smartest Kitchen Appliance I?ve Ever Owned",2 points by tfturing 4 days ago | discuss,4,"ode,rice,cooker,smartest,kitchen,appliance,ever,owned","An Ode To The Rice Cooker, The Smartest Kitchen Appliance I?ve Ever Own","An Ode To The Rice Cooker, The Smartest Kitchen Appliance I?ve Ever Owned","[('An', 'DT'), ('Ode', 'NNP'), ('To', 'TO'), ('The', 'DT'), ('Rice', 'NNP'), ('Cooker', 'NNP'), (',', ','), ('The', 'DT'), ('Smartest', 'NNP'), ('Kitchen', 'NNP'), ('Appliance', 'NNP'), ('I', 'PRP'), ('?', '.'), ('ve', ""''""), ('Ever', 'RB'), ('Owned', 'VBD')]","['Rice Cooker', 'Smartest Kitchen']" 45 | Making transparent how variations in analytical choices affect results,4 points by rahmaniacc 6 days ago | discuss,6,"making,transparent,variations,analytical,choices,affect,results",Making transparent how variations in analytical choices affect result,Making transparent how variations in analytical choices affect results,"[('Making', 'VBG'), ('transparent', 'JJ'), ('how', 'WRB'), ('variations', 'NNS'), ('in', 'IN'), ('analytical', 'JJ'), ('choices', 'NNS'), ('affect', 'VBP'), ('results', 'NNS')]",[] 46 | [Ask DT] What are some rookie mistakes in R?,2 points by HKtemp 2 days ago | discuss,2,"ask,dt,rookie,mistakes,r",[Ask DT] What are some rookie mistakes in R?,[Ask 
DT] What are some rookie mistakes in R?,"[('[', 'JJ'), ('Ask', 'NNP'), ('DT', 'NNP'), (']', 'NNP'), ('What', 'WP'), ('are', 'VBP'), ('some', 'DT'), ('rookie', 'NN'), ('mistakes', 'NNS'), ('in', 'IN'), ('R', 'NNP'), ('?', '.')]",[] 47 | Is Scala a better choice than Python for Apache Spark?,6 points by srinify 9 days ago | 1 comment,9,"scala,better,choice,python,apache,spark",Is Scala a better choice than Python for Apache Spark?,Is Scala a better choice than Python for Apache Spark?,"[('Is', 'VBZ'), ('Scala', 'NNP'), ('a', 'DT'), ('better', 'JJR'), ('choice', 'NN'), ('than', 'IN'), ('Python', 'NNP'), ('for', 'IN'), ('Apache', 'NNP'), ('Spark', 'NNP'), ('?', '.')]","['Python', 'Apache Spark']" 48 | Julia: A Fast Language for Numerical Computing,6 points by srinify 9 days ago | 1 comment,9,"julia,fast,language,numerical,computing",Julia: A Fast Language for Numerical Comput,Julia: A Fast Language for Numerical Computing,"[('Julia', 'NNS'), (':', ':'), ('A', 'DT'), ('Fast', 'NNP'), ('Language', 'NNP'), ('for', 'IN'), ('Numerical', 'NNP'), ('Computing', 'NNP')]",['Numerical Computing'] 49 | Analyzing Golden State Warriors' passing network using GraphFrames in Spark,3 points by yukiegosapporo 6 days ago | discuss,6,"analyzing,golden,state,warriors,passing,network,using,graphframes,spark",Analyzing Golden State Warriors' passing network using GraphFrames in Spark,Analyzing Golden State Warriors' passing network using GraphFrames in Spark,"[('Analyzing', 'VBG'), ('Golden', 'NNP'), ('State', 'NNP'), ('Warriors', 'NNP'), (""'"", 'POS'), ('passing', 'NN'), ('network', 'NN'), ('using', 'VBG'), ('GraphFrames', 'NNP'), ('in', 'IN'), ('Spark', 'NNP')]","['Golden State Warriors', 'GraphFrames', 'Spark']" 50 | Megaman: Manifold Learning with Millions of points,4 points by dperry 7 days ago | 3 comments,7,"megaman,manifold,learning,millions,points",Megaman: Manifold Learning with Millions of point,Megaman: Manifold Learning with Millions of points,"[('Megaman', 'NN'), (':', ':'), 
('Manifold', 'NNP'), ('Learning', 'VBG'), ('with', 'IN'), ('Millions', 'NNP'), ('of', 'IN'), ('points', 'NNS')]","['Megaman', 'Millions']" 51 | How to Detect Outliers on Parametric and Non Parametric Methods,2 points by clevertap 4 days ago | discuss,4,"detect,outliers,parametric,non,parametric,methods",How to Detect Outliers on Parametric and Non Parametric Method,How to Detect Outliers on Parametric and Non Parametric Methods,"[('How', 'WRB'), ('to', 'TO'), ('Detect', 'VB'), ('Outliers', 'NNP'), ('on', 'IN'), ('Parametric', 'NNP'), ('and', 'CC'), ('Non', 'NNP'), ('Parametric', 'NNP'), ('Methods', 'NNP')]","['Outliers', 'Parametric', 'Non Parametric Methods']" 52 | BallR: Interactive NBA Shot Charts with R and Shiny,12 points by carlosgg 14 days ago | discuss,14,"ballr,interactive,nba,shot,charts,r,shiny",BallR: Interactive NBA Shot Charts with R and Shini,BallR: Interactive NBA Shot Charts with R and Shiny,"[('BallR', 'NN'), (':', ':'), ('Interactive', 'JJ'), ('NBA', 'NNP'), ('Shot', 'NNP'), ('Charts', 'NNP'), ('with', 'IN'), ('R', 'NNP'), ('and', 'CC'), ('Shiny', 'NNP')]","['BallR', 'NBA Shot', 'Shiny']" 53 | Minecraft to run artificial intelligence experiments,4 points by bsadeghi 8 days ago | discuss,8,"minecraft,run,artificial,intelligence,experiments",Minecraft to run artificial intelligence experi,Minecraft to run artificial intelligence experiments,"[('Minecraft', 'NN'), ('to', 'TO'), ('run', 'VB'), ('artificial', 'JJ'), ('intelligence', 'NN'), ('experiments', 'NNS')]",['Minecraft'] 54 | Deep Q-Learning (Space Invaders),4 points by pmigdal 8 days ago | discuss,8,"deep,q,learning,space,invaders",Deep Q-Learning (Space Invaders),Deep Q-Learning (Space Invaders),"[('Deep', 'NNP'), ('Q', 'NNP'), ('-', ':'), ('Learning', 'NNP'), ('(', '('), ('Space', 'NNP'), ('Invaders', 'NNP'), (')', ')')]","['Deep', 'Space Invaders']" 55 | Theano Tutorial,2 points by pmigdal 5 days ago | discuss,5,"theano,tutorial",Theano Tutori,Theano Tutorial,"[('Theano', 'NNP'), 
('Tutorial', 'NNP')]",['Theano Tutorial'] 56 | Computing Classification Evaluation Metrics in R,3 points by lefish 7 days ago | discuss,7,"computing,classification,evaluation,metrics,r",Computing Classification Evaluation Metrics in R,Computing Classification Evaluation Metrics in R,"[('Computing', 'VBG'), ('Classification', 'NNP'), ('Evaluation', 'NNP'), ('Metrics', 'NNP'), ('in', 'IN'), ('R', 'NNP')]",[] 57 | The Personality Space of Cartoon Characters,3 points by lefish 7 days ago | discuss,7,"personality,space,cartoon,characters",The Personality Space of Cartoon Charact,The Personality Space of Cartoon Characters,"[('The', 'DT'), ('Personality', 'NNP'), ('Space', 'NNP'), ('of', 'IN'), ('Cartoon', 'NNP'), ('Characters', 'NNS')]","['Personality Space', 'Cartoon']" 58 | Announcing Apache Flink 1.0.0,11 points by mxm 14 days ago | discuss,14,"announcing,apache,flink,1,0,0",Announcing Apache Flink 1.0.0,Announcing Apache Flink 1.0.0,"[('Announcing', 'VBG'), ('Apache', 'NNP'), ('Flink', 'NNP'), ('1', 'CD'), ('.', '.'), ('0', 'CD'), ('.', '.'), ('0', 'CD')]",['Apache'] 59 | Bayesian Reasoning in The Twilight Zone!,2 points by Homunculiheaded 5 days ago | discuss,5,"bayesian,reasoning,twilight,zone",Bayesian Reasoning in The Twilight Zone!,Bayesian Reasoning in The Twilight Zone!,"[('Bayesian', 'JJ'), ('Reasoning', 'NNP'), ('in', 'IN'), ('The', 'DT'), ('Twilight', 'NNP'), ('Zone', 'NN'), ('!', '.')]","['Bayesian Reasoning', 'Twilight Zone']" 60 | Bayesian Estimation of G Train Wait Times,7 points by jamesdreiss 12 days ago | discuss,12,"bayesian,estimation,g,train,wait,times",Bayesian Estimation of G Train Wait Tim,Bayesian Estimation of G Train Wait Times,"[('Bayesian', 'JJ'), ('Estimation', 'NNP'), ('of', 'IN'), ('G', 'NNP'), ('Train', 'NNP'), ('Wait', 'NNP'), ('Times', 'NNP')]",['Bayesian Estimation'] 61 | Some experiments into explaining complex black box ensemble predictions,2 points by lefish 6 days ago | 
discuss,6,"experiments,explaining,complex,black,box,ensemble,predictions",Some experiments into explaining complex black box ensemble predict,Some experiments into explaining complex black box ensemble predictions,"[('Some', 'DT'), ('experiments', 'NNS'), ('into', 'IN'), ('explaining', 'VBG'), ('complex', 'JJ'), ('black', 'JJ'), ('box', 'NN'), ('ensemble', 'JJ'), ('predictions', 'NNS')]",[] 62 | Creating a Hadoop Pseudo-Distributed Environment,2 points by lefish 6 days ago | discuss,6,"creating,hadoop,pseudo,distributed,environment",Creating a Hadoop Pseudo-Distributed Environ,Creating a Hadoop Pseudo-Distributed Environment,"[('Creating', 'VBG'), ('a', 'DT'), ('Hadoop', 'NNP'), ('Pseudo', 'NNP'), ('-', ':'), ('Distributed', 'VBD'), ('Environment', 'JJ')]",['Hadoop Pseudo'] 63 | "Data Science Pop-Up in Austin, TX",2 points by AnnaOnTheWeb 6 days ago | discuss,6,"data,science,pop,austin,tx","Data Science Pop-Up in Austin, TX","Data Science Pop-Up in Austin, TX","[('Data', 'NNP'), ('Science', 'NNP'), ('Pop', 'NNP'), ('-', ':'), ('Up', 'NN'), ('in', 'IN'), ('Austin', 'NNP'), (',', ','), ('TX', 'NNP')]","['Data Science Pop', 'Austin']" 64 | A Billion Taxi Rides on Amazon EMR Running Presto,3 points by marklit 8 days ago | discuss,8,"billion,taxi,rides,amazon,emr,running,presto",A Billion Taxi Rides on Amazon EMR Running Presto,A Billion Taxi Rides on Amazon EMR Running Presto,"[('A', 'DT'), ('Billion', 'NNP'), ('Taxi', 'NNP'), ('Rides', 'NNP'), ('on', 'IN'), ('Amazon', 'NNP'), ('EMR', 'NNP'), ('Running', 'NNP'), ('Presto', 'NNP')]",['Amazon'] 65 | Train your own image classifier with Inception in TensorFlow,7 points by elyase 12 days ago | discuss,12,"train,image,classifier,inception,tensorflow",Train your own image classifier with Inception in TensorFlow,Train your own image classifier with Inception in TensorFlow,"[('Train', 'VB'), ('your', 'PRP$'), ('own', 'JJ'), ('image', 'NN'), ('classifier', 'NN'), ('with', 'IN'), ('Inception', 'NNP'), ('in', 'IN'), 
('TensorFlow', 'NNP')]",['TensorFlow'] 66 | Statisticians Agree: It?s Time To Stop Misusing P-Value,9 points by jpiburn 15 days ago | 5 comments,15,"statisticians,agree,time,stop,misusing,p,value",Statisticians Agree: It?s Time To Stop Misusing P-Valu,Statisticians Agree: It?s Time To Stop Misusing P-Value,"[('Statisticians', 'NNS'), ('Agree', 'VBP'), (':', ':'), ('It', 'PRP'), ('?', '.'), ('s', 'JJ'), ('Time', 'NNP'), ('To', 'TO'), ('Stop', 'VB'), ('Misusing', 'NNP'), ('P', 'NNP'), ('-', ':'), ('Value', 'NN')]",[] 67 | Shiny app for running a Tensorflow demo,3 points by shinyman 8 days ago | discuss,8,"shiny,app,running,tensorflow,demo",Shiny app for running a Tensorflow demo,Shiny app for running a Tensorflow demo,"[('Shiny', 'NNP'), ('app', 'NN'), ('for', 'IN'), ('running', 'VBG'), ('a', 'DT'), ('Tensorflow', 'NNP'), ('demo', 'NN')]","['Shiny', 'Tensorflow']" 68 | File details and owners with gitnoc and git-pandas,3 points by wdm0006 9 days ago | discuss,9,"file,details,owners,gitnoc,git,pandas",File details and owners with gitnoc and git-panda,File details and owners with gitnoc and git-pandas,"[('File', 'NN'), ('details', 'NNS'), ('and', 'CC'), ('owners', 'NNS'), ('with', 'IN'), ('gitnoc', 'NN'), ('and', 'CC'), ('git', 'JJ'), ('-', ':'), ('pandas', 'NN')]",['File'] 69 | 7 Big Data Technologies and When to Use Them that All Data Engineers Should Know,2 points by galvanize 7 days ago | discuss,7,"7,big,data,technologies,use,data,engineers,know",7 Big Data Technologies and When to Use Them that All Data Engineers Should Know,7 Big Data Technologies and When to Use Them that All Data Engineers Should Know,"[('7', 'CD'), ('Big', 'NNP'), ('Data', 'NNP'), ('Technologies', 'NNPS'), ('and', 'CC'), ('When', 'WRB'), ('to', 'TO'), ('Use', 'VB'), ('Them', 'NNP'), ('that', 'IN'), ('All', 'NNP'), ('Data', 'NNP'), ('Engineers', 'NNP'), ('Should', 'NNP'), ('Know', 'NNP')]",['All Data Engineers Should Know'] 70 | Topic clusters with TF-IDF vectorization with Spark and Scala,2 
points by lefish 7 days ago | discuss,7,"topic,clusters,tf,idf,vectorization,spark,scala",Topic clusters with TF-IDF vectorization with Spark and Scala,Topic clusters with TF-IDF vectorization with Spark and Scala,"[('Topic', 'NN'), ('clusters', 'NNS'), ('with', 'IN'), ('TF', 'NNP'), ('-', ':'), ('IDF', 'NNP'), ('vectorization', 'NN'), ('with', 'IN'), ('Spark', 'NNP'), ('and', 'CC'), ('Scala', 'NNP')]","['Topic', 'TF', 'IDF', 'Spark', 'Scala']" 71 | Neural Doodles: Workflows for the Next Generation of Artists,5 points by pmigdal 12 days ago | discuss,12,"neural,doodles,workflows,next,generation,artists",Neural Doodles: Workflows for the Next Generation of Artist,Neural Doodles: Workflows for the Next Generation of Artists,"[('Neural', 'JJ'), ('Doodles', 'NNS'), (':', ':'), ('Workflows', 'NNS'), ('for', 'IN'), ('the', 'DT'), ('Next', 'JJ'), ('Generation', 'NNP'), ('of', 'IN'), ('Artists', 'NNS')]",['Next Generation'] 72 | Graph Databases 101,5 points by carlosgg 12 days ago | discuss,12,"graph,databases,101",Graph Databases 101,Graph Databases 101,"[('Graph', 'NNP'), ('Databases', 'VBZ'), ('101', 'CD')]",['Graph'] 73 | "Telemetry with Collectd, Logstash, Elasticsearch and Grafana (ELG)",2 points by helloanand 7 days ago | discuss,7,"telemetry,collectd,logstash,elasticsearch,grafana,elg","Telemetry with Collectd, Logstash, Elasticsearch and Grafana (ELG)","Telemetry with Collectd, Logstash, Elasticsearch and Grafana (ELG)","[('Telemetry', 'NN'), ('with', 'IN'), ('Collectd', 'NNP'), (',', ','), ('Logstash', 'NNP'), (',', ','), ('Elasticsearch', 'NNP'), ('and', 'CC'), ('Grafana', 'NNP'), ('(', '('), ('ELG', 'NNP'), (')', ')')]","['Telemetry', 'Collectd', 'Logstash', 'Elasticsearch', 'Grafana', 'ELG']" 74 | XGBoost: A Scalable Tree Boosting System article,5 points by tfturing 12 days ago | discuss,12,"xgboost,scalable,tree,boosting,system,article",XGBoost: A Scalable Tree Boosting System articl,XGBoost: A Scalable Tree Boosting System article,"[('XGBoost', 'NN'), (':', 
':'), ('A', 'DT'), ('Scalable', 'JJ'), ('Tree', 'NNP'), ('Boosting', 'NNP'), ('System', 'NNP'), ('article', 'NN')]",['XGBoost'] 75 | DataRadar.IO - Data Science RSS Feed - Do you have enough data about your data,2 points by dekhtiar 8 days ago | 3 comments,8,"dataradar,io,data,science,rss,feed,enough,data,data",DataRadar.IO - Data Science RSS Feed - Do you have enough data about your data,DataRadar.IO - Data Science RSS Feed - Do you have enough data about your data,"[('DataRadar', 'NNP'), ('.', '.'), ('IO', 'NNP'), ('-', ':'), ('Data', 'NNP'), ('Science', 'NNP'), ('RSS', 'NNP'), ('Feed', 'NNP'), ('-', ':'), ('Do', 'VBP'), ('you', 'PRP'), ('have', 'VBP'), ('enough', 'VBN'), ('data', 'NNS'), ('about', 'IN'), ('your', 'PRP$'), ('data', 'NNS')]","['DataRadar', 'IO', 'Data Science']" 76 | International Women's Day: What #PledgeForParity Means To Us,5 points by ddrum001 13 days ago | discuss,13,"international,women,day,#,pledgeforparity,means,us",International Women's Day: What #PledgeForParity Means To U,International Women's Day: What #PledgeForParity Means To Us,"[('International', 'NNP'), ('Women', 'NNP'), (""'"", 'POS'), ('s', 'JJ'), ('Day', 'NNP'), (':', ':'), ('What', 'WP'), ('#', '#'), ('PledgeForParity', 'NN'), ('Means', 'NNPS'), ('To', 'TO'), ('Us', 'VB')]","['International Women', 'PledgeForParity Means']" 77 | Top 50 Data Science thought leaders on Twitter,3 points by datawerq 11 days ago | 3 comments,11,"top,50,data,science,thought,leaders,twitter",Top 50 Data Science thought leaders on Twitt,Top 50 Data Science thought leaders on Twitter,"[('Top', 'JJ'), ('50', 'CD'), ('Data', 'NNP'), ('Science', 'NNP'), ('thought', 'VBD'), ('leaders', 'NNS'), ('on', 'IN'), ('Twitter', 'NN')]",[] 78 | Ask DT: Who Is Hiring? (March 2016),27 points by whoishiring 21 days ago | 15 comments,21,"ask,dt,hiring,march,2016",Ask DT: Who Is Hiring? (March 2016),Ask DT: Who Is Hiring? 
(March 2016),"[('Ask', 'NNP'), ('DT', 'NNP'), (':', ':'), ('Who', 'WP'), ('Is', 'VBZ'), ('Hiring', 'VBG'), ('?', '.'), ('(', '('), ('March', 'NNP'), ('2016', 'CD'), (')', ')')]",['Ask'] 79 | Introducing GraphFrames,7 points by falaki 18 days ago | discuss,18,"introducing,graphframes",Introducing GraphFram,Introducing GraphFrames,"[('Introducing', 'VBG'), ('GraphFrames', 'NNS')]",[] 80 | Announcing R Tools for Visual Studio,3 points by brakmic 13 days ago | discuss,13,"announcing,r,tools,visual,studio",Announcing R Tools for Visual Studio,Announcing R Tools for Visual Studio,"[('Announcing', 'VBG'), ('R', 'NNP'), ('Tools', 'NNP'), ('for', 'IN'), ('Visual', 'NNP'), ('Studio', 'NNP')]",['Visual Studio'] 81 | Question: What do you want to say about working with data?,2 points by emiller425 7 days ago | discuss,7,"question,want,say,working,data",Question: What do you want to say about working with data?,Question: What do you want to say about working with data?,"[('Question', 'NN'), (':', ':'), ('What', 'WP'), ('do', 'VBP'), ('you', 'PRP'), ('want', 'VB'), ('to', 'TO'), ('say', 'VB'), ('about', 'IN'), ('working', 'VBG'), ('with', 'IN'), ('data', 'NNS'), ('?', '.')]",[] 82 | Genomic Ranges - an Introduction to Working with Genomic Data,3 points by AnnaOnTheWeb 13 days ago | discuss,13,"genomic,ranges,introduction,working,genomic,data",Genomic Ranges - an Introduction to Working with Genomic Data,Genomic Ranges - an Introduction to Working with Genomic Data,"[('Genomic', 'NNP'), ('Ranges', 'NNP'), ('-', ':'), ('an', 'DT'), ('Introduction', 'NN'), ('to', 'TO'), ('Working', 'VBG'), ('with', 'IN'), ('Genomic', 'NNP'), ('Data', 'NNP')]","['Genomic Ranges', 'Genomic Data']" 83 | TensorFlow for Poets,9 points by ebellm 21 days ago | 1 comment,21,"tensorflow,poets",TensorFlow for Poet,TensorFlow for Poets,"[('TensorFlow', 'NNP'), ('for', 'IN'), ('Poets', 'NNS')]",['TensorFlow'] 84 | Unsupervised Learning with Even Less Supervision Using Bayesian Optimization,2 points by idewanck 
10 days ago | discuss,10,"unsupervised,learning,even,less,supervision,using,bayesian,optimization",Unsupervised Learning with Even Less Supervision Using Bayesian Optim,Unsupervised Learning with Even Less Supervision Using Bayesian Optimization,"[('Unsupervised', 'VBN'), ('Learning', 'VBG'), ('with', 'IN'), ('Even', 'NNP'), ('Less', 'NNP'), ('Supervision', 'NNP'), ('Using', 'NNP'), ('Bayesian', 'NNP'), ('Optimization', 'NNP')]",['Even Less Supervision Using Bayesian'] 85 | How to work with large JSON datasets using Python and Pandas,9 points by brian_spiering 21 days ago | discuss,21,"work,large,json,datasets,using,python,pandas",How to work with large JSON datasets using Python and Panda,How to work with large JSON datasets using Python and Pandas,"[('How', 'WRB'), ('to', 'TO'), ('work', 'VB'), ('with', 'IN'), ('large', 'JJ'), ('JSON', 'NNP'), ('datasets', 'NNS'), ('using', 'VBG'), ('Python', 'NNP'), ('and', 'CC'), ('Pandas', 'NNP')]","['JSON', 'Python', 'Pandas']" 86 | DrivenData Competition: Model/Visualize Fog Patterns in Morocco,4 points by bull 15 days ago | discuss,15,"drivendata,competition,model,visualize,fog,patterns,morocco",DrivenData Competition: Model/Visualize Fog Patterns in Morocco,DrivenData Competition: Model/Visualize Fog Patterns in Morocco,"[('DrivenData', 'NNP'), ('Competition', 'NN'), (':', ':'), ('Model', 'NNP'), ('/', 'NNP'), ('Visualize', 'NNP'), ('Fog', 'NNP'), ('Patterns', 'NNP'), ('in', 'IN'), ('Morocco', 'NNP')]","['DrivenData', 'Morocco']" 87 | Deriving Better Insights From Time Series Data With Cycle Plots,2 points by clevertap 11 days ago | discuss,11,"deriving,better,insights,time,series,data,cycle,plots",Deriving Better Insights From Time Series Data With Cycle Plot,Deriving Better Insights From Time Series Data With Cycle Plots,"[('Deriving', 'VBG'), ('Better', 'NNP'), ('Insights', 'NNPS'), ('From', 'NNP'), ('Time', 'NNP'), ('Series', 'NNP'), ('Data', 'NNP'), ('With', 'IN'), ('Cycle', 'NNP'), ('Plots', 'NNP')]","['Better 
Insights From Time Series Data', 'Cycle Plots']" 88 | Deep Learning: Nine Lectures at Coll?ge de France by Yan LeCun,5 points by Anon84 16 days ago | discuss,16,"deep,learning,nine,lectures,coll,ge,de,france,yan,lecun",Deep Learning: Nine Lectures at Coll?ge de France by Yan LeCun,Deep Learning: Nine Lectures at Coll?ge de France by Yan LeCun,"[('Deep', 'JJ'), ('Learning', 'NNP'), (':', ':'), ('Nine', 'JJ'), ('Lectures', 'NNS'), ('at', 'IN'), ('Coll', 'NNP'), ('?', '.'), ('ge', 'NN'), ('de', 'IN'), ('France', 'NNP'), ('by', 'IN'), ('Yan', 'NNP'), ('LeCun', 'NNP')]","['Deep', 'Coll', 'France', 'Yan']" 89 | SQL for Data Analysis,3 points by nickhould 14 days ago | 6 comments,14,"sql,data,analysis",SQL for Data Analysi,SQL for Data Analysis,"[('SQL', 'NNP'), ('for', 'IN'), ('Data', 'NNP'), ('Analysis', 'NNP')]","['SQL', 'Data Analysis']" 90 | Stream processing and messaging systems for the IoT age,2 points by gradientflow 12 days ago | discuss,12,"stream,processing,messaging,systems,iot,age",Stream processing and messaging systems for the IoT ag,Stream processing and messaging systems for the IoT age,"[('Stream', 'NN'), ('processing', 'NN'), ('and', 'CC'), ('messaging', 'VBG'), ('systems', 'NNS'), ('for', 'IN'), ('the', 'DT'), ('IoT', 'NNP'), ('age', 'NN')]","['Stream', 'IoT']" 91 | Optimizing Facebook Campaigns with R,2 points by AnnaOnTheWeb 12 days ago | 1 comment,12,"optimizing,facebook,campaigns,r",Optimizing Facebook Campaigns with R,Optimizing Facebook Campaigns with R,"[('Optimizing', 'VBG'), ('Facebook', 'NNP'), ('Campaigns', 'NNP'), ('with', 'IN'), ('R', 'NNP')]",['Facebook Campaigns'] 92 | "Trump Tweets on a Globe (aka Fun with d3, socket.io, and the Twitter API)",8 points by joelgrus 21 days ago | discuss,21,"trump,tweets,globe,aka,fun,d3,socket,io,twitter,api","Trump Tweets on a Globe (aka Fun with d3, socket.io, and the Twitter API)","Trump Tweets on a Globe (aka Fun with d3, socket.io, and the Twitter API)","[('Trump', 'NNP'), ('Tweets', 'NNP'), ('on', 
'IN'), ('a', 'DT'), ('Globe', 'NNP'), ('(', '('), ('aka', 'JJ'), ('Fun', 'NNP'), ('with', 'IN'), ('d3', 'NN'), (',', ','), ('socket', 'NN'), ('.', '.'), ('io', 'NN'), (',', ','), ('and', 'CC'), ('the', 'DT'), ('Twitter', 'NNP'), ('API', 'NNP'), (')', ')')]","['Trump Tweets', 'Twitter']" 93 | Why pandas users should be excited about Apache Arrow,17 points by pmigdal 28 days ago | discuss,28,"pandas,users,excited,apache,arrow",Why pandas users should be excited about Apache Arrow,Why pandas users should be excited about Apache Arrow,"[('Why', 'WRB'), ('pandas', 'JJ'), ('users', 'NNS'), ('should', 'MD'), ('be', 'VB'), ('excited', 'VBN'), ('about', 'IN'), ('Apache', 'NNP'), ('Arrow', 'NNP')]",['Apache Arrow'] 94 | Histogram intersection for change detection,8 points by datadive 22 days ago | discuss,22,"histogram,intersection,change,detection",Histogram intersection for change detect,Histogram intersection for change detection,"[('Histogram', 'NNP'), ('intersection', 'NN'), ('for', 'IN'), ('change', 'NN'), ('detection', 'NN')]",['Histogram'] 95 | A simpler way to merge data streams,2 points by apoverton 13 days ago | discuss,13,"simpler,way,merge,data,streams",A simpler way to merge data stream,A simpler way to merge data streams,"[('A', 'DT'), ('simpler', 'JJ'), ('way', 'NN'), ('to', 'TO'), ('merge', 'VB'), ('data', 'NNS'), ('streams', 'NNS')]",[] 96 | Distributed TensorFlow just open-sourced,10 points by elyase 25 days ago | discuss,25,"distributed,tensorflow,open,sourced",Distributed TensorFlow just open-sourc,Distributed TensorFlow just open-sourced,"[('Distributed', 'VBN'), ('TensorFlow', 'NNP'), ('just', 'RB'), ('open', 'VB'), ('-', ':'), ('sourced', 'VBN')]",['TensorFlow'] 97 | D3.js Screencasts (1 in 3 are free),4 points by Veerle 18 days ago | discuss,18,"d3,js,screencasts,1,3,free",D3.js Screencasts (1 in 3 are free),D3.js Screencasts (1 in 3 are free),"[('D3', 'NNP'), ('.', '.'), ('js', 'NN'), ('Screencasts', 'NNS'), ('(', '('), ('1', 'CD'), ('in', 'IN'), 
('3', 'CD'), ('are', 'VBP'), ('free', 'JJ'), (')', ')')]",[] 98 | Regression and Classification with Examples in R,5 points by soates 19 days ago | discuss,19,"regression,classification,examples,r",Regression and Classification with Examples in R,Regression and Classification with Examples in R,"[('Regression', 'NN'), ('and', 'CC'), ('Classification', 'NN'), ('with', 'IN'), ('Examples', 'NNP'), ('in', 'IN'), ('R', 'NNP')]","['Regression', 'Examples']" 99 | Free online course on statistical shape modelling,8 points by shapemean 25 days ago | discuss,25,"free,online,course,statistical,shape,modelling",Free online course on statistical shape model,Free online course on statistical shape modelling,"[('Free', 'JJ'), ('online', 'NN'), ('course', 'NN'), ('on', 'IN'), ('statistical', 'JJ'), ('shape', 'NN'), ('modelling', 'NN')]",['Free'] 100 | "Don't worry about deep learning, deepen your understanding of causality instead",22 points by yanir 36 days ago | discuss,36,"worry,deep,learning,deepen,understanding,causality,instead","Don't worry about deep learning, deepen your understanding of causality instead","Don't worry about deep learning, deepen your understanding of causality instead","[('Don', 'NNP'), (""'"", 'POS'), ('t', 'NN'), ('worry', 'VBP'), ('about', 'IN'), ('deep', 'JJ'), ('learning', 'NN'), (',', ','), ('deepen', 'VB'), ('your', 'PRP$'), ('understanding', 'NN'), ('of', 'IN'), ('causality', 'NN'), ('instead', 'RB')]",['Don'] 101 | Skizze - A high throughput probabilistic data structure service and storage,2 points by seiflotfy 14 days ago | discuss,14,"skizze,high,throughput,probabilistic,data,structure,service,storage",Skizze - A high throughput probabilistic data structure service and storag,Skizze - A high throughput probabilistic data structure service and storage,"[('Skizze', 'NNP'), ('-', ':'), ('A', 'DT'), ('high', 'JJ'), ('throughput', 'NN'), ('probabilistic', 'JJ'), ('data', 'NNS'), ('structure', 'NN'), ('service', 'NN'), ('and', 'CC'), ('storage', 
'NN')]",['Skizze'] 102 | Work with private repositories and other updates of the FlyElephant platform,2 points by m31 14 days ago | discuss,14,"work,private,repositories,updates,flyelephant,platform",Work with private repositories and other updates of the FlyElephant platform,Work with private repositories and other updates of the FlyElephant platform,"[('Work', 'NN'), ('with', 'IN'), ('private', 'JJ'), ('repositories', 'NNS'), ('and', 'CC'), ('other', 'JJ'), ('updates', 'NNS'), ('of', 'IN'), ('the', 'DT'), ('FlyElephant', 'NNP'), ('platform', 'NN')]","['Work', 'FlyElephant']" 103 | How to import XML to almost anywhere,4 points by Jammink 19 days ago | discuss,19,"import,xml,almost,anywhere",How to import XML to almost anywher,How to import XML to almost anywhere,"[('How', 'WRB'), ('to', 'TO'), ('import', 'VB'), ('XML', 'NN'), ('to', 'TO'), ('almost', 'RB'), ('anywhere', 'VB')]",['XML'] 104 | Optimizing Notification Timing for One Signal,8 points by megandias 25 days ago | discuss,25,"optimizing,notification,timing,one,signal",Optimizing Notification Timing for One Sign,Optimizing Notification Timing for One Signal,"[('Optimizing', 'VBG'), ('Notification', 'NNP'), ('Timing', 'NNP'), ('for', 'IN'), ('One', 'CD'), ('Signal', 'NNP')]",[] 105 | Survival Analysis of Cricket Player Careers,8 points by keshav92 25 days ago | 6 comments,25,"survival,analysis,cricket,player,careers",Survival Analysis of Cricket Player Car,Survival Analysis of Cricket Player Careers,"[('Survival', 'JJ'), ('Analysis', 'NN'), ('of', 'IN'), ('Cricket', 'NNP'), ('Player', 'NNP'), ('Careers', 'NNP')]",['Cricket Player Careers'] 106 | Generate image analogies using neural matching and blending,2 points by pmigdal 15 days ago | discuss,15,"generate,image,analogies,using,neural,matching,blending",Generate image analogies using neural matching and blend,Generate image analogies using neural matching and blending,"[('Generate', 'NNP'), ('image', 'NN'), ('analogies', 'NNS'), ('using', 'VBG'), 
('neural', 'JJ'), ('matching', 'NN'), ('and', 'CC'), ('blending', 'NN')]",['Generate'] 107 | "Analyzing 1.8M tweets from Super Bowl 50 (Twython, Twitter API, AYLIEN)",4 points by mikewally 20 days ago | discuss,20,"analyzing,1,8m,tweets,super,bowl,50,twython,twitter,api,aylien","Analyzing 1.8M tweets from Super Bowl 50 (Twython, Twitter API, AYLIEN)","Analyzing 1.8M tweets from Super Bowl 50 (Twython, Twitter API, AYLIEN)","[('Analyzing', 'VBG'), ('1', 'CD'), ('.', '.'), ('8M', 'CD'), ('tweets', 'NNS'), ('from', 'IN'), ('Super', 'NNP'), ('Bowl', 'NNP'), ('50', 'CD'), ('(', '('), ('Twython', 'NNP'), (',', ','), ('Twitter', 'NNP'), ('API', 'NNP'), (',', ','), ('AYLIEN', 'NNP'), (')', ')')]","['Super Bowl', 'Twython', 'Twitter API', 'AYLIEN']" 108 | Newly released sklearn compatible library of categorical encoders,7 points by wdm0006 25 days ago | discuss,25,"newly,released,sklearn,compatible,library,categorical,encoders",Newly released sklearn compatible library of categorical encod,Newly released sklearn compatible library of categorical encoders,"[('Newly', 'RB'), ('released', 'VBN'), ('sklearn', 'NN'), ('compatible', 'JJ'), ('library', 'NN'), ('of', 'IN'), ('categorical', 'JJ'), ('encoders', 'NNS')]",[] 109 | Watch Tiny Neural Nets Learn,4 points by swanint 20 days ago | discuss,20,"watch,tiny,neural,nets,learn",Watch Tiny Neural Nets Learn,Watch Tiny Neural Nets Learn,"[('Watch', 'NNP'), ('Tiny', 'NNP'), ('Neural', 'NNP'), ('Nets', 'NNP'), ('Learn', 'NNP')]",['Watch Tiny Neural Nets Learn'] 110 | Four pitfalls of hill climbing: An animated look,5 points by csaid81 22 days ago | discuss,22,"four,pitfalls,hill,climbing,animated,look",Four pitfalls of hill climbing: An animated look,Four pitfalls of hill climbing: An animated look,"[('Four', 'CD'), ('pitfalls', 'NNS'), ('of', 'IN'), ('hill', 'NN'), ('climbing', 'VBG'), (':', ':'), ('An', 'DT'), ('animated', 'JJ'), ('look', 'NN')]",[] 111 | "Decision Forests, Convolutional Networks and the Models in-Between",2 points 
by ebellm 15 days ago | discuss,15,"decision,forests,convolutional,networks,models","Decision Forests, Convolutional Networks and the Models in-Between","Decision Forests, Convolutional Networks and the Models in-Between","[('Decision', 'NN'), ('Forests', 'NNS'), (',', ','), ('Convolutional', 'NNP'), ('Networks', 'NNP'), ('and', 'CC'), ('the', 'DT'), ('Models', 'NNP'), ('in', 'IN'), ('-', ':'), ('Between', 'NN')]","['Convolutional Networks', 'Models']" 112 | How a Math Genius Hacked OkCupid to Find True Love,15 points by roh_codeur 34 days ago | discuss,34,"math,genius,hacked,okcupid,find,true,love",How a Math Genius Hacked OkCupid to Find True Lov,How a Math Genius Hacked OkCupid to Find True Love,"[('How', 'WRB'), ('a', 'DT'), ('Math', 'NNP'), ('Genius', 'NNP'), ('Hacked', 'NNP'), ('OkCupid', 'NNP'), ('to', 'TO'), ('Find', 'VB'), ('True', 'JJ'), ('Love', 'NNP')]",['True Love'] 113 | No developers for PyLearn2,3 points by tfturing 18 days ago | discuss,18,"developers,pylearn2",No developers for PyLearn2,No developers for PyLearn2,"[('No', 'DT'), ('developers', 'NNS'), ('for', 'IN'), ('PyLearn2', 'NN')]",['PyLearn2'] 114 | Density Estimation with Dirichlet Process Mixtures using PyMC3,6 points by MidsizeBlowfish 25 days ago | discuss,25,"density,estimation,dirichlet,process,mixtures,using,pymc3",Density Estimation with Dirichlet Process Mixtures using PyMC3,Density Estimation with Dirichlet Process Mixtures using PyMC3,"[('Density', 'NNP'), ('Estimation', 'NNP'), ('with', 'IN'), ('Dirichlet', 'NNP'), ('Process', 'NNP'), ('Mixtures', 'NNP'), ('using', 'VBG'), ('PyMC3', 'NNP')]","['Density Estimation', 'Dirichlet Process Mixtures', 'PyMC3']" 115 | Using survival analysis and git-pandas to estimate code quality,3 points by wdm0006 19 days ago | discuss,19,"using,survival,analysis,git,pandas,estimate,code,quality",Using survival analysis and git-pandas to estimate code qu,Using survival analysis and git-pandas to estimate code quality,"[('Using', 'VBG'), ('survival', 
'JJ'), ('analysis', 'NN'), ('and', 'CC'), ('git', 'JJ'), ('-', ':'), ('pandas', 'NN'), ('to', 'TO'), ('estimate', 'VB'), ('code', 'NN'), ('quality', 'NN')]",[] 116 | An Analysis of the Flint Michigan Water Crisis: Part 1 Initial Corrosivity,3 points by JHorn 19 days ago | discuss,19,"analysis,flint,michigan,water,crisis,part,1,initial,corrosivity",An Analysis of the Flint Michigan Water Crisis: Part 1 Initial Corros,An Analysis of the Flint Michigan Water Crisis: Part 1 Initial Corrosivity,"[('An', 'DT'), ('Analysis', 'NN'), ('of', 'IN'), ('the', 'DT'), ('Flint', 'NNP'), ('Michigan', 'NNP'), ('Water', 'NNP'), ('Crisis', 'NNP'), (':', ':'), ('Part', 'NN'), ('1', 'CD'), ('Initial', 'NNP'), ('Corrosivity', 'NNP')]",['Flint Michigan Water'] 117 | An Analysis of Republican Twitter Follower Interests,6 points by michelangelo 25 days ago | discuss,25,"analysis,republican,twitter,follower,interests",An Analysis of Republican Twitter Follower Interest,An Analysis of Republican Twitter Follower Interests,"[('An', 'DT'), ('Analysis', 'NN'), ('of', 'IN'), ('Republican', 'JJ'), ('Twitter', 'NNP'), ('Follower', 'NNP'), ('Interests', 'NNS')]",['Republican Twitter Follower'] 118 | Introduction to ML talk,8 points by cjbayesian 29 days ago | discuss,29,"introduction,ml,talk",Introduction to ML talk,Introduction to ML talk,"[('Introduction', 'NN'), ('to', 'TO'), ('ML', 'NNP'), ('talk', 'NN')]",[] 119 | GloVe vs word2vec revisited,3 points by pmigdal 20 days ago | discuss,20,"glove,vs,word2vec,revisited",GloVe vs word2vec revisit,GloVe vs word2vec revisited,"[('GloVe', 'NNP'), ('vs', 'NN'), ('word2vec', 'NN'), ('revisited', 'VBD')]",['GloVe'] 120 | Undergrad Data Analysis/Science internships SF Bay?,3 points by tctctc 15 days ago | 5 comments,15,"undergrad,data,analysis,science,internships,sf,bay",Undergrad Data Analysis/Science internships SF Bay?,Undergrad Data Analysis/Science internships SF Bay?,"[('Undergrad', 'NNP'), ('Data', 'NNP'), ('Analysis', 'NNP'), ('/', 'NNP'), 
('Science', 'NNP'), ('internships', 'NNS'), ('SF', 'NNP'), ('Bay', 'NNP'), ('?', '.')]",['Undergrad Data Analysis'] 121 | The Role of Statistical Significance in Growth Hacking,6 points by rawls234 26 days ago | discuss,26,"role,statistical,significance,growth,hacking",The Role of Statistical Significance in Growth Hack,The Role of Statistical Significance in Growth Hacking,"[('The', 'DT'), ('Role', 'NNP'), ('of', 'IN'), ('Statistical', 'NNP'), ('Significance', 'NNP'), ('in', 'IN'), ('Growth', 'NNP'), ('Hacking', 'NNP')]","['Statistical Significance', 'Growth Hacking']" 122 | Data Science Course @ Harvard,7 points by rahmaniacc 28 days ago | 2 comments,28,"data,science,course,@,harvard",Data Science Course @ Harvard,Data Science Course @ Harvard,"[('Data', 'NNP'), ('Science', 'NNP'), ('Course', 'NNP'), ('@', 'NNP'), ('Harvard', 'NNP')]",['Data Science Course'] 123 | Principal Component Projection Without Principal Component Analysis,6 points by genofon 27 days ago | discuss,27,"principal,component,projection,without,principal,component,analysis",Principal Component Projection Without Principal Component Analysi,Principal Component Projection Without Principal Component Analysis,"[('Principal', 'JJ'), ('Component', 'NNP'), ('Projection', 'NNP'), ('Without', 'IN'), ('Principal', 'NNP'), ('Component', 'NNP'), ('Analysis', 'NN')]",['Principal'] 124 | "Machine Learning: An In-Depth, Non-Technical Guide - Part 3",7 points by innoarchitech 29 days ago | discuss,29,"machine,learning,depth,non,technical,guide,part,3","Machine Learning: An In-Depth, Non-Technical Guide - Part 3","Machine Learning: An In-Depth, Non-Technical Guide - Part 3","[('Machine', 'NN'), ('Learning', 'NNP'), (':', ':'), ('An', 'DT'), ('In', 'IN'), ('-', ':'), ('Depth', 'NN'), (',', ','), ('Non', 'NNP'), ('-', ':'), ('Technical', 'NNP'), ('Guide', 'NNP'), ('-', ':'), ('Part', 'NN'), ('3', 'CD')]","['Machine Learning', 'Non', 'Technical Guide']" 125 | Stochastic Dummy Boosting,2 points by mikeskim 17 
days ago | discuss,17,"stochastic,dummy,boosting",Stochastic Dummy Boost,Stochastic Dummy Boosting,"[('Stochastic', 'JJ'), ('Dummy', 'NNP'), ('Boosting', 'NNP')]",['Stochastic Dummy'] 126 | Interactive Map: Hong-Kong through The Lense of Instagram,2 points by BrianN 18 days ago | discuss,18,"interactive,map,hong,kong,lense,instagram",Interactive Map: Hong-Kong through The Lense of Instagram,Interactive Map: Hong-Kong through The Lense of Instagram,"[('Interactive', 'JJ'), ('Map', 'NN'), (':', ':'), ('Hong', 'NNP'), ('-', ':'), ('Kong', 'NNP'), ('through', 'IN'), ('The', 'DT'), ('Lense', 'NNP'), ('of', 'IN'), ('Instagram', 'NNP')]","['Hong', 'Kong']" 127 | Data Science at Monsanto,3 points by doctorcroc 22 days ago | discuss,22,"data,science,monsanto",Data Science at Monsanto,Data Science at Monsanto,"[('Data', 'NNP'), ('Science', 'NNP'), ('at', 'IN'), ('Monsanto', 'NNP')]","['Data Science', 'Monsanto']" 128 | Data Science at Instacart,11 points by jeremystan 34 days ago | 3 comments,34,"data,science,instacart",Data Science at Instacart,Data Science at Instacart,"[('Data', 'NNP'), ('Science', 'NNP'), ('at', 'IN'), ('Instacart', 'NNP')]","['Data Science', 'Instacart']" 129 | Building a Streaming Search Platform,6 points by ddrum001 27 days ago | discuss,27,"building,streaming,search,platform",Building a Streaming Search Platform,Building a Streaming Search Platform,"[('Building', 'VBG'), ('a', 'DT'), ('Streaming', 'NNP'), ('Search', 'NNP'), ('Platform', 'NNP')]",[] 130 | A Sneak Peak of the Cloud: the 2 Minute Intro for Beginners,2 points by andymaheshw 19 days ago | discuss,19,"sneak,peak,cloud,2,minute,intro,beginners",A Sneak Peak of the Cloud: the 2 Minute Intro for Beginn,A Sneak Peak of the Cloud: the 2 Minute Intro for Beginners,"[('A', 'DT'), ('Sneak', 'NNP'), ('Peak', 'NNP'), ('of', 'IN'), ('the', 'DT'), ('Cloud', 'NNP'), (':', ':'), ('the', 'DT'), ('2', 'CD'), ('Minute', 'NNP'), ('Intro', 'NNP'), ('for', 'IN'), ('Beginners', 'NNP')]",['Sneak Peak'] 131 | 
Win-Vector video courses: price/status changes,2 points by jmount 19 days ago | discuss,19,"win,vector,video,courses,price,status,changes",Win-Vector video courses: price/status chang,Win-Vector video courses: price/status changes,"[('Win', 'NNP'), ('-', ':'), ('Vector', 'NNP'), ('video', 'NN'), ('courses', 'NNS'), (':', ':'), ('price', 'NN'), ('/', 'NN'), ('status', 'NN'), ('changes', 'NNS')]","['Win', 'Vector']" 132 | 50+ Data Science and Machine Learning Cheat Sheets,20 points by elyase 42 days ago | 1 comment,42,"50,+,data,science,machine,learning,cheat,sheets",50+ Data Science and Machine Learning Cheat Sheet,50+ Data Science and Machine Learning Cheat Sheets,"[('50', 'CD'), ('+', 'JJ'), ('Data', 'NNP'), ('Science', 'NNP'), ('and', 'CC'), ('Machine', 'NNP'), ('Learning', 'NNP'), ('Cheat', 'NNP'), ('Sheets', 'NNS')]","['Data Science', 'Machine Learning Cheat']" 133 | One More Reason Not To Be Scared of Deep Learning,2 points by amplifier_khan 21 days ago | discuss,21,"one,reason,scared,deep,learning",One More Reason Not To Be Scared of Deep Learn,One More Reason Not To Be Scared of Deep Learning,"[('One', 'CD'), ('More', 'JJR'), ('Reason', 'NNP'), ('Not', 'RB'), ('To', 'TO'), ('Be', 'VB'), ('Scared', 'NNP'), ('of', 'IN'), ('Deep', 'NNP'), ('Learning', 'NNP')]","['Reason', 'Deep Learning']" 134 | Visual Logic Authoring vs Code,2 points by AnnaOnTheWeb 21 days ago | discuss,21,"visual,logic,authoring,vs,code",Visual Logic Authoring vs Cod,Visual Logic Authoring vs Code,"[('Visual', 'JJ'), ('Logic', 'NNP'), ('Authoring', 'NNP'), ('vs', 'NN'), ('Code', 'NNP')]",['Visual Logic'] 135 | Data Science in Python online training with hands-on experience,2 points by Puneet 21 days ago | discuss,21,"data,science,python,online,training,hands,experience",Data Science in Python online training with hands-on experi,Data Science in Python online training with hands-on experience,"[('Data', 'NNP'), ('Science', 'NNP'), ('in', 'IN'), ('Python', 'NNP'), ('online', 'JJ'), 
('training', 'NN'), ('with', 'IN'), ('hands', 'NNS'), ('-', ':'), ('on', 'IN'), ('experience', 'NN')]","['Data Science', 'Python']" 136 | Viewing the US Presidential Primary Through the Lens of Twitter,8 points by michelangelo 32 days ago | discuss,32,"viewing,us,presidential,primary,lens,twitter",Viewing the US Presidential Primary Through the Lens of Twitt,Viewing the US Presidential Primary Through the Lens of Twitter,"[('Viewing', 'VBG'), ('the', 'DT'), ('US', 'NNP'), ('Presidential', 'NNP'), ('Primary', 'NNP'), ('Through', 'IN'), ('the', 'DT'), ('Lens', 'NNP'), ('of', 'IN'), ('Twitter', 'NNP')]",['US'] 137 | Caffe on Spark open sourced,4 points by rahmaniacc 26 days ago | discuss,26,"caffe,spark,open,sourced",Caffe on Spark open sourc,Caffe on Spark open sourced,"[('Caffe', 'NNP'), ('on', 'IN'), ('Spark', 'NNP'), ('open', 'JJ'), ('sourced', 'VBD')]","['Caffe', 'Spark']" 138 | The Ethical Data Scientist,5 points by tfturing 28 days ago | discuss,28,"ethical,data,scientist",The Ethical Data Scientist,The Ethical Data Scientist,"[('The', 'DT'), ('Ethical', 'NNP'), ('Data', 'NNP'), ('Scientist', 'NN')]",['Ethical Data'] 139 | Answers to Frequently Asked Questions in Machine Learning,3 points by rasbt 21 days ago | discuss,21,"answers,frequently,asked,questions,machine,learning",Answers to Frequently Asked Questions in Machine Learn,Answers to Frequently Asked Questions in Machine Learning,"[('Answers', 'NNS'), ('to', 'TO'), ('Frequently', 'NNP'), ('Asked', 'NNP'), ('Questions', 'NNS'), ('in', 'IN'), ('Machine', 'NNP'), ('Learning', 'NNP')]","['Frequently Asked', 'Machine Learning']" 140 | Intro to A/B Testing and P-Values,2 points by randyzwitch 22 days ago | discuss,22,"intro,b,testing,p,values",Intro to A/B Testing and P-Valu,Intro to A/B Testing and P-Values,"[('Intro', 'NNP'), ('to', 'TO'), ('A', 'NNP'), ('/', 'NNP'), ('B', 'NNP'), ('Testing', 'NNP'), ('and', 'CC'), ('P', 'NNP'), ('-', ':'), ('Values', 'NNS')]",['Intro'] 141 | Visualizing State Level Data With 
R and Statebins,2 points by usujason 22 days ago | discuss,22,"visualizing,state,level,data,r,statebins",Visualizing State Level Data With R and Statebin,Visualizing State Level Data With R and Statebins,"[('Visualizing', 'VBG'), ('State', 'NNP'), ('Level', 'NNP'), ('Data', 'NNP'), ('With', 'IN'), ('R', 'NNP'), ('and', 'CC'), ('Statebins', 'NNP')]",[] 142 | "Probabilistic Graphical Models slides & video lectures (Eric Xing, CMU)",4 points by ororm 27 days ago | discuss,27,"probabilistic,graphical,models,slides,&,video,lectures,eric,xing,cmu","Probabilistic Graphical Models slides & video lectures (Eric Xing, CMU)","Probabilistic Graphical Models slides & video lectures (Eric Xing, CMU)","[('Probabilistic', 'JJ'), ('Graphical', 'NNP'), ('Models', 'NNP'), ('slides', 'VBZ'), ('&', 'CC'), ('video', 'NN'), ('lectures', 'NNS'), ('(', '('), ('Eric', 'NNP'), ('Xing', 'NNP'), (',', ','), ('CMU', 'NNP'), (')', ')')]","['Eric Xing', 'CMU']" 143 | Sense2vec with spaCy and Gensim,9 points by elyase 35 days ago | 2 comments,35,"sense2vec,spacy,gensim",Sense2vec with spaCy and Gensim,Sense2vec with spaCy and Gensim,"[('Sense2vec', 'NN'), ('with', 'IN'), ('spaCy', 'NN'), ('and', 'CC'), ('Gensim', 'NNP')]","['Sense2vec', 'spaCy', 'Gensim']" 144 | How to Code and Understand DeepMind's Neural Stack Machine (in Python),2 points by genofon 23 days ago | discuss,23,"code,understand,deepmind,neural,stack,machine,python",How to Code and Understand DeepMind's Neural Stack Machine (in Python),How to Code and Understand DeepMind's Neural Stack Machine (in Python),"[('How', 'WRB'), ('to', 'TO'), ('Code', 'NNP'), ('and', 'CC'), ('Understand', 'NNP'), ('DeepMind', 'NNP'), (""'"", 'POS'), ('s', 'JJ'), ('Neural', 'NNP'), ('Stack', 'NNP'), ('Machine', 'NNP'), ('(', '('), ('in', 'IN'), ('Python', 'NNP'), (')', ')')]","['Code', 'Understand DeepMind', 'Python']" 145 | How to make polished Jupyter presentations with optional code visibility,9 points by csaid81 36 days ago | 
discuss,36,"make,polished,jupyter,presentations,optional,code,visibility",How to make polished Jupyter presentations with optional code vis,How to make polished Jupyter presentations with optional code visibility,"[('How', 'WRB'), ('to', 'TO'), ('make', 'VB'), ('polished', 'JJ'), ('Jupyter', 'NNP'), ('presentations', 'NNS'), ('with', 'IN'), ('optional', 'JJ'), ('code', 'NN'), ('visibility', 'NN')]",[] 146 | How to become a Bayesian in eight easy steps,17 points by EtzA 43 days ago | 1 comment,43,"become,bayesian,eight,easy,steps",How to become a Bayesian in eight easy step,How to become a Bayesian in eight easy steps,"[('How', 'WRB'), ('to', 'TO'), ('become', 'VB'), ('a', 'DT'), ('Bayesian', 'JJ'), ('in', 'IN'), ('eight', 'CD'), ('easy', 'JJ'), ('steps', 'NNS')]",['Bayesian'] 147 | Optimizing .*: Details of Vectorization and Metaprogramming in Julia,4 points by randyzwitch 29 days ago | discuss,29,"optimizing,.*:,details,vectorization,metaprogramming,julia",Optimizing .*: Details of Vectorization and Metaprogramming in Julia,Optimizing .*: Details of Vectorization and Metaprogramming in Julia,"[('Optimizing', 'VBG'), ('.*:', 'NNP'), ('Details', 'NNP'), ('of', 'IN'), ('Vectorization', 'NNP'), ('and', 'CC'), ('Metaprogramming', 'NNP'), ('in', 'IN'), ('Julia', 'NNP')]",['Julia'] 148 | IBM certified Apache Spark Online Training,8 points by divya_jain 35 days ago | discuss,35,"ibm,certified,apache,spark,online,training",IBM certified Apache Spark Online Train,IBM certified Apache Spark Online Training,"[('IBM', 'NNP'), ('certified', 'VBD'), ('Apache', 'NNP'), ('Spark', 'NNP'), ('Online', 'NNP'), ('Training', 'NN')]","['IBM', 'Apache Spark Online']" 149 | Geographic Data Science course,2 points by rk 24 days ago | discuss,24,"geographic,data,science,course",Geographic Data Science cours,Geographic Data Science course,"[('Geographic', 'NNP'), ('Data', 'NNP'), ('Science', 'NNP'), ('course', 'NN')]",['Geographic Data Science'] 150 | "The Daily Mail Stole My Visualization, 
Twice",5 points by thehoff 32 days ago | 1 comment,32,"daily,mail,stole,visualization,twice","The Daily Mail Stole My Visualization, Twic","The Daily Mail Stole My Visualization, Twice","[('The', 'DT'), ('Daily', 'NNP'), ('Mail', 'NNP'), ('Stole', 'NNP'), ('My', 'NNP'), ('Visualization', 'NNP'), (',', ','), ('Twice', 'NNP')]",['Daily Mail Stole My Visualization'] 151 | Ensemble Methods: Improved Machine Learning Results,9 points by PyBloggers 37 days ago | discuss,37,"ensemble,methods,improved,machine,learning,results",Ensemble Methods: Improved Machine Learning Result,Ensemble Methods: Improved Machine Learning Results,"[('Ensemble', 'JJ'), ('Methods', 'NNS'), (':', ':'), ('Improved', 'VBN'), ('Machine', 'NNP'), ('Learning', 'NNP'), ('Results', 'NNP')]",['Machine Learning Results'] 152 | Apache Spark and unsupervised learning in security,2 points by gradientflow 26 days ago | discuss,26,"apache,spark,unsupervised,learning,security",Apache Spark and unsupervised learning in secur,Apache Spark and unsupervised learning in security,"[('Apache', 'NNP'), ('Spark', 'NNP'), ('and', 'CC'), ('unsupervised', 'JJ'), ('learning', 'NN'), ('in', 'IN'), ('security', 'NN')]",['Apache Spark'] 153 | MachineJS: Automated machine learning- just give it a data file!,2 points by dsernst 26 days ago | discuss,26,"machinejs,automated,machine,learning,give,data,file",MachineJS: Automated machine learning- just give it a data file!,MachineJS: Automated machine learning- just give it a data file!,"[('MachineJS', 'NN'), (':', ':'), ('Automated', 'VBN'), ('machine', 'NN'), ('learning', 'VBG'), ('-', ':'), ('just', 'RB'), ('give', 'VB'), ('it', 'PRP'), ('a', 'DT'), ('data', 'NN'), ('file', 'NN'), ('!', '.')]",['MachineJS'] 154 | Kafka Producer Latency with Large Topic Counts,2 points by marklit 26 days ago | discuss,26,"kafka,producer,latency,large,topic,counts",Kafka Producer Latency with Large Topic Count,Kafka Producer Latency with Large Topic Counts,"[('Kafka', 'NNP'), ('Producer', 'NNP'), 
('Latency', 'NNP'), ('with', 'IN'), ('Large', 'NNP'), ('Topic', 'NNP'), ('Counts', 'NNP')]","['Kafka Producer Latency', 'Large Topic Counts']" 155 | The NSA?s SKYNET program may be killing thousands of innocent people,6 points by zlipp 35 days ago | discuss,35,"nsa,skynet,program,may,killing,thousands,innocent,people",The NSA?s SKYNET program may be killing thousands of innocent peopl,The NSA?s SKYNET program may be killing thousands of innocent people,"[('The', 'DT'), ('NSA', 'NNP'), ('?', '.'), ('s', 'JJ'), ('SKYNET', 'NNP'), ('program', 'NN'), ('may', 'MD'), ('be', 'VB'), ('killing', 'VBG'), ('thousands', 'NNS'), ('of', 'IN'), ('innocent', 'JJ'), ('people', 'NNS')]",['NSA'] 156 | Overoptimizing: a story about kaggle,3 points by wdm0006 29 days ago | discuss,29,"overoptimizing,story,kaggle",Overoptimizing: a story about kaggl,Overoptimizing: a story about kaggle,"[('Overoptimizing', 'NN'), (':', ':'), ('a', 'DT'), ('story', 'NN'), ('about', 'IN'), ('kaggle', 'NN')]",[] 157 | "Big Dimensions, and What You Can Do About It",2 points by ramsey 26 days ago | discuss,26,"big,dimensions","Big Dimensions, and What You Can Do About It","Big Dimensions, and What You Can Do About It","[('Big', 'JJ'), ('Dimensions', 'NNS'), (',', ','), ('and', 'CC'), ('What', 'WP'), ('You', 'PRP'), ('Can', 'MD'), ('Do', 'VB'), ('About', 'IN'), ('It', 'PRP')]",[] 158 | Automate Your Oscars Pool with R,2 points by jamesdreiss 27 days ago | discuss,27,"automate,oscars,pool,r",Automate Your Oscars Pool with R,Automate Your Oscars Pool with R,"[('Automate', 'VB'), ('Your', 'PRP$'), ('Oscars', 'NNP'), ('Pool', 'NNP'), ('with', 'IN'), ('R', 'NNP')]",['Oscars Pool'] 159 | Signal Processing with LIGO GW150914 data,9 points by tfturing 39 days ago | discuss,39,"signal,processing,ligo,gw150914,data",Signal Processing with LIGO GW150914 data,Signal Processing with LIGO GW150914 data,"[('Signal', 'JJ'), ('Processing', 'VBG'), ('with', 'IN'), ('LIGO', 'NNP'), ('GW150914', 'NNP'), ('data', 
'NNS')]",['LIGO'] 160 | Overview of DeZyre and Coursera Data Science Course,5 points by ann928 34 days ago | discuss,34,"overview,dezyre,coursera,data,science,course",Overview of DeZyre and Coursera Data Science Cours,Overview of DeZyre and Coursera Data Science Course,"[('Overview', 'NN'), ('of', 'IN'), ('DeZyre', 'NNP'), ('and', 'CC'), ('Coursera', 'NNP'), ('Data', 'NNP'), ('Science', 'NNP'), ('Course', 'NNP')]","['Overview', 'DeZyre', 'Coursera Data Science Course']" 161 | Upcoming Datathon in NYC,2 points by VicTrey 27 days ago | discuss,27,"upcoming,datathon,nyc",Upcoming Datathon in NYC,Upcoming Datathon in NYC,"[('Upcoming', 'VBG'), ('Datathon', 'NNP'), ('in', 'IN'), ('NYC', 'NNP')]","['Datathon', 'NYC']" 162 | Summarizing Data in SQL,15 points by elisebreda 46 days ago | discuss,46,"summarizing,data,sql",Summarizing Data in SQL,Summarizing Data in SQL,"[('Summarizing', 'VBG'), ('Data', 'NNP'), ('in', 'IN'), ('SQL', 'NNP')]",['SQL'] 163 | A/B Testing for Scammers,2 points by sameermanek 28 days ago | discuss,28,"b,testing,scammers",A/B Testing for Scamm,A/B Testing for Scammers,"[('A', 'DT'), ('/', 'NN'), ('B', 'NNP'), ('Testing', 'NNP'), ('for', 'IN'), ('Scammers', 'NNP')]",[] 164 | Highly interpretable classifiers for scikit learn using Bayesian decision rules,2 points by mcnulty 28 days ago | discuss,28,"highly,interpretable,classifiers,scikit,learn,using,bayesian,decision,rules",Highly interpretable classifiers for scikit learn using Bayesian decision rul,Highly interpretable classifiers for scikit learn using Bayesian decision rules,"[('Highly', 'NNP'), ('interpretable', 'JJ'), ('classifiers', 'NNS'), ('for', 'IN'), ('scikit', 'NN'), ('learn', 'NN'), ('using', 'VBG'), ('Bayesian', 'JJ'), ('decision', 'NN'), ('rules', 'NNS')]","['Highly', 'Bayesian']" 165 | Auto-scaling scikit-learn with Spark,11 points by falaki 42 days ago | discuss,42,"auto,scaling,scikit,learn,spark",Auto-scaling scikit-learn with Spark,Auto-scaling scikit-learn with Spark,"[('Auto', 
'NNP'), ('-', ':'), ('scaling', 'VBG'), ('scikit', 'JJ'), ('-', ':'), ('learn', 'NN'), ('with', 'IN'), ('Spark', 'NNP')]","['Auto', 'Spark']" 166 | Where the f*** can I park?,2 points by manugarri 29 days ago | discuss,29,"f,***,park",Where the f*** can I park?,Where the f*** can I park?,"[('Where', 'WRB'), ('the', 'DT'), ('f', 'NN'), ('***', 'NN'), ('can', 'MD'), ('I', 'PRP'), ('park', 'VB'), ('?', '.')]",[] 167 | "Machine Learning: An In-Depth, Non-Technical Guide - Part 2",5 points by innoarchitech 36 days ago | discuss,36,"machine,learning,depth,non,technical,guide,part,2","Machine Learning: An In-Depth, Non-Technical Guide - Part 2","Machine Learning: An In-Depth, Non-Technical Guide - Part 2","[('Machine', 'NN'), ('Learning', 'NNP'), (':', ':'), ('An', 'DT'), ('In', 'IN'), ('-', ':'), ('Depth', 'NN'), (',', ','), ('Non', 'NNP'), ('-', ':'), ('Technical', 'NNP'), ('Guide', 'NNP'), ('-', ':'), ('Part', 'NN'), ('2', 'CD')]","['Machine Learning', 'Non', 'Technical Guide']" 168 | Webhose.io now offers a historical data archive,7 points by databuffer 40 days ago | discuss,40,"webhose,io,offers,historical,data,archive",Webhose.io now offers a historical data arch,Webhose.io now offers a historical data archive,"[('Webhose', 'NNP'), ('.', '.'), ('io', 'NN'), ('now', 'RB'), ('offers', 'VBZ'), ('a', 'DT'), ('historical', 'JJ'), ('data', 'NN'), ('archive', 'NN')]",['Webhose'] 169 | Meetup: Introduction to Machine Learning Algorithms for Data Science.,4 points by ann928 35 days ago | discuss,35,"meetup,introduction,machine,learning,algorithms,data,science",Meetup: Introduction to Machine Learning Algorithms for Data Science.,Meetup: Introduction to Machine Learning Algorithms for Data Science.,"[('Meetup', 'NN'), (':', ':'), ('Introduction', 'NN'), ('to', 'TO'), ('Machine', 'NNP'), ('Learning', 'NNP'), ('Algorithms', 'NNP'), ('for', 'IN'), ('Data', 'NNP'), ('Science', 'NNP'), ('.', '.')]","['Meetup', 'Machine Learning Algorithms', 'Data Science']" 170 | Exploring the 
Limits of Language Modeling,8 points by soates 42 days ago | discuss,42,"exploring,limits,language,modeling",Exploring the Limits of Language Model,Exploring the Limits of Language Modeling,"[('Exploring', 'VBG'), ('the', 'DT'), ('Limits', 'NNS'), ('of', 'IN'), ('Language', 'NNP'), ('Modeling', 'NNP')]",['Language Modeling'] 171 | Text Mining South Park,7 points by pmigdal 41 days ago | discuss,41,"text,mining,south,park",Text Mining South Park,Text Mining South Park,"[('Text', 'NNP'), ('Mining', 'NNP'), ('South', 'NNP'), ('Park', 'NNP')]",['Text Mining South Park'] 172 | Finding the K in K-means by Parametric Bootstrap,7 points by jmount 41 days ago | 1 comment,41,"finding,k,k,means,parametric,bootstrap",Finding the K in K-means by Parametric Bootstrap,Finding the K in K-means by Parametric Bootstrap,"[('Finding', 'VBG'), ('the', 'DT'), ('K', 'NNP'), ('in', 'IN'), ('K', 'NNP'), ('-', ':'), ('means', 'NNS'), ('by', 'IN'), ('Parametric', 'NNP'), ('Bootstrap', 'NNP')]",['Parametric Bootstrap'] 173 | A Billion NYC Taxi and Uber Rides in AWS Redshift,2 points by marklit 31 days ago | discuss,31,"billion,nyc,taxi,uber,rides,aws,redshift",A Billion NYC Taxi and Uber Rides in AWS Redshift,A Billion NYC Taxi and Uber Rides in AWS Redshift,"[('A', 'DT'), ('Billion', 'NNP'), ('NYC', 'NNP'), ('Taxi', 'NNP'), ('and', 'CC'), ('Uber', 'NNP'), ('Rides', 'NNP'), ('in', 'IN'), ('AWS', 'NNP'), ('Redshift', 'NNP')]",['AWS Redshift'] 174 | Getting Started with Statistics for Data Science,3 points by nickhould 34 days ago | discuss,34,"getting,started,statistics,data,science",Getting Started with Statistics for Data Sci,Getting Started with Statistics for Data Science,"[('Getting', 'VBG'), ('Started', 'VBN'), ('with', 'IN'), ('Statistics', 'NNS'), ('for', 'IN'), ('Data', 'NNP'), ('Science', 'NNP')]",['Data Science'] 175 | Rodeo 1.3 - Tab-completion for docstrings,3 points by glamp 35 days ago | discuss,35,"rodeo,1,3,tab,completion,docstrings",Rodeo 1.3 - Tab-completion for docstr,Rodeo 
1.3 - Tab-completion for docstrings,"[('Rodeo', 'NN'), ('1', 'CD'), ('.', '.'), ('3', 'CD'), ('-', ':'), ('Tab', 'NNP'), ('-', ':'), ('completion', 'NN'), ('for', 'IN'), ('docstrings', 'NNS')]",['Tab'] 176 | Teaching D3.js - links,3 points by pmigdal 35 days ago | discuss,35,"teaching,d3,js,links",Teaching D3.js - link,Teaching D3.js - links,"[('Teaching', 'VBG'), ('D3', 'NNP'), ('.', '.'), ('js', 'NN'), ('-', ':'), ('links', 'NNS')]",[] 177 | Parallel scikit-learn on YARN,5 points by stijntonk 39 days ago | discuss,39,"parallel,scikit,learn,yarn",Parallel scikit-learn on YARN,Parallel scikit-learn on YARN,"[('Parallel', 'NNP'), ('scikit', 'SYM'), ('-', ':'), ('learn', 'NN'), ('on', 'IN'), ('YARN', 'NN')]","['Parallel', 'YARN']" 178 | Meetup: Free Live Webinar on Prescriptive Analytics for Fun and Profit,2 points by ann928 32 days ago | discuss,32,"meetup,free,live,webinar,prescriptive,analytics,fun,profit",Meetup: Free Live Webinar on Prescriptive Analytics for Fun and Profit,Meetup: Free Live Webinar on Prescriptive Analytics for Fun and Profit,"[('Meetup', 'NN'), (':', ':'), ('Free', 'JJ'), ('Live', 'NNP'), ('Webinar', 'NNP'), ('on', 'IN'), ('Prescriptive', 'NNP'), ('Analytics', 'NNP'), ('for', 'IN'), ('Fun', 'NNP'), ('and', 'CC'), ('Profit', 'NN')]","['Meetup', 'Fun']" 179 | Access to VK.com (Vkontakte) API via R,2 points by dementiy 32 days ago | discuss,32,"access,vk,com,vkontakte,api,via,r",Access to VK.com (Vkontakte) API via R,Access to VK.com (Vkontakte) API via R,"[('Access', 'NN'), ('to', 'TO'), ('VK', 'NNP'), ('.', '.'), ('com', 'NN'), ('(', '('), ('Vkontakte', 'NNP'), (')', ')'), ('API', 'NNP'), ('via', 'IN'), ('R', 'NNP')]",['Access'] 180 | Deep Learning Tutorial by Y. LeCun and Y. Bengio,15 points by Anon84 50 days ago | 1 comment,50,"deep,learning,tutorial,lecun,bengio", Deep Learning Tutorial by Y. LeCun and Y. Bengio, Deep Learning Tutorial by Y. LeCun and Y. 
Bengio,"[('Deep', 'NNP'), ('Learning', 'NNP'), ('Tutorial', 'NNP'), ('by', 'IN'), ('Y', 'NNP'), ('.', '.'), ('LeCun', 'NNP'), ('and', 'CC'), ('Y', 'NNP'), ('.', '.'), ('Bengio', 'NNP')]","['Deep Learning Tutorial', 'LeCun']" 181 | Machine Learning Meets Economics,20 points by nicolaskruchten 55 days ago | discuss,55,"machine,learning,meets,economics",Machine Learning Meets Econom,Machine Learning Meets Economics,"[('Machine', 'NN'), ('Learning', 'NNP'), ('Meets', 'NNP'), ('Economics', 'NNP')]",['Machine Learning Meets Economics'] 182 | -------------------------------------------------------------------------------- /img/chunk-segmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amitkaps/text-mining/HEAD/img/chunk-segmentation.png -------------------------------------------------------------------------------- /img/datatau.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amitkaps/text-mining/HEAD/img/datatau.png -------------------------------------------------------------------------------- /img/date.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amitkaps/text-mining/HEAD/img/date.png -------------------------------------------------------------------------------- /img/entity_extraction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amitkaps/text-mining/HEAD/img/entity_extraction.png -------------------------------------------------------------------------------- /img/gutenberg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amitkaps/text-mining/HEAD/img/gutenberg.png -------------------------------------------------------------------------------- /img/ldaformula.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/amitkaps/text-mining/HEAD/img/ldaformula.png -------------------------------------------------------------------------------- /img/nb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amitkaps/text-mining/HEAD/img/nb.png -------------------------------------------------------------------------------- /img/punkt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amitkaps/text-mining/HEAD/img/punkt.png -------------------------------------------------------------------------------- /img/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amitkaps/text-mining/HEAD/img/title.png -------------------------------------------------------------------------------- /notebook/data-tau/Refine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Refine the Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 32, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 33, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "df = pd.read_csv('data_tau.csv')" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 34, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/html": [ 42 | "
\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | "
titledate
0An Exploration of R, Yelp, and the Search for ...5 points by Rogerh91 6 hours ago | discuss
1Deep Advances in Generative Modeling7 points by gwulfs 15 hours ago | 1 comment
2Spark Pipelines: Elegant Yet Powerful3 points by aouyang1 9 hours ago | discuss
3Shit VCs Say3 points by Argentum01 10 hours ago | discuss
4Python, Machine Learning, and Language Wars4 points by pmigdal 17 hours ago | discuss
\n", 79 | "
" 80 | ], 81 | "text/plain": [ 82 | " title \\\n", 83 | "0 An Exploration of R, Yelp, and the Search for ... \n", 84 | "1 Deep Advances in Generative Modeling \n", 85 | "2 Spark Pipelines: Elegant Yet Powerful \n", 86 | "3 Shit VCs Say \n", 87 | "4 Python, Machine Learning, and Language Wars \n", 88 | "\n", 89 | " date \n", 90 | "0 5 points by Rogerh91 6 hours ago | discuss \n", 91 | "1 7 points by gwulfs 15 hours ago | 1 comment \n", 92 | "2 3 points by aouyang1 9 hours ago | discuss \n", 93 | "3 3 points by Argentum01 10 hours ago | discuss \n", 94 | "4 4 points by pmigdal 17 hours ago | discuss " 95 | ] 96 | }, 97 | "execution_count": 34, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "df.head()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "To get the date of the title - we will need the following algorithm\n", 111 | "- If the string contains **hours** we can consider it **1 day**\n", 112 | "- And if the string has **day**, we pick the number preceding the **day**\n", 113 | "\n", 114 | "To apply this algorithm, we need to be able to pick these words and digits from a string. For that we will use Regular Expression." 
115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Introduction to Regular Expression (Regex)\n", 122 | "\n", 123 | "A regular expression is a pattern of symbols used to select text within a string.\n", 124 | "\n", 125 | "Refer to the following links for an interactive playground\n", 126 | "- [http://regexr.com](http://regexr.com/)\n", 127 | "- [http://regex101.com/](http://regex101.com/)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 35, 133 | "metadata": { 134 | "collapsed": true 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "import re" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 36, 144 | "metadata": { 145 | "collapsed": true 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "test_string = \"Hello world, welcome to 2016.\"" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 37, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "# We can pass the whole string and re.search will give the first occurrence of the value\n", 161 | "# re.search - This function searches for first occurrence of RE pattern within string.\n", 162 | "a = re.search('Hello world, welcome to 2016',test_string)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 38, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [ 172 | { 173 | "data": { 174 | "text/plain": [ 175 | "<_sre.SRE_Match object; span=(0, 28), match='Hello world, welcome to 2016'>" 176 | ] 177 | }, 178 | "execution_count": 38, 179 | "metadata": {}, 180 | "output_type": "execute_result" 181 | } 182 | ], 183 | "source": [ 184 | "a" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 39, 190 | "metadata": { 191 | "collapsed": false 192 | }, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "'Hello world, welcome to 2016'" 198 | ] 199 | }, 200 | 
"execution_count": 39, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "a.group()" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 40, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/plain": [ 219 | "'H'" 220 | ] 221 | }, 222 | "execution_count": 40, 223 | "metadata": {}, 224 | "output_type": "execute_result" 225 | } 226 | ], 227 | "source": [ 228 | "# Match the first character in the string\n", 229 | "a = re.search('.',test_string)\n", 230 | "a.group()" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 41, 236 | "metadata": { 237 | "collapsed": false 238 | }, 239 | "outputs": [ 240 | { 241 | "data": { 242 | "text/plain": [ 243 | "'Hello world, welcome to 2016.'" 244 | ] 245 | }, 246 | "execution_count": 41, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "# Match all the characters in the string\n", 253 | "a = re.search('.*',test_string)\n", 254 | "a.group()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 42, 260 | "metadata": { 261 | "collapsed": false 262 | }, 263 | "outputs": [ 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "<_sre.SRE_Match object; span=(0, 5), match='Hello'>\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "a = re.search('Hello',test_string)\n", 274 | "print(a)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "**Some basic symbols**\n", 282 | "\n", 283 | "**`?`** \n", 284 | "\n", 285 | "The question mark indicates zero or one occurrences of the preceding element. For example, colou?r matches both \"color\" and \"colour\".\n", 286 | "\n", 287 | "**`\\*`**\n", 288 | "\n", 289 | "The asterisk indicates zero or more occurrences of the preceding element. 
For example, ab*c matches \"ac\", \"abc\", \"abbc\", \"abbbc\", and so on.\n", 290 | "\n", 291 | "**`\\+`**\t\n", 292 | "The plus sign indicates one or more occurrences of the preceding element. For example, ab+c matches \"abc\", \"abbc\", \"abbbc\", and so on, but not \"ac\".\n" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 43, 298 | "metadata": { 299 | "collapsed": false 300 | }, 301 | "outputs": [ 302 | { 303 | "name": "stdout", 304 | "output_type": "stream", 305 | "text": [ 306 | "<_sre.SRE_Match object; span=(0, 2), match='He'>\n" 307 | ] 308 | } 309 | ], 310 | "source": [ 311 | "a = re.search('\\w.',test_string)\n", 312 | "print(a)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 44, 318 | "metadata": { 319 | "collapsed": false 320 | }, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "<_sre.SRE_Match object; span=(0, 5), match='Hello'>\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "a = re.search('\\w*',test_string)\n", 332 | "print(a)" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "### Exercises" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 45, 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "string = '''In 2016, we are learning Text Analytics in Data Science 101\n", 351 | " by scraping http://datatau.com'''" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 46, 357 | "metadata": { 358 | "collapsed": false 359 | }, 360 | "outputs": [], 361 | "source": [ 362 | "string = \"In 2016, we are learning Text Analytics in Data Science 101 by scraping http://datatau.com\"" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "Write a regex to pick the number 2016 from the string above." 
370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": { 376 | "collapsed": true 377 | }, 378 | "outputs": [], 379 | "source": [] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "metadata": {}, 384 | "source": [ 385 | "Write a regex to pick the URL (http://xyz.com) from the string above." 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": { 392 | "collapsed": true 393 | }, 394 | "outputs": [], 395 | "source": [] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": {}, 400 | "source": [ 401 | "## Let's get the date from our string" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 47, 407 | "metadata": { 408 | "collapsed": false 409 | }, 410 | "outputs": [ 411 | { 412 | "data": { 413 | "text/html": [ 414 | "
\n", 415 | "\n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | "
titledate
0An Exploration of R, Yelp, and the Search for ...5 points by Rogerh91 6 hours ago | discuss
1Deep Advances in Generative Modeling7 points by gwulfs 15 hours ago | 1 comment
2Spark Pipelines: Elegant Yet Powerful3 points by aouyang1 9 hours ago | discuss
3Shit VCs Say3 points by Argentum01 10 hours ago | discuss
4Python, Machine Learning, and Language Wars4 points by pmigdal 17 hours ago | discuss
\n", 451 | "
" 452 | ], 453 | "text/plain": [ 454 | " title \\\n", 455 | "0 An Exploration of R, Yelp, and the Search for ... \n", 456 | "1 Deep Advances in Generative Modeling \n", 457 | "2 Spark Pipelines: Elegant Yet Powerful \n", 458 | "3 Shit VCs Say \n", 459 | "4 Python, Machine Learning, and Language Wars \n", 460 | "\n", 461 | " date \n", 462 | "0 5 points by Rogerh91 6 hours ago | discuss \n", 463 | "1 7 points by gwulfs 15 hours ago | 1 comment \n", 464 | "2 3 points by aouyang1 9 hours ago | discuss \n", 465 | "3 3 points by Argentum01 10 hours ago | discuss \n", 466 | "4 4 points by pmigdal 17 hours ago | discuss " 467 | ] 468 | }, 469 | "execution_count": 47, 470 | "metadata": {}, 471 | "output_type": "execute_result" 472 | } 473 | ], 474 | "source": [ 475 | "df.head()" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 48, 481 | "metadata": { 482 | "collapsed": false 483 | }, 484 | "outputs": [ 485 | { 486 | "data": { 487 | "text/html": [ 488 | "
\n", 489 | "\n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | "
titledate
175Getting Started with Statistics for Data Science3 points by nickhould 35 days ago | discuss
176Rodeo 1.3 - Tab-completion for docstrings3 points by glamp 35 days ago | discuss
177Teaching D3.js - links3 points by pmigdal 35 days ago | discuss
178Parallel scikit-learn on YARN5 points by stijntonk 39 days ago | discuss
179Meetup: Free Live Webinar on Prescriptive Anal...2 points by ann928 32 days ago | discuss
\n", 525 | "
" 526 | ], 527 | "text/plain": [ 528 | " title \\\n", 529 | "175 Getting Started with Statistics for Data Science \n", 530 | "176 Rodeo 1.3 - Tab-completion for docstrings \n", 531 | "177 Teaching D3.js - links \n", 532 | "178 Parallel scikit-learn on YARN \n", 533 | "179 Meetup: Free Live Webinar on Prescriptive Anal... \n", 534 | "\n", 535 | " date \n", 536 | "175 3 points by nickhould 35 days ago | discuss \n", 537 | "176 3 points by glamp 35 days ago | discuss \n", 538 | "177 3 points by pmigdal 35 days ago | discuss \n", 539 | "178 5 points by stijntonk 39 days ago | discuss \n", 540 | "179 2 points by ann928 32 days ago | discuss " 541 | ] 542 | }, 543 | "execution_count": 48, 544 | "metadata": {}, 545 | "output_type": "execute_result" 546 | } 547 | ], 548 | "source": [ 549 | "df.tail()" 550 | ] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "execution_count": 49, 555 | "metadata": { 556 | "collapsed": true 557 | }, 558 | "outputs": [], 559 | "source": [ 560 | "date_string = df['date'][0]" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": 50, 566 | "metadata": { 567 | "collapsed": false 568 | }, 569 | "outputs": [ 570 | { 571 | "name": "stdout", 572 | "output_type": "stream", 573 | "text": [ 574 | "5 points by Rogerh91 6 hours ago | discuss\n" 575 | ] 576 | } 577 | ], 578 | "source": [ 579 | "print(date_string)" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 51, 585 | "metadata": { 586 | "collapsed": false 587 | }, 588 | "outputs": [ 589 | { 590 | "data": { 591 | "text/plain": [ 592 | "<_sre.SRE_Match object; span=(23, 28), match='hours'>" 593 | ] 594 | }, 595 | "execution_count": 51, 596 | "metadata": {}, 597 | "output_type": "execute_result" 598 | } 599 | ], 600 | "source": [ 601 | "re.search('hours',date_string)" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 52, 607 | "metadata": { 608 | "collapsed": true 609 | }, 610 | "outputs": [], 611 | "source": [ 612 | 
"date_string = df['date'][50]" 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": 53, 618 | "metadata": { 619 | "collapsed": false 620 | }, 621 | "outputs": [ 622 | { 623 | "name": "stdout", 624 | "output_type": "stream", 625 | "text": [ 626 | "4 points by lefish 7 days ago | discuss\n" 627 | ] 628 | } 629 | ], 630 | "source": [ 631 | "print(date_string)" 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "execution_count": 54, 637 | "metadata": { 638 | "collapsed": true 639 | }, 640 | "outputs": [], 641 | "source": [ 642 | "# If hours is not there, we don't get any match\n", 643 | "re.search('hours',date_string)" 644 | ] 645 | }, 646 | { 647 | "cell_type": "code", 648 | "execution_count": 55, 649 | "metadata": { 650 | "collapsed": false 651 | }, 652 | "outputs": [ 653 | { 654 | "data": { 655 | "text/plain": [ 656 | "<_sre.SRE_Match object; span=(19, 24), match='7 day'>" 657 | ] 658 | }, 659 | "execution_count": 55, 660 | "metadata": {}, 661 | "output_type": "execute_result" 662 | } 663 | ], 664 | "source": [ 665 | "# Let us match the digit preceding the day text\n", 666 | "day_search = re.search('\\d+ day',date_string)\n", 667 | "day_search" 668 | ] 669 | }, 670 | { 671 | "cell_type": "code", 672 | "execution_count": 56, 673 | "metadata": { 674 | "collapsed": false 675 | }, 676 | "outputs": [ 677 | { 678 | "data": { 679 | "text/plain": [ 680 | "'7 day'" 681 | ] 682 | }, 683 | "execution_count": 56, 684 | "metadata": {}, 685 | "output_type": "execute_result" 686 | } 687 | ], 688 | "source": [ 689 | "days_string = day_search.group(0)\n", 690 | "days_string" 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": 57, 696 | "metadata": { 697 | "collapsed": false 698 | }, 699 | "outputs": [ 700 | { 701 | "data": { 702 | "text/plain": [ 703 | "'7'" 704 | ] 705 | }, 706 | "execution_count": 57, 707 | "metadata": {}, 708 | "output_type": "execute_result" 709 | } 710 | ], 711 | "source": [ 712 | "days = 
days_string.split(' ')[0] \n", 713 | "days" 714 | ] 715 | }, 716 | { 717 | "cell_type": "markdown", 718 | "metadata": {}, 719 | "source": [] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": 58, 724 | "metadata": { 725 | "collapsed": true 726 | }, 727 | "outputs": [], 728 | "source": [ 729 | "def return_reg_ex_days(row):\n", 730 | " days = ''\n", 731 | " if re.search('hours',row['date']) is not None:\n", 732 | " # print('hours',row['date'])\n", 733 | " days = 1\n", 734 | " else:\n", 735 | " day_search = re.search('\\d+ day',row['date'])\n", 736 | " # print('day',day_search.group(0))\n", 737 | " days = day_search.group(0).split(' ')[0] \n", 738 | " \n", 739 | " #print(row,days)\n", 740 | " return days\n", 741 | " " 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 746 | "execution_count": 59, 747 | "metadata": { 748 | "collapsed": false 749 | }, 750 | "outputs": [], 751 | "source": [ 752 | "# Now we apply this function to each of the row in the dataframe\n", 753 | "df['days'] = df.apply(return_reg_ex_days,axis=1)" 754 | ] 755 | }, 756 | { 757 | "cell_type": "code", 758 | "execution_count": 60, 759 | "metadata": { 760 | "collapsed": false 761 | }, 762 | "outputs": [ 763 | { 764 | "data": { 765 | "text/html": [ 766 | "
\n", 767 | "\n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | "
titledatedays
0An Exploration of R, Yelp, and the Search for ...5 points by Rogerh91 6 hours ago | discuss1
1Deep Advances in Generative Modeling7 points by gwulfs 15 hours ago | 1 comment1
2Spark Pipelines: Elegant Yet Powerful3 points by aouyang1 9 hours ago | discuss1
3Shit VCs Say3 points by Argentum01 10 hours ago | discuss1
4Python, Machine Learning, and Language Wars4 points by pmigdal 17 hours ago | discuss1
\n", 809 | "
" 810 | ], 811 | "text/plain": [ 812 | " title \\\n", 813 | "0 An Exploration of R, Yelp, and the Search for ... \n", 814 | "1 Deep Advances in Generative Modeling \n", 815 | "2 Spark Pipelines: Elegant Yet Powerful \n", 816 | "3 Shit VCs Say \n", 817 | "4 Python, Machine Learning, and Language Wars \n", 818 | "\n", 819 | " date days \n", 820 | "0 5 points by Rogerh91 6 hours ago | discuss 1 \n", 821 | "1 7 points by gwulfs 15 hours ago | 1 comment 1 \n", 822 | "2 3 points by aouyang1 9 hours ago | discuss 1 \n", 823 | "3 3 points by Argentum01 10 hours ago | discuss 1 \n", 824 | "4 4 points by pmigdal 17 hours ago | discuss 1 " 825 | ] 826 | }, 827 | "execution_count": 60, 828 | "metadata": {}, 829 | "output_type": "execute_result" 830 | } 831 | ], 832 | "source": [ 833 | "df.head()" 834 | ] 835 | }, 836 | { 837 | "cell_type": "code", 838 | "execution_count": 61, 839 | "metadata": { 840 | "collapsed": false 841 | }, 842 | "outputs": [ 843 | { 844 | "data": { 845 | "text/html": [ 846 | "
\n", 847 | "\n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | "
titledatedays
175Getting Started with Statistics for Data Science3 points by nickhould 35 days ago | discuss35
176Rodeo 1.3 - Tab-completion for docstrings3 points by glamp 35 days ago | discuss35
177Teaching D3.js - links3 points by pmigdal 35 days ago | discuss35
178Parallel scikit-learn on YARN5 points by stijntonk 39 days ago | discuss39
179Meetup: Free Live Webinar on Prescriptive Anal...2 points by ann928 32 days ago | discuss32
\n", 889 | "
" 890 | ], 891 | "text/plain": [ 892 | " title \\\n", 893 | "175 Getting Started with Statistics for Data Science \n", 894 | "176 Rodeo 1.3 - Tab-completion for docstrings \n", 895 | "177 Teaching D3.js - links \n", 896 | "178 Parallel scikit-learn on YARN \n", 897 | "179 Meetup: Free Live Webinar on Prescriptive Anal... \n", 898 | "\n", 899 | " date days \n", 900 | "175 3 points by nickhould 35 days ago | discuss 35 \n", 901 | "176 3 points by glamp 35 days ago | discuss 35 \n", 902 | "177 3 points by pmigdal 35 days ago | discuss 35 \n", 903 | "178 5 points by stijntonk 39 days ago | discuss 39 \n", 904 | "179 2 points by ann928 32 days ago | discuss 32 " 905 | ] 906 | }, 907 | "execution_count": 61, 908 | "metadata": {}, 909 | "output_type": "execute_result" 910 | } 911 | ], 912 | "source": [ 913 | "df.tail()" 914 | ] 915 | }, 916 | { 917 | "cell_type": "code", 918 | "execution_count": 62, 919 | "metadata": { 920 | "collapsed": true 921 | }, 922 | "outputs": [], 923 | "source": [ 924 | "# Let us save to a dataframe\n", 925 | "df.to_csv('data_tau_days.csv', index=False)" 926 | ] 927 | } 928 | ], 929 | "metadata": { 930 | "kernelspec": { 931 | "display_name": "Python 3", 932 | "language": "python", 933 | "name": "python3" 934 | }, 935 | "language_info": { 936 | "codemirror_mode": { 937 | "name": "ipython", 938 | "version": 3 939 | }, 940 | "file_extension": ".py", 941 | "mimetype": "text/x-python", 942 | "name": "python", 943 | "nbconvert_exporter": "python", 944 | "pygments_lexer": "ipython3", 945 | "version": "3.5.1" 946 | } 947 | }, 948 | "nbformat": 4, 949 | "nbformat_minor": 0 950 | } 951 | -------------------------------------------------------------------------------- /notebook/twitter/Acquire.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Acquire the Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | 
"metadata": {}, 13 | "source": [ 14 | "## Sources of Data\n", 15 | "\n", 16 | "We want to understand what the important trends in Machine Learning are at the moment. So we want to get a list of articles about Machine Learning that people are talking about. We can do that from many sources, but we decided to pick three sources to do that.\n", 17 | "\n", 18 | "1. [Reddit.com - Machine Learning](https://www.reddit.com/r/MachineLearning/) - Reddit is a user-generated discussion forum where recent articles and topics on Machine Learning are discussed by the community.\n", 19 | "\n", 20 | "2. [Data Tau](http://www.datatau.com/) - Data Tau is the hacker news for machine learning. Users post articles about the latest trends in data science and machine learning and can have discussions around them.\n", 21 | "\n", 22 | "3. [Twitter #machinelearning](https://twitter.com/search?q=%23machinelearning&src=typd) - We can also look at Twitter with #machinelearning tags to find the latest articles and posts about machine learning that are being discussed on social media.\n", 23 | "\n", 24 | "\n", 25 | "## Working with Twitter\n", 26 | "\n", 27 | "Let us get tweets related to demonetization from Twitter, using the Twitter REST API.\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## Understand how to connect with the Twitter REST API" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 9, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "import tweepy" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 24, 51 | "metadata": { 52 | "collapsed": false 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "consumer_key = 'EgZLCirrTC7ocLS1zykSJl1eG'\n", 57 | "consumer_secret = 'V6jnOXNS8i9mwlmJyA3SKKq3S70qQp6C8zJnxVNNu4bLgHeeeR'\n", 58 | "access_token = '126065173-O5vGO1nqeHRV5KOGjlCev2kR8bf1JDYEbW9dxCw1'\n", 59 | "access_token_secret = 
'4GnF0auo6tAoIpwiAB2h7W8xTe3yYjxKa0T6M41rrg'" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 25, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "auth = tweepy.OAuthHandler(consumer_key, consumer_secret)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 26, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "auth.set_access_token(access_token, access_token_secret)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 27, 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "api = tweepy.API(auth)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### Search twitter for tweets which contain #demonetization" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 46, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "demonetization_tweets = tweepy.Cursor(api.search, q=\"#demonetization\").items(100)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "### Write the data to a csv" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 47, 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "import csv" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 49, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "with open('demonetization.csv', 'w') as outfile:\n", 140 | " writer = csv.writer(outfile, delimiter='\\t')\n", 141 | " data = []\n", 142 | " for tweet in demonetization_tweets:\n", 143 | " data.append([tweet.created_at, tweet.text, tweet.user.screen_name])\n", 144 | " writer.writerows(data)\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 
150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [] 155 | } 156 | ], 157 | "metadata": { 158 | "anaconda-cloud": {}, 159 | "kernelspec": { 160 | "display_name": "Python [default]", 161 | "language": "python", 162 | "name": "python3" 163 | }, 164 | "language_info": { 165 | "codemirror_mode": { 166 | "name": "ipython", 167 | "version": 3 168 | }, 169 | "file_extension": ".py", 170 | "mimetype": "text/x-python", 171 | "name": "python", 172 | "nbconvert_exporter": "python", 173 | "pygments_lexer": "ipython3", 174 | "version": "3.5.2" 175 | } 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 0 179 | } 180 | -------------------------------------------------------------------------------- /notebook/twitter/Refine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Refine the Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 4, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "df = pd.read_csv('tweets.csv', sep=\"\\t\")" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 5, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/html": [ 42 | "
\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | "
created_atscreen_nametext
0Fri Nov 18 23:59:58 +0000 2016arunprasad72RT @Praveen_1singh: First the stone pelting st...
1Fri Nov 18 23:59:49 +0000 2016pranavkisuRT @NewDelhiTimesIN: Is the #demonetization of...
2Fri Nov 18 23:59:48 +0000 2016bablumohanRT @scoopwhoopnews: #BREAKING Banks across Ind...
3Fri Nov 18 23:59:37 +0000 2016NagrathRobRT @DrGPradhan: .@ravishndtv of @ndtv spreadin...
4Fri Nov 18 23:59:28 +0000 2016ManishPrasaRT @YesIamSaffron: जब भी #Demonetization व् का...
\n", 85 | "
" 86 | ], 87 | "text/plain": [ 88 | " created_at screen_name \\\n", 89 | "0 Fri Nov 18 23:59:58 +0000 2016 arunprasad72 \n", 90 | "1 Fri Nov 18 23:59:49 +0000 2016 pranavkisu \n", 91 | "2 Fri Nov 18 23:59:48 +0000 2016 bablumohan \n", 92 | "3 Fri Nov 18 23:59:37 +0000 2016 NagrathRob \n", 93 | "4 Fri Nov 18 23:59:28 +0000 2016 ManishPrasa \n", 94 | "\n", 95 | " text \n", 96 | "0 RT @Praveen_1singh: First the stone pelting st... \n", 97 | "1 RT @NewDelhiTimesIN: Is the #demonetization of... \n", 98 | "2 RT @scoopwhoopnews: #BREAKING Banks across Ind... \n", 99 | "3 RT @DrGPradhan: .@ravishndtv of @ndtv spreadin... \n", 100 | "4 RT @YesIamSaffron: जब भी #Demonetization व् का... " 101 | ] 102 | }, 103 | "execution_count": 5, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "df.head()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "To get the date of the title - we will need the following algorithm\n", 117 | "- If the string contains **hours** we can consider it **1 day**\n", 118 | "- And if the string has **day**, we pick the number preceding the **day**\n", 119 | "\n", 120 | "To apply this algorithm, we need to be able to pick these words and digits from a string. For that we will use Regular Expression." 
121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## Introduction to Regular Expression (Regex)\n", 128 | "\n", 129 | "A regular expression is a way of selecting text in a string using a pattern of symbols.\n", 130 | "\n", 131 | "Refer to the following links for an interactive playground:\n", 132 | "- [http://regexr.com](http://regexr.com/)\n", 133 | "- [http://regex101.com/](http://regex101.com/)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 35, 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "import re" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 36, 150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "test_string = \"Hello world, welcome to 2016.\"" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 37, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# We can pass the whole string and re.search will give the first occurrence of the value\n", 167 | "# re.search - This function searches for the first occurrence of the RE pattern within the string.\n", 168 | "a = re.search('Hello world, welcome to 2016',test_string)" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 38, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "<_sre.SRE_Match object; span=(0, 28), match='Hello world, welcome to 2016'>" 182 | ] 183 | }, 184 | "execution_count": 38, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "a" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 39, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "'Hello world, welcome to 2016'" 204 | ] 205 | }, 206 | 
"execution_count": 39, 207 | "metadata": {}, 208 | "output_type": "execute_result" 209 | } 210 | ], 211 | "source": [ 212 | "a.group()" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 40, 218 | "metadata": { 219 | "collapsed": false 220 | }, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "'H'" 226 | ] 227 | }, 228 | "execution_count": 40, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "# Match the first character in the string ('.' matches any single character except a newline)\n", 235 | "a = re.search('.',test_string)\n", 236 | "a.group()" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 41, 242 | "metadata": { 243 | "collapsed": false 244 | }, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/plain": [ 249 | "'Hello world, welcome to 2016.'" 250 | ] 251 | }, 252 | "execution_count": 41, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": [ 258 | "# Match all the characters in the string (up to the first newline)\n", 259 | "a = re.search('.*',test_string)\n", 260 | "a.group()" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 42, 266 | "metadata": { 267 | "collapsed": false 268 | }, 269 | "outputs": [ 270 | { 271 | "name": "stdout", 272 | "output_type": "stream", 273 | "text": [ 274 | "<_sre.SRE_Match object; span=(0, 5), match='Hello'>\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "a = re.search('Hello',test_string)\n", 280 | "print(a)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "**Some basic symbols**\n", 288 | "\n", 289 | "**`?`** \n", 290 | "\n", 291 | "The question mark indicates zero or one occurrences of the preceding element. For example, colou?r matches both \"color\" and \"colour\".\n", 292 | "\n", 293 | "**`\\*`**\n", 294 | "\n", 295 | "The asterisk indicates zero or more occurrences of the preceding element. 
For example, ab*c matches \"ac\", \"abc\", \"abbc\", \"abbbc\", and so on.\n", 296 | "\n", 297 | "**`\\+`**\t\n", 298 | "The plus sign indicates one or more occurrences of the preceding element. For example, ab+c matches \"abc\", \"abbc\", \"abbbc\", and so on, but not \"ac\".\n" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 43, 304 | "metadata": { 305 | "collapsed": false 306 | }, 307 | "outputs": [ 308 | { 309 | "name": "stdout", 310 | "output_type": "stream", 311 | "text": [ 312 | "<_sre.SRE_Match object; span=(0, 2), match='He'>\n" 313 | ] 314 | } 315 | ], 316 | "source": [ 317 | "a = re.search('\\w.',test_string)\n", 318 | "print(a)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 44, 324 | "metadata": { 325 | "collapsed": false 326 | }, 327 | "outputs": [ 328 | { 329 | "name": "stdout", 330 | "output_type": "stream", 331 | "text": [ 332 | "<_sre.SRE_Match object; span=(0, 5), match='Hello'>\n" 333 | ] 334 | } 335 | ], 336 | "source": [ 337 | "a = re.search('\\w*',test_string)\n", 338 | "print(a)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "### Exercises" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "Write a regex to remove URL link from the tweets" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 62, 358 | "metadata": { 359 | "collapsed": true 360 | }, 361 | "outputs": [], 362 | "source": [] 363 | } 364 | ], 365 | "metadata": { 366 | "anaconda-cloud": {}, 367 | "kernelspec": { 368 | "display_name": "Python [default]", 369 | "language": "python", 370 | "name": "python3" 371 | }, 372 | "language_info": { 373 | "codemirror_mode": { 374 | "name": "ipython", 375 | "version": 3 376 | }, 377 | "file_extension": ".py", 378 | "mimetype": "text/x-python", 379 | "name": "python", 380 | "nbconvert_exporter": "python", 381 | "pygments_lexer": "ipython3", 382 | "version": 
"3.5.2" 383 | } 384 | }, 385 | "nbformat": 4, 386 | "nbformat_minor": 0 387 | } 388 | -------------------------------------------------------------------------------- /notebook/twitter/demonetization.csv: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------