├── pocket_tagger ├── __init__.py ├── logger.py ├── pocket_api_client.py ├── scraper.py ├── pocket_tagger.py └── language_service_client.py ├── LICENSE ├── examples └── example-1.py ├── setup.py ├── .gitignore └── README.md /pocket_tagger/__init__.py: -------------------------------------------------------------------------------- 1 | from .pocket_tagger import PocketTagger 2 | -------------------------------------------------------------------------------- /pocket_tagger/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | class Log: 4 | @classmethod 5 | def get_logger(cls, name): 6 | logging.basicConfig(level=logging.INFO) 7 | return logging.getLogger(name) 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Harshit Sanghvi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /pocket_tagger/pocket_api_client.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | from pocket import Pocket, PocketException 5 | 6 | from .logger import Log 7 | 8 | logger = Log.get_logger(__name__) 9 | 10 | class PocketAPIClient: 11 | pocket_client = None 12 | def __init__(self, consumer_key, access_token): 13 | self.pocket_client = Pocket(consumer_key, access_token) 14 | 15 | def get_articles_data(self, *args, **kwargs): 16 | # Fetch the articles 17 | try: 18 | # For list of optional parameters the API supports - https://getpocket.com/developer/docs/v3/retrieve 19 | response, headers = self.pocket_client.get(*args, **kwargs) 20 | return response.get('list') 21 | except PocketException as e: 22 | print(e) 23 | 24 | def add_tags_to_articles(self, articles_with_tags, replace=False): 25 | try: 26 | total_articles = len(articles_with_tags.items()) 27 | if total_articles == 0: 28 | return 29 | pocket_instance = self.pocket_client 30 | # Start a bulk operation 31 | for id, data in articles_with_tags.items(): 32 | pocket_instance = pocket_instance.tags_add(id, data['tags']) 33 | 34 | # and commit 35 | response, headers = self.pocket_client.commit() 36 | logger.info('Added the tags to articles.') 37 | except PocketException as e: 38 | logger.error(e) 39 | -------------------------------------------------------------------------------- /examples/example-1.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | today = datetime.date.today() 3 | 4 | from pocket_tagger.pocket_tagger import PocketTagger 5 | 6 | def tag_em(): 7 | try: 8 | # Google Cloud - Enable Natural Language Processing API for a project, and get your service account API key. 9 | # Save it as gcloud_credentials_file.json 10 | 11 | # Pocket API - Create a credentials.py file with the following lines 12 | ''' 13 | pocket_credentials = { 14 | 'consumer_key': 'your-consumer-key', 15 | 'access_token': 'your-access-token' 16 | } 17 | ''' 18 | from credentials import pocket_credentials 19 | 20 | tagger = PocketTagger(gcloud_credentials_file='gcloud_credentials_file.json', 21 | consumer_key=pocket_credentials.get('consumer_key'), 22 | access_token=pocket_credentials.get('access_token')) 23 | 24 | # For list of optional parameters the API supports - https://getpocket.com/developer/docs/v3/retrieve 25 | articles = tagger.get_articles_from_api(count=10, offset=10, detailType='complete') 26 | # Alternatively you can load the articles from file if you saved them previously using save_articles_to_file 27 | # articles = tagger.get_articles_from_file('20190621.json') 28 | 29 | # Generate tags for each article 30 | articles_with_tags = tagger.get_tags_for_articles(articles) 31 | 32 | # Save the articles with tags to file 33 | tagger.save_articles_to_file(today.strftime('%Y%m%d-with-tags.json'), articles_with_tags) 34 | 35 | # You can skip this step if you want to do a dry run. Verify the tags in the file we generated in the previous step. 36 | tagger.add_tags_to_articles(articles_with_tags) 37 | 38 | except Exception as e: 39 | print(e) 40 | 41 | tag_em() 42 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | import os 4 | 5 | from os import path 6 | from io import open 7 | from setuptools import setup 8 | 9 | # io.open is needed for projects that support Python 2.7 10 | # It ensures open() defaults to text mode with universal newlines, 11 | # and accepts an argument to specify the text encoding 12 | # Python 3 only projects can skip this import 13 | from io import open 14 | 15 | here = path.abspath(path.dirname(__file__)) 16 | 17 | # Get the long description from the README file 18 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 19 | long_description = f.read() 20 | 21 | ''' 22 | When bumping the version 23 | 1. Update version number in this file 24 | 2. Generate package tar file - python setup.py sdist 25 | 3. Publish the package - twine upload dist/* 26 | 4. Tag the commit with same version number and push the tag to github 27 | ''' 28 | 29 | setup(name='pocket-tagger', 30 | version='0.1.1', 31 | description='Tag your pocket articles from getpocket.com automatically using NLP', 32 | long_description=long_description, 33 | long_description_content_type='text/markdown', 34 | url='http://github.com/sanghviharshit/pocket-tagger', 35 | author='Harshit Sanghvi', 36 | author_email='hello@sanghviharshit.com', 37 | classifiers=[ 38 | 'Development Status :: 4 - Beta', 39 | 'Intended Audience :: Developers', 40 | 'Intended Audience :: End Users/Desktop', 41 | 'Operating System :: OS Independent', 42 | 'Topic :: Software Development :: Libraries :: Python Modules', 43 | 'License :: OSI Approved :: MIT License', 44 | 'Programming Language :: Python :: 3', 45 | 'Programming Language :: Python :: 3.4', 46 | 'Programming Language :: Python :: 3.5', 47 | 'Programming Language :: Python :: 3.6', 48 | 'Programming Language :: Python :: 3.7', 49 | ], 50 | license='MIT', 51 | keywords='getpocket, pocket, api, articles, automatic, suggested, tag, natural language processing, nlp', 52 | packages=['pocket_tagger'], 53 | install_requires=['google.cloud', 'pocket', 'requests', 'bs4'], 54 | project_urls={ 55 | 'Bug Reports': 'https://github.com/sanghviharshit/pocket-tagger/issues', 56 | 'Say Thanks!': 'https://saythanks.io/to/sanghviharshit', 57 | 'Source': 'https://github.com/sanghviharshit/pocket-tagger', 58 | }, 59 | ) 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 90 | # install all needed dependencies. 91 | #Pipfile.lock 92 | 93 | # celery beat schedule file 94 | celerybeat-schedule 95 | 96 | # SageMath parsed files 97 | *.sage.py 98 | 99 | # Environments 100 | .env 101 | .venv 102 | env/ 103 | venv/ 104 | ENV/ 105 | env.bak/ 106 | venv.bak/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | .dmypy.json 121 | dmypy.json 122 | 123 | # Pyre type checker 124 | .pyre/ 125 | 126 | # OS 127 | .DS_Store 128 | 129 | # Project specific 130 | data/ 131 | scripts/ 132 | *.json 133 | credentials.py 134 | -------------------------------------------------------------------------------- /pocket_tagger/scraper.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import logging 3 | 4 | from bs4 import BeautifulSoup 5 | 6 | from .logger import Log 7 | 8 | logger = Log.get_logger(__name__) 9 | 10 | class Scraper: 11 | def get_webpage_content(self, url): 12 | title = '' 13 | description = '' 14 | text = '' 15 | 16 | # Make the request and check object type 17 | r = requests.get(url, headers={'User-Agent': 'Mozilla/5.0 (sanghviharshit/Auto Pocket tagger) Chrome/18 Safari/535.19'}) 18 | # Extract HTML from Response object 19 | html = r.text 20 | # Create a BeautifulSoup object from the HTML 21 | soup = BeautifulSoup(html, 'html5lib') 22 | 23 | # Get title and description 24 | try: 25 | if soup.title: 26 | title = soup.title.get_text() 27 | elif soup.h1: 28 | title = soup.h1.get_text() 29 | logger.info(' Title: {}'.format(title)) 30 | 31 | meta = soup.find('meta', attrs={'name': 'description'}) 32 | for tag, value in meta.attrs.items(): 33 | if tag == 'content': 34 | description = value 35 | break 36 | if not description: 37 | if soup.h2: 38 | description = soup.h2.get_text() 39 | 40 | logger.info(' Description: {}'.format(description)) 41 | 42 | except Exception as e: 43 | logger.warning(' ({}) Could not find title/description. {}'.format(url, e)) 44 | pass 45 | 46 | text = self.get_clean_text(soup) 47 | 48 | webpage_content = { 49 | 'title': title, 50 | 'description': description, 51 | 'text': text 52 | } 53 | return webpage_content 54 | 55 | def get_clean_text(self, soup): 56 | # kill all script and style elements 57 | for script in soup(['script', 'style']): 58 | script.decompose() # rip it out 59 | 60 | # get body text 61 | text_body = soup.body.get_text() 62 | # break into lines and remove leading and trailing space on each 63 | text_lines = (line.strip() for line in text_body.splitlines()) 64 | # break multi-headlines into a line each 65 | text_chunks = (phrase.strip() for line in text_lines for phrase in line.split(' ')) 66 | # drop blank lines 67 | clean_text = '\n'.join(chunk for chunk in text_chunks if chunk) 68 | return clean_text 69 | -------------------------------------------------------------------------------- /pocket_tagger/pocket_tagger.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import random 4 | import logging 5 | 6 | from .pocket_api_client import PocketAPIClient 7 | from .scraper import Scraper 8 | from .language_service_client import LanguageServiceClient 9 | from .logger import Log 10 | 11 | logger = Log.get_logger(__name__) 12 | 13 | class PocketTaggerException(Exception): 14 | pass 15 | 16 | class PocketTagger: 17 | pocket_client = None 18 | 19 | def __init__(self, consumer_key=None, access_token=None, gcloud_credentials_file=None): 20 | if consumer_key != None and access_token != None: 21 | self.pocket_client = self.get_pocket_client(consumer_key, access_token) 22 | self.scraper = Scraper() 23 | self.language_service_client = LanguageServiceClient(gcloud_credentials_file) 24 | 25 | def get_pocket_client(self, consumer_key=None, access_token=None): 26 | if self.pocket_client: 27 | return self.pocket_client 28 | elif consumer_key is None or access_token is None: 29 | raise PocketTaggerException 30 | else: 31 | return PocketAPIClient(consumer_key, access_token) 32 | 33 | def get_articles_from_api(self, *args, **kwargs): 34 | return self.get_pocket_client().get_articles_data(*args, **kwargs) 35 | 36 | def add_tags_to_articles(self, articles_with_tags): 37 | self.get_pocket_client().add_tags_to_articles(articles_with_tags) 38 | 39 | def get_articles_from_file(self, fileName): 40 | try: 41 | with open(fileName, 'r') as infile: 42 | articles = json.load(infile) 43 | return articles 44 | except Exception as e: 45 | logger.error('({}) {}'.format(fileName, e)) 46 | return {} 47 | 48 | def save_articles_to_file(self, file_name, articles): 49 | with open(file_name, 'w') as file_name: 50 | json.dump(articles, file_name) 51 | 52 | def get_tags_for_articles(self, articles, *args, **kwargs): 53 | urls = [] 54 | index = 1 55 | 56 | total_articles = len(articles.items()) 57 | if total_articles == 0: 58 | logger.warning('No articles fetched from Pocket') 59 | 60 | for id, data in articles.items(): 61 | url = data['given_url'] 62 | tags = [] 63 | 64 | try: 65 | logger.info('({}/{}) {}'.format(index, total_articles, url)) 66 | webpage_content = self.scraper.get_webpage_content(url) 67 | if webpage_content: 68 | tags = self.language_service_client.get_tags_from_webpage_content(webpage_content, *args, **kwargs) 69 | except Exception as e: 70 | logger.error(' ({}) {}'.format(url, e)) 71 | 72 | if tags: 73 | logger.info(' Tags: {}'.format(', '.join(tags))) 74 | else: 75 | logger.warning(' ({}) No Tags found'.format(url)) 76 | data['tags'] = tags 77 | index += 1 78 | # time.sleep(5) 79 | 80 | return articles 81 | -------------------------------------------------------------------------------- /pocket_tagger/language_service_client.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import math 4 | import logging 5 | 6 | from google.cloud import language 7 | 8 | from .logger import Log 9 | 10 | logger = Log.get_logger(__name__) 11 | 12 | class LanguageServiceClient: 13 | 14 | entity_salience_threshold = 0.7 15 | category_confidence_threshold = 0.3 16 | 17 | def __init__(self, crendentials_file=None): 18 | if crendentials_file: 19 | os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = crendentials_file 20 | 21 | # Initialize google cloud language service client 22 | self.client = language.LanguageServiceClient() 23 | 24 | def get_tags_from_webpage_content(self, webpage_content, thresholds={}): 25 | if thresholds.get('entity_salience_threshold'): 26 | self.entity_salience_threshold = kwargs.get('entity_salience_threshold') 27 | if thresholds.get('category_confidence_threshold'): 28 | self.category_confidence_threshold = kwargs.get('category_confidence_threshold') 29 | 30 | entities = [] 31 | entities = self.get_entities_from_content(webpage_content) 32 | categories = self.get_categories_from_content(webpage_content) 33 | return list(dict.fromkeys(entities + categories)) # Remove Duplicates 34 | 35 | def get_categories_from_content(self, webpage_content): 36 | categories = [] 37 | doc_content = '. '.join([webpage_content['title'], webpage_content['description'], webpage_content['text']]) 38 | if sys.getsizeof(doc_content) > 128000: 39 | max_len = len(doc_content)*128000/sys.getsizeof(doc_content) 40 | doc_content = doc_content[:math.floor(max_len)] 41 | 42 | document = language.types.Document( 43 | content = doc_content, 44 | # language='en', 45 | type=language.enums.Document.Type.PLAIN_TEXT, 46 | # type=language.enums.Document.Type.HTML, 47 | ) 48 | 49 | response = self.client.classify_text(document) 50 | 51 | response_categories = response.categories 52 | logger.debug(' Categories: ') 53 | for category in response_categories: 54 | addCategory = False 55 | if category.confidence > self.category_confidence_threshold: 56 | addCategory = True 57 | labels = [label for label in category.name.split('/') if label] 58 | categories = categories + labels 59 | logger.debug(' {} {}: {}'.format('X' if not addCategory else ' ', category.name, category.confidence)) 60 | return categories 61 | 62 | def get_entities_from_content(self, webpage_content): 63 | entities = [] 64 | document = language.types.Document( 65 | content = '. '.join([webpage_content['title'], webpage_content['description']]), 66 | # language='en', 67 | type=language.enums.Document.Type.PLAIN_TEXT, 68 | ) 69 | response = self.client.analyze_entities( 70 | document=document, 71 | encoding_type='UTF32', 72 | ) 73 | 74 | logger.debug(' Entities: ') 75 | for entity in response.entities: 76 | addEntity = False 77 | if entity.salience > self.entity_salience_threshold: 78 | addEntity = True 79 | entities.append(entity.name.title()) 80 | logger.debug(' {} {}: {}'.format('X' if not addEntity else ' ', entity.name.title(), entity.salience)) 81 | return entities 82 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Auto Pocket Tagger 2 | 3 | Use Google cloud's Natural Language Processing API to automatically analyze the webpage from articles saved in your Pocket list, derive tags/keywords based on the content of the page, and add tags to the articles in Pocket list for free. 4 | 5 | > Pocket has suggested tags service for their paid premium plans. You can find more about it [here](https://help.getpocket.com/article/906-pocket-premium-suggested-tags). This still requires manual work of adding the tags to each article one-by-one. This package automates all of it for free. 6 | 7 | ## Features 8 | - Uses [Python wrapper](https://github.com/tapanpandita/pocket) for [Pocket API](http://getpocket.com/api/docs) to retrieve articles in the `My List` 9 | - Uses [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/) to scrape webpages 10 | - Uses Google Cloud's [Natural Language Processing API](https://cloud.google.com/natural-language/) to generate list of categories and entities from the content of the webpage 11 | - Uses Pocket API to add tags to articles in your `My List` 12 | 13 | 14 | ## Usage 15 | 16 | ### Installation 17 | 18 | #### Install published version from pypi 19 | ```shell 20 | $ pip install pocket-tagger 21 | ``` 22 | 23 | #### Install latest version from git 24 | ```shell 25 | $ pip install git+https://github.com/sanghviharshit/pocket-tagger 26 | ``` 27 | 28 | 29 | ### Prerequisites 30 | #### [Google Cloud](https://cloud.google.com/natural-language/docs/quickstart) 31 | 32 | This package relies on Google cloud natural language processing API, which requires billing enabled on your project. 33 | You can find the quickstart instructions [here](https://cloud.google.com/natural-language/docs/quickstart) 34 | **Options:** 35 | 1. Create a service account and download the credentials file - https://cloud.google.com/video-intelligence/docs/common/auth 36 | ```python 37 | tagger = PocketTagger(gcloud_credentials_file="gcloud_credentials_file.json") 38 | ``` 39 | 2. or Configure gloud locally - https://cloud.google.com/sdk/gcloud/reference/init 40 | ```python 41 | tagger = PocketTagger() 42 | ``` 43 | 44 | #### [Pocket API](https://getpocket.com/developer/) 45 | 46 | To fetch the articles list and add tags, you need a developer key from [here](https://getpocket.com/developer/) 47 | Create a new Application with `modify` and `retrieve` permissions. Save the Consumer Key and Access Token. 48 | ```python 49 | tagger = PocketTagger(consumer_key='your-consumer-key', 50 | access_token='your-access-token') 51 | ``` 52 | 53 | ### [Examples](./examples) 54 | 55 | ```python 56 | # Initialize PocketTagger with GCloud and Pocket API Credentials 57 | tagger = PocketTagger(gcloud_credentials_file="gcloud_credentials_file.json", 58 | consumer_key='pocket-consumer-key', 59 | access_token='pocket-access-token') 60 | 61 | # Check https://getpocket.com/developer/docs/v3/retrieve for additional list of options you can pass for retrieving pocket list 62 | articles = tagger.get_articles_from_api(count=10, offset=10, detailType='complete') 63 | 64 | # Alternatively you can load the articles from file if you saved them previously using save_articles_to_file 65 | # articles = tagger.get_articles_from_file("20190621.json") 66 | # Generate tags for each article 67 | articles_with_tags = tagger.get_tags_for_articles(articles) 68 | 69 | # Save the articles with tags to file. You can use this file to verify it looks good before running the final step to tag the articles. 70 | tagger.save_articles_to_file(today.strftime('%Y%m%d-with-tags.json'), articles_with_tags) 71 | 72 | # You can skip this step if you want to do a dry run. Verify the tags in the file we generated in the previous step. 73 | tagger.add_tags_to_articles(articles_with_tags) 74 | ``` 75 | 76 | ### Optional overrides 77 | You can override the default thresholds for [entity](https://cloud.google.com/natural-language/docs/reference/rest/v1/Entity 78 | ) salience and [category](https://cloud.google.com/natural-language/docs/reference/rest/v1/ClassificationCategory) confidence 79 | 80 | ```python 81 | thresholds = { 82 | 'entity_salience_threshold': 0.7 83 | 'category_confidence_threshold': 0.3 84 | } 85 | articles_with_tags = tagger.get_tags_for_articles(articles, thresholds) 86 | ``` 87 | 88 | ## Sample 89 | 90 | Sample output from running it for my 490 items long Pocket list 91 | > `X` under Entities or Categories denotes the NLP client returned those as potential candidates, but we skipped them because it didn't meet the threshold. You can see the last line `Tags: abc, xyz` for list of tags pocket-tagger added for each URL. 92 | 93 | ``` 94 | (1/490) https://www.reddit.com/r/explainlikeimfive/comments/bvweym/eli5_why_do_coffee_drinkers_feel_more_clear/?utm_source=share&utm_medium=ios_app 95 | Title: ELI5: Why do coffee drinkers feel more clear headed after consuming caffeine? Why do some get a headache without it? Does caffeine cause any permanent brain changes and can the brain go back to 'normal' after years of caffeine use? : explainlikeimfive 96 | Description: r/explainlikeimfive: **Explain Like I'm Five is the best forum and archive on the internet for layperson-friendly explanations.**   Don't Panic! 97 | Entities: 98 | X Coffee Drinkers: 0.2438652664422989 99 | X Eli5: 0.14941969513893127 100 | X Caffeine: 0.12065556645393372 101 | X Caffeine: 0.0874909833073616 102 | X Some: 0.06917785853147507 103 | X Headache: 0.0606028214097023 104 | X Brain: 0.03606536239385605 105 | X Explainlikeimfive: 0.033727116882801056 106 | X Brain Changes: 0.03211209550499916 107 | X Caffeine Use: 0.029848895967006683 108 | X R: 0.02966366335749626 109 | X Forum: 0.028598546981811523 110 | X Internet: 0.022404097020626068 111 | X Archive: 0.022404097020626068 112 | X Explainlikeimfive: 0.017647551372647285 113 | X Don'T Panic: 0.009302889928221703 114 | X Five: 0.007013489492237568 115 | X Five: 0.0 116 | Categories: 117 | /Food & Drink/Beverages/Coffee & Tea: 0.6700000166893005 118 | Tags: Food & Drink, Beverages, Coffee & Tea 119 | (2/490) https://www.reddit.com/r/television/comments/bnpwe3/enjoy_three_full_minutes_of_the_cast_of_game_of/?utm_source=share&utm_medium=ios_app 120 | Title: Enjoy three full minutes of the cast of 'Game of Thrones' expressing disappointment in Season 8. : television 121 | Description: r/television: 122 | Entities: 123 | X Cast: 0.31218624114990234 124 | X Disappointment: 0.20341947674751282 125 | X Season: 0.20341947674751282 126 | X Game Of Thrones: 0.13265934586524963 127 | X Television: 0.08712445199489594 128 | X Television: 0.06119102984666824 129 | X 8: 0.0 130 | X Three: 0.0 131 | Categories: 132 | /Arts & Entertainment/TV & Video/TV Shows & Programs: 0.75 133 | Tags: Arts & Entertainment, TV & Video, TV Shows & Programs 134 | (3/490) https://www.reddit.com/r/homeautomation/comments/awvf5r/local_realtime_person_detection_for_rtsp_cameras/ 135 | Title: Local realtime person detection for RTSP cameras : homeautomation 136 | Description: r/homeautomation: A subreddit focused on automating your home, housework or household activity. Sensors, switches, cameras, locks, etc. Any … 137 | Entities: 138 | X Realtime Person Detection: 0.3057926297187805 139 | X Homeautomation: 0.15315502882003784 140 | X Cameras: 0.14035314321517944 141 | X Rtsp: 0.07461880147457123 142 | X Homeautomation: 0.051411159336566925 143 | X Home: 0.047811269760131836 144 | X Housework: 0.04366889223456383 145 | X Subreddit: 0.04183248057961464 146 | X R: 0.04132793843746185 147 | X Cameras: 0.032860007137060165 148 | X Locks: 0.028899790719151497 149 | X Household Activity: 0.012798599898815155 150 | X Switches: 0.012735127471387386 151 | X Sensors: 0.012735127471387386 152 | Categories: 153 | /Computers & Electronics: 0.7900000214576721 154 | Tags: Computers & Electronics 155 | ``` 156 | 157 | ## References 158 | - [Pocket API Wrapper for Python](https://github.com/tapanpandita/pocket) 159 | - [Pocket API Docs](http://getpocket.com/api/docs) 160 | - [Google Cloud Natural Language Processing](https://cloud.google.com/natural-language/) 161 | - [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/) 162 | - [Complete list of content categories from Google Natural Language API](https://cloud.google.com/natural-language/docs/categories) 163 | 164 | ## Analytics 165 | [![Analytics](https://ga-beacon.appspot.com/UA-59542024-4/pocket-tagger/)](https://github.com/igrigorik/ga-beacon) 166 | --------------------------------------------------------------------------------