├── README.md ├── engine.py └── setup.py /README.md: -------------------------------------------------------------------------------- 1 | # Github Recommendation Engine [![endorse](http://api.coderwall.com/modocache/endorsecount.png)](http://coderwall.com/modocache) 2 | 3 | A single Python script that queries the Github API 4 | to return a list of repositories you might be interested 5 | in following. 6 | 7 | ## Usage 8 | 9 | First, make sure you've entered your Github user name and API token in 10 | your global git config (the `github.user` and `github.token` keys). Then: 11 | 12 | $ pip install git+https://modocache@github.com/modocache/github-recommendation-engine.git 13 | $ github-suggestion-engine 14 | ... Wait a long time ... 15 | ['', ...] 16 | 17 | ## Limitations 18 | 19 | Github limits calls to the API to 60 per minute. In order to 20 | obtain relevant data on users and repositories you might be 21 | interested in, this program queries the API quite a lot, taking 22 | a breather whenever Github returns an "API Rate Limit Exceeded" error. 23 | If you follow a lot of repos, the program might take a considerable 24 | amount of time to produce a result. 25 | 26 | You might be wondering whether Github would appreciate so many 27 | requests being thrown at it--I wonder as well, and I plan on 28 | getting some feedback from an admin once I get a chance. 
# ---------------------------------------------------------------------------
# /README.md (continued)
# ---------------------------------------------------------------------------
#
# ## Requirements
#
# - [github2](https://github.com/ask/python-github2), a Python wrapper for the Github API
#
# ---------------------------------------------------------------------------
# /engine.py
# ---------------------------------------------------------------------------
#!/usr/bin/env python
#-*- coding: utf-8 -*-

from __future__ import print_function
from collections import defaultdict
from contextlib import contextmanager
import operator
import time

import github2
from github2.client import Github
from subprocess import Popen, PIPE


API_RATE_OVER_MSG = 'API Rate Limit Exceeded'
SLEEP_INTERVAL = 65


@contextmanager
def api_limiter(func, *args, **kwargs):
    """
    Pace requests to Github API.
    If request exceeds API limit, wait a brief period of time.

    The call is attempted up to three times. Any ``HttpError`` triggers a
    retry; only a 403 whose content contains ``API_RATE_OVER_MSG`` sleeps
    for ``SLEEP_INTERVAL`` seconds first. The error of the final attempt
    is re-raised.

    :param function func: Function to perform
    :param list args: Positional arguments for function
    :param dict kwargs: Keyword arguments for function
    :returns: Context manager whose ``as`` target is the result of ``func``
    :raises github2.request.HttpError: If the last attempt fails
    """
    attempts = 3
    for attempt_count in range(attempts):
        try:
            result = func(*args, **kwargs)
            break
        except github2.request.HttpError as e:
            if attempt_count == attempts - 1:
                raise
            if e.code == 403 and API_RATE_OVER_MSG in str(e.content):
                print('{0}: Too many API calls, '
                      'taking a break...'.format(api_limiter.__name__))
                time.sleep(SLEEP_INTERVAL)
                print('{0}: Phew! Back to work.'.format(api_limiter.__name__))
    # Yield exactly once, and outside the ``try``: a generator-based context
    # manager must not yield again after an exception is thrown into it, and
    # errors raised by the ``with`` body must not be mistaken for API errors.
    yield result


def sort_dict(d):
    """
    Sort a dictionary of ints and return keys
    in descending order of magnitude.

    :param dict d: The dictionary to sort
    :returns: List of the keys of ``d``, largest value first
    """
    # ``items()`` exists on both Python 2 and 3 (``iteritems()`` does not).
    # Sorting ascending and then reversing keeps the original ordering of
    # keys that share the same value.
    ordered = sorted(d.items(), key=operator.itemgetter(1))
    ordered.reverse()
    return [key for (key, _) in ordered]


def _repo_key(repo):
    """
    Return an ``(owner, name)`` tuple identifying a repository object.

    Used instead of comparing or hashing the API objects themselves, whose
    equality semantics are not visible here.
    """
    return (repo.owner, repo.name)


class RecommendationEngine(object):
    # Cache API calls (None means "not fetched yet")
    _my_watched = None
    _watching_my_watched = None
    _similar_users = None
    _recommended_repos = None

    def __init__(self, username=None, api_user=None, api_token=None):
        """
        A recommendation engine which polls the Github API
        to find users with similar interests in repositories.

        :param str username: User to compute recommendations for;
                             defaults to ``api_user``
        :param str api_user: User for API authentication; defaults to the
                             ``github.user`` or ``user.name`` git config value
        :param str api_token: API token; defaults to the ``github.token``
                              git config value
        """
        self.api_user = (api_user or
                         self.git_config_get('github.user') or
                         self.git_config_get('user.name'))
        self.api_token = api_token or self.git_config_get('github.token')
        self.username = username or self.api_user
        self.client = Github(self.api_user, self.api_token)
        self.sleep_interval = SLEEP_INTERVAL

    def __repr__(self):
        return "<{0}:{1}>".format(self.__class__, id(self))
    __str__ = __repr__

    def git_config_get(self, key):
        """
        Return a value for the corresponding key in the
        user's git config.

        :param str key: The git config value key
        :returns: The stripped output of ``git config --get``; empty if
                  the command printed nothing
        """
        # universal_newlines=True yields text rather than bytes on Python 3.
        pipe = Popen(['git', 'config', '--get', key], stdout=PIPE,
                     universal_newlines=True)
        return pipe.communicate()[0].strip()

    @property
    def my_watched(self):
        """
        Return a list of repositories watched by current user.
        """
        if self._my_watched is None:
            # Query ``username`` (which falls back to ``api_user``) so that
            # an explicitly requested user is not silently ignored.
            with api_limiter(self.client.repos.watching, self.username) \
                    as watched:
                self._my_watched = watched
        return self._my_watched

    def get_watching_my_watched(self):
        """
        Return a list of users also watching repositories
        watched by current user.

        The current user is excluded from the result.
        """
        if self._watching_my_watched is None:
            users = set()
            for repo in self.my_watched:
                project = '{0}/{1}'.format(repo.owner, repo.name)
                with api_limiter(self.client.repos.watchers,
                                 project) as watchers:
                    users.update(watchers)
            # discard() rather than remove(): the user may be absent,
            # e.g. when nothing is watched at all.
            users.discard(self.username)
            self._watching_my_watched = users
        return list(self._watching_my_watched)

    def get_similar_users(self, limit_api_calls=200, limit_top_users=10):
        """
        Return a sorted list of users whose followed repositories
        overlap with current user.

        The full ranking is cached after the first call, so
        ``limit_api_calls`` only has an effect on that first call.

        :param int limit_api_calls: Maximum number of API calls to make
        :param int limit_top_users: Number of users to return
        """
        if self._similar_users is None:
            # Build the lookup set once instead of scanning a list per repo.
            mine = set(_repo_key(repo) for repo in self.my_watched)
            overlap = defaultdict(int)
            api_calls = 0
            for user in self.get_watching_my_watched():
                with api_limiter(self.client.repos.watching, user) as watched:
                    # Count the shared repositories (the objects themselves
                    # cannot be summed).
                    overlap[user] += sum(
                        1 for repo in watched if _repo_key(repo) in mine
                    )
                api_calls += 1
                if api_calls >= limit_api_calls:
                    break
            self._similar_users = sort_dict(overlap)
        return self._similar_users[:limit_top_users]

    def get_recommended_repos(self, limit=10):
        """
        Return a list of repositories followed by related users,
        sorted by their popularity within the group.

        :param int limit: Number of repositories to return
        """
        # NOTE(review): repositories the current user already watches are
        # not filtered out -- confirm whether they should be.
        if self._recommended_repos is None:
            counts = defaultdict(int)
            first_seen = {}
            for user in self.get_similar_users():
                with api_limiter(self.client.repos.watching, user) as watched:
                    for repo in watched:
                        key = _repo_key(repo)
                        # Remember one object per repository so the result
                        # is still a list of repository objects.
                        first_seen.setdefault(key, repo)
                        counts[key] += 1
            self._recommended_repos = [
                first_seen[key] for key in sort_dict(counts)
            ]
        return self._recommended_repos[:limit]


def main():
    r = RecommendationEngine()
    print(r.get_recommended_repos())


if __name__ == '__main__':
    main()

# ---------------------------------------------------------------------------
# /setup.py
#
# Separate file from the same dump, kept verbatim as comments so that it is
# not executed as part of this module.
# NOTE(review): keywords contains 'githubt' -- looks like a typo for 'github'.
# NOTE(review): the long_description link has no target URL
#               ("`repository on Github `_.") -- presumably lost; confirm.
# NOTE(review): no py_modules=['engine'] is visible although the console
#               script points at engine:main -- confirm packaging works.
# ---------------------------------------------------------------------------
# from setuptools import setup
#
# setup(
#     name='github-suggestion-engine',
#     version='0.0.1a1',
#     url='https://github.com/modocache/github-recommendation-engine',
#     author='modocache',
#     author_email='modocache@gmail.com',
#     description='A Github repository recommendation engine.',
#     long_description=\
# """
# Outputs a list of Github repositories you might be interested
# in following. Note that this makes a large amount of API
# requests, which are throttled to 60 per minute.
#
# For more information, check out the
# `repository on Github `_.
# """,
#     keywords='githubt repository follow',
#     install_requires=['github2'],
#     classifiers = [
#         'Programming Language :: Python :: 2',
#         'Programming Language :: Python :: 3',
#         'Development Status :: 4 - Beta',
#         'Operating System :: OS Independent',
#         'Intended Audience :: End Users/Desktop',
#         'Topic :: Software Development :: Version Control',
#     ],
#     entry_points = {
#         'console_scripts': [
#             'github-suggestion-engine = engine:main',
#         ]
#     }
# )