├── requirements.txt ├── setup.cfg ├── setup.py ├── README.md ├── .gitignore └── find-similar-projects /requirements.txt: -------------------------------------------------------------------------------- 1 | lxml==3.4.4 2 | termcolor==1.1.0 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="find-similar-projects", 5 | version="0.1.2", 6 | author='@noisy - Krzysztof Szumny', 7 | author_email='noisy.pl@gmail.com', 8 | description='find-similar-projects is simple script, which grep all pip requirements files in all Github repositories.', 9 | scripts=['find-similar-projects'], 10 | install_requires=[ 11 | "lxml==3.4.4", 12 | "termcolor==1.1.0", 13 | ], 14 | ) 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI version](https://badge.fury.io/py/find-similar-projects.svg)](http://badge.fury.io/py/find-similar-projects) 2 | 3 | # find-similar-projects 4 | 5 | `find-similar-projects` is a simple script that greps all pip requirements files in all GitHub repositories. Thanks to this script, you can easily find out whether someone else uses a similar configuration of Python packages 6 | and how popular this configuration is.
7 | 8 | This could help you find hundreds of similar projects, which you could use 9 | as examples :) 10 | 11 | Example of use: 12 | 13 | ./find-similar-projects django==1.8 django-allauth django-rest-auth 14 | 15 | [![asciicast](https://asciinema.org/a/24742.png)](https://asciinema.org/a/24742) 16 | 17 | 18 | [![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/noisy/find-similar-projects/trend.png)](https://bitdeli.com/free "Bitdeli Badge") 19 | 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Linux template 3 | *~ 4 | 5 | # KDE directory preferences 6 | .directory 7 | 8 | # Linux trash folder which might appear on any partition or disk 9 | .Trash-* 10 | 11 | 12 | ### Python template 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *,cover 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | 73 | ### Example user template template 74 | ### Example user template 75 | 76 | # IntelliJ project files 77 | .idea 78 | *.iml 79 | out 80 | gen 81 | 82 | -------------------------------------------------------------------------------- /find-similar-projects: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | find-similar-projects is a simple script that greps all pip requirements 5 | files in all GitHub repositories. Thanks to this script, you can easily 6 | find out whether someone else uses a similar configuration of Python packages 7 | and how popular this configuration is.
8 | 9 | This could help you find hundreds of similar projects, which you could use 10 | as examples :) 11 | 12 | Example of use: 13 | 14 | ./find-similar-projects django==1.8 django-allauth django-rest-auth 15 | 16 | 17 | Report bugs to 18 | 19 | """ 20 | 21 | from __future__ import unicode_literals 22 | 23 | import argparse 24 | import os 25 | import re 26 | import urllib 27 | import urllib2 28 | import signal 29 | import sys 30 | 31 | from copy import deepcopy 32 | from lxml import etree 33 | from time import sleep 34 | from termcolor import colored 35 | 36 | FILENAME_URL_XPATH = '//*[@id="code_search_results"]/div[1]/div/p/a[2]/@href' 37 | 38 | # Regex which parses requirement lines such as: 39 | # pysolr 40 | # django-haystack 41 | # Django>=1.7 42 | # isbn>1.7 43 | # djangorestframework==3.0.5 44 | # djangorestframework-jwt==1.2.0 45 | # django-haystack<2.0 46 | # pkg3>=1.0,<=2.0 47 | # broomstick 48 | # isbn>1.7 49 | # djangorestframework==3.0.5 50 | # djangorestframework-jwt==1.2.0 51 | regex = r'^([\w-]*)(?:(>=|>|==|<=|<)([\d\.]*)(?:,(>=|>|==|<=|<)([\d\.]*))?)?'
pattern = re.compile(regex, re.M)

# Exit with status 0 on Ctrl-C instead of dumping a KeyboardInterrupt
# traceback.
signal.signal(signal.SIGINT, lambda signum, frame: sys.exit(0))

# What ``pattern`` captures on a line where neither a package name nor a
# version constraint matched.
_EMPTY_ENTRY = ('', '', '', '', '')


def parse_requirements(requirements):
    """Parse the text of a pip requirements file.

    :param requirements: requirement specifiers, one per line.
    :return: list of ``{'status': None, 'entry': entry}`` dicts, where
        ``entry`` is the 5-tuple ``(name, op1, version1, op2, version2)``
        captured by ``pattern`` from the lower-cased text. Lines on which
        nothing is captured are skipped.
    """
    return [
        {'status': None, 'entry': entry}
        for entry in pattern.findall(requirements.lower())
        if entry != _EMPTY_ENTRY
    ]


def query_builder(requirements=None, repos=None):
    """Build the code-search query string.

    :param requirements: iterable of requirement specifiers; only the
        package names end up in the query. ``None`` means no packages.
    :param repos: optional iterable of repository names, each added as a
        ``repo:`` qualifier.
    :return: the query string.
    """
    terms = ['filename:requirements.txt']

    # ``requirements or []`` keeps the documented default (None) from
    # crashing inside str.join.
    for entry in parse_requirements("\n".join(requirements or [])):
        terms.append('"{}"'.format(entry['entry'][0]))

    for repo in repos or []:
        terms.append('repo:{}'.format(repo))

    return ' '.join(terms)


def get_repo_from_blob(filename_blob_url):
    """Return the repository URL for a ``/<owner>/<repo>/blob/...`` path."""
    return 'http://github.com/' + '/'.join(filename_blob_url.split('/')[1:3])


def get_raw_file_url_from_blob(filename_blob_url):
    """Return the raw-content URL for a ``/<owner>/<repo>/blob/...`` path."""
    # Only the first '/blob/' is the blob-view marker; a later one belongs
    # to the file's own path and must survive.
    return 'https://raw.githubusercontent.com' + \
        filename_blob_url.replace('/blob/', '/', 1)


def is_similar(requirements_parsed, repo_entries_parsed):
    """Tell whether every wanted requirement appears in a repository file.

    Both arguments are lists as returned by ``parse_requirements`` and are
    modified in place: matched entries on both sides get ``'status'`` set
    to ``'OK'`` (identical entry tuple) or ``'ALMOST'`` (same package
    name only).

    :return: ``False`` as soon as one wanted requirement matches no
        still-unmatched repository entry, ``True`` otherwise.
    """
    for entry in requirements_parsed:
        # A repository entry can satisfy at most one wanted requirement.
        candidates = [e for e in repo_entries_parsed if not e['status']]

        for repo_entry in candidates:
            if entry['entry'] == repo_entry['entry']:
                entry['status'] = repo_entry['status'] = 'OK'
                break

            if entry['entry'][0] == repo_entry['entry'][0]:
                entry['status'] = repo_entry['status'] = 'ALMOST'
                break

        if not entry['status']:
            return False

    return True


def _fetch_search_tree(opener, url):
    """Fetch one search-results page and return it as a parsed HTML tree."""
    try:
        response = opener.open(url)
    except Exception:
        # Presumably rate limiting: wait, then retry once. A second
        # failure propagates to the caller.
        print("Github has to rest for a minute, please be patient ;)")
        sleep(70)
        response = opener.open(url)

    try:
        return etree.parse(response, etree.HTMLParser())
    finally:
        response.close()


def _read_url(opener, url):
    """Return the body of *url*, always closing the response."""
    response = opener.open(url)
    try:
        return response.read()
    finally:
        response.close()


def _print_report(filename_blob_url, filename_path,
                  requirements_parsed, repo_entries_parsed):
    """Print the match summary for one repository requirements file."""
    print("Repository: " + get_repo_from_blob(filename_blob_url))

    if all(entry['status'] == 'OK' for entry in requirements_parsed):
        print("Status:" + colored("Perfect match", 'green'))
    else:
        print("Status:" + colored("Similar project", 'yellow'))

    print("File: " + filename_path)

    for entry in repo_entries_parsed:
        parts = entry['entry']
        if entry['status'] == 'OK':
            text = colored("".join(parts), 'green')
        elif entry['status'] == 'ALMOST':
            # Only the package name matched, so only the name is coloured.
            text = colored(parts[0], 'cyan') + "".join(parts[1:])
        else:
            text = "".join(parts)
        # Same output as the former ``print " ",`` followed by a second
        # print: the literal space plus print's soft space.
        print("  " + text)

    print("")


def main():
    """Search requirements files page by page and report similar projects.

    Package specifiers come from the command line. Pages are requested
    until one yields no file links.
    """
    script = os.path.basename(__file__)

    parser = argparse.ArgumentParser(
        usage="{} python_package[==version] [python_package[==version] ...]".format(script),
        epilog=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('python_package', nargs='+')
    args = parser.parse_args()

    requirements = args.python_package
    requirements_parsed = parse_requirements('\n'.join(requirements))

    params = {
        'type': 'Code',
        'ref': 'searchresults',
        'q': query_builder(requirements),
    }

    # One opener serves every request; it does not change between pages.
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]

    page = 1
    while True:
        params['p'] = str(page)
        url = 'https://github.com/search?' + urllib.urlencode(params)

        tree = _fetch_search_tree(opener, url)
        filename_blob_urls = tree.xpath(FILENAME_URL_XPATH)

        if not filename_blob_urls:
            break

        for filename_blob_url in filename_blob_urls:
            filename_path = '/'.join(filename_blob_url.split('/')[5:])

            if 'requirements' not in filename_path:
                continue

            try:
                file_content = _read_url(
                    opener, get_raw_file_url_from_blob(filename_blob_url)
                )
            except Exception:
                # Best effort: skip files that cannot be downloaded.
                # ``Exception`` rather than a bare except, so the
                # SystemExit raised by the Ctrl-C handler still ends
                # the program.
                print("there was some problem...")
                continue

            # is_similar() mutates its arguments, so each repository is
            # compared against a fresh copy of the wanted requirements.
            requirements_parsed_cpy = deepcopy(requirements_parsed)
            repo_entries_parsed = parse_requirements(file_content)

            if is_similar(requirements_parsed_cpy, repo_entries_parsed):
                _print_report(
                    filename_blob_url, filename_path,
                    requirements_parsed_cpy, repo_entries_parsed,
                )

        page += 1


if __name__ == "__main__":
    main()