├── requirements.txt
├── setup.cfg
├── setup.py
├── README.md
├── .gitignore
└── find-similar-projects


/requirements.txt:
--------------------------------------------------------------------------------
1 | lxml==3.4.4
2 | termcolor==1.1.0
3 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | setup(
 4 |     name="find-similar-projects",
 5 |     version="0.1.2",
 6 |     author='@noisy - Krzysztof Szumny',
 7 |     author_email='noisy.pl@gmail.com',
 8 |     description='find-similar-projects is simple script, which grep all pip requirements files in all Github repositories.',
 9 |     scripts=['find-similar-projects'],
10 |     install_requires=[
11 |         "lxml==3.4.4",
12 |         "termcolor==1.1.0",
13 |     ],
14 | )
15 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![PyPI version](https://badge.fury.io/py/find-similar-projects.svg)](http://badge.fury.io/py/find-similar-projects)
 2 | 
 3 | # find-similar-projects
 4 | 
 5 | `find-similar-projects` is a simple script which greps the pip requirements files of all GitHub repositories. Thanks to this script, you can easily find out whether someone else uses a similar configuration of Python packages,
 6 | and how popular this configuration is.
 7 | 
 8 | This can help you find hundreds of similar projects, which you could use
 9 | as examples :)
10 | 
11 | Example of use:
12 | 
13 |     ./find-similar-projects django==1.8 django-allauth django-rest-auth
14 | 
15 | [![asciicast](https://asciinema.org/a/24742.png)](https://asciinema.org/a/24742)
16 | 
17 | 
18 | [![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/noisy/find-similar-projects/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
19 | 
20 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Created by .ignore support plugin (hsz.mobi)
 2 | ### Linux template
 3 | *~
 4 | 
 5 | # KDE directory preferences
 6 | .directory
 7 | 
 8 | # Linux trash folder which might appear on any partition or disk
 9 | .Trash-*
10 | 
11 | 
12 | ### Python template
13 | # Byte-compiled / optimized / DLL files
14 | __pycache__/
15 | *.py[cod]
16 | *$py.class
17 | 
18 | # C extensions
19 | *.so
20 | 
21 | # Distribution / packaging
22 | .Python
23 | env/
24 | build/
25 | develop-eggs/
26 | dist/
27 | downloads/
28 | eggs/
29 | .eggs/
30 | lib/
31 | lib64/
32 | parts/
33 | sdist/
34 | var/
35 | *.egg-info/
36 | .installed.cfg
37 | *.egg
38 | 
39 | # PyInstaller
40 | #  Usually these files are written by a python script from a template
41 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
42 | *.manifest
43 | *.spec
44 | 
45 | # Installer logs
46 | pip-log.txt
47 | pip-delete-this-directory.txt
48 | 
49 | # Unit test / coverage reports
50 | htmlcov/
51 | .tox/
52 | .coverage
53 | .coverage.*
54 | .cache
55 | nosetests.xml
56 | coverage.xml
57 | *,cover
58 | 
59 | # Translations
60 | *.mo
61 | *.pot
62 | 
63 | # Django stuff:
64 | *.log
65 | 
66 | # Sphinx documentation
67 | docs/_build/
68 | 
69 | # PyBuilder
70 | target/
71 | 
72 | 
73 | ### Example user template template
74 | ### Example user template
75 | 
76 | # IntelliJ project files
77 | .idea
78 | *.iml
79 | out
80 | gen
81 | 
82 | 


--------------------------------------------------------------------------------
/find-similar-projects:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | find-similar-projects is a simple script which greps the pip requirements
  5 | files of all GitHub repositories. Thanks to this script, you can easily
  6 | find out whether someone else uses a similar configuration of Python packages,
  7 | and how popular this configuration is.
  8 | 
  9 | This can help you find hundreds of similar projects, which you could use
 10 | as examples :)
 11 | 
 12 | Example of use:
 13 | 
 14 | ./find-similar-projects django==1.8 django-allauth django-rest-auth
 15 | 
 16 | 
 17 | Report bugs to <https://github.com/noisy/find-similar-projects/issues>
 18 | 
 19 | """
 20 | 
 21 | from __future__ import unicode_literals
 22 | 
 23 | import argparse
 24 | import os
 25 | import re
 26 | import urllib
 27 | import urllib2
 28 | import signal
 29 | import sys
 30 | 
 31 | from copy import deepcopy
 32 | from lxml import etree
 33 | from time import sleep
 34 | from termcolor import colored
 35 | 
# XPath evaluated against a GitHub code-search results page: below the element
# whose id is "code_search_results" it follows div[1]/div/p and returns the
# href of the second <a>. main() treats each returned value as the blob URL
# of a file that matched the search.
FILENAME_URL_XPATH = '//*[@id="code_search_results"]/div[1]/div/p/a[2]/@href'

# Regex which parses one requirement specifier per line, for example:
#     pysolr
#     broomstick
#     django-haystack
#     Django>=1.7
#     isbn>1.7
#     djangorestframework==3.0.5
#     djangorestframework-jwt==1.2.0
#     django-haystack<2.0
#     pkg3>=1.0,<=2.0
#
# Capture groups, in order: package name, first operator, first version,
# second operator, second version. Only the operators >=, >, ==, <= and <
# are recognised; groups that do not take part in a match are returned by
# findall() as empty strings.
regex = r'^([\w-]*)(?:(>=|>|==|<=|<)([\d\.]*)(?:,(>=|>|==|<=|<)([\d\.]*))?)?'
# re.M makes ^ match at the start of every line, so a single findall() call
# yields one tuple per line of a requirements file.
pattern = re.compile(regex, re.M)

# On Ctrl-C (SIGINT) leave through sys.exit(0) rather than the default
# KeyboardInterrupt.
signal.signal(signal.SIGINT, lambda signal, frame: sys.exit(0))
 55 | 
 56 | 
 57 | def parse_requirements(requirements):
 58 |     entries = pattern.findall(requirements.lower())
 59 |     return [
 60 |         {'status': None, 'entry': entry}
 61 |         for entry in entries
 62 |         if entry != ('', '', '', '', '')
 63 |     ]
 64 | 
 65 | 
 66 | def query_builder(requirements=None, repos=None):
 67 |     q = 'filename:requirements.txt'
 68 | 
 69 |     for entry in parse_requirements("\n".join(requirements)):
 70 |         q += ' "{}"'.format(entry['entry'][0])
 71 | 
 72 |     for repo in repos or []:
 73 |         q += ' repo:{}'.format(repo)
 74 | 
 75 |     return q
 76 | 
 77 | 
 78 | def get_repo_from_blob(filename_blob_url):
 79 |     return 'http://github.com/' + '/'.join(filename_blob_url.split('/')[1:3])
 80 | 
 81 | 
 82 | def get_raw_file_url_from_blob(filename_blob_url):
 83 |     return 'https://raw.githubusercontent.com' + \
 84 |            filename_blob_url.replace('/blob/', '/')
 85 | 
 86 | 
 87 | def is_similar(requirements_parsed, repo_entries_parsed):
 88 |     for entry in requirements_parsed:
 89 | 
 90 |         repo_entries_to_check = [
 91 |             e for e in repo_entries_parsed if not e['status']
 92 |         ]
 93 | 
 94 |         for repo_entry in repo_entries_to_check:
 95 | 
 96 |             if entry['entry'] == repo_entry['entry']:
 97 |                 entry['status'] = repo_entry['status'] = 'OK'
 98 | 
 99 |                 break
100 | 
101 |             if entry['entry'][0] == repo_entry['entry'][0]:
102 |                 entry['status'] = repo_entry['status'] = 'ALMOST'
103 |                 break
104 | 
105 |         if not entry['status']:
106 |             return False
107 | 
108 |     return True
109 | 
110 | 
def _print_match(filename_blob_url, filename_path, requirements_parsed,
                 repo_entries_parsed):
    """Print the report for one repository whose requirements file matched.

    :param filename_blob_url: blob path of the matched file.
    :param filename_path: path of the file inside the repository.
    :param requirements_parsed: requested entries, statuses already set by
        ``is_similar``.
    :param repo_entries_parsed: entries of the repository's file, statuses
        already set by ``is_similar``.
    """
    print("Repository: " + get_repo_from_blob(filename_blob_url))

    if all(entry['status'] == 'OK' for entry in requirements_parsed):
        print("Status:" + colored("Perfect match", 'green'))
    else:
        print("Status:" + colored("Similar project", 'yellow'))

    print("File: " + filename_path)

    for entry in repo_entries_parsed:
        fields = entry['entry']
        if entry['status'] == 'OK':
            line = colored("".join(fields), 'green')
        elif entry['status'] == 'ALMOST':
            # Only the package name matched, so only the name is highlighted.
            line = colored(fields[0], 'cyan') + "".join(fields[1:])
        else:
            line = "".join(fields)
        print("    " + line)

    print("")


def main():
    """Search GitHub for requirements files similar to the given packages.

    Parses the command line (one or more ``python_package[==version]``
    arguments), then walks the GitHub code-search result pages one by one.
    For every result whose path contains ``requirements`` the raw file is
    downloaded, parsed and compared with the requested packages; matching
    repositories are printed. The loop stops at the first page that yields
    no results.

    A search request that fails is retried once after a 70 second pause; a
    second failure propagates. A requirements file that cannot be downloaded
    is reported and skipped.
    """
    script = os.path.basename(__file__)

    parser = argparse.ArgumentParser(
        usage="{} python_package[==version] [python_package[==version] ...]".format(script),
        epilog=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument('python_package', nargs='+')
    args = parser.parse_args()

    requirements = args.python_package
    requirements_parsed = parse_requirements('\n'.join(requirements))

    params = {
        'type': 'Code',
        'ref': 'searchresults',
        'q': query_builder(requirements),
    }

    # One opener serves every request; it does not change between pages.
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]

    page = 1
    while True:
        params['p'] = str(page)
        url = 'https://github.com/search?' + urllib.urlencode(params)

        try:
            search_response = opener.open(url)
        except Exception:
            print("Github has to rest for a minute, please be patient ;)")
            sleep(70)
            search_response = opener.open(url)

        tree = etree.parse(search_response, etree.HTMLParser())
        filename_blob_urls = tree.xpath(FILENAME_URL_XPATH)

        if not filename_blob_urls:
            break

        for filename_blob_url in filename_blob_urls:
            filename_path = '/'.join(filename_blob_url.split('/')[5:])

            if 'requirements' not in filename_path:
                continue

            raw_file_url = get_raw_file_url_from_blob(filename_blob_url)
            try:
                file_content = opener.open(raw_file_url).read()
            except Exception:
                # "except Exception" lets the SystemExit raised by the SIGINT
                # handler through, so Ctrl-C still ends the program here.
                print("there was some problem...")
                # Skip this file: without fresh content the comparison below
                # would use an undefined name or the previous file's text.
                continue

            # is_similar() writes statuses into its arguments, so every file
            # is compared against a fresh copy of the requested entries.
            requirements_parsed_cpy = deepcopy(requirements_parsed)
            repo_entries_parsed = parse_requirements(file_content)

            if is_similar(requirements_parsed_cpy, repo_entries_parsed):
                _print_match(
                    filename_blob_url,
                    filename_path,
                    requirements_parsed_cpy,
                    repo_entries_parsed,
                )

        page += 1


if __name__ == "__main__":
    main()
208 | 


--------------------------------------------------------------------------------