├── README.md
├── static
└── icon.jpeg
├── urls.txt
├── sta.py
├── out.txt
├── templates
└── index.html
├── app.py
└── spider.py
/README.md:
--------------------------------------------------------------------------------
1 | # Compiler-Spider
--------------------------------------------------------------------------------
/static/icon.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oscardhc/Compiler-Spider/HEAD/static/icon.jpeg
--------------------------------------------------------------------------------
/urls.txt:
--------------------------------------------------------------------------------
1 | https://github.com/jinhongyii/Compiler2020
2 | https://github.com/oscardhc/MxSwift
3 | https://github.com/ZYHowell/Mx-Compiler
4 | https://github.com/masterJH5574/Mx-Compiler
5 | https://github.com/sparkmxy/MX_star-Compiler
6 | https://github.com/Yveh/Comiler-Mx_star
7 | https://github.com/cla7aye15I4nd/Ma-Compiler
8 | https://github.com/wu-qing-157/Mx-Compiler
9 | https://github.com/MintGreenTZ/Mx-Compiler
10 | https://github.com/chentong1023/Compiler
11 | https://github.com/yujie6/MX-Compiler
12 | https://github.com/fstqwq/Ornn
13 | https://github.com/ziqian2000/Mx-star-Compiler
14 | https://github.com/wuhuaijin/Mx_compiler
15 | https://github.com/LinsongGuo/Mxstar-compiler
16 |
--------------------------------------------------------------------------------
/sta.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 |
# File extensions that are treated as source code.
acceptedFiles = ['.java', '.kt', '.cpp', '.swift', '.py', '.h', '.hpp']
# A file containing any of these markers is skipped entirely.
doNotCount = ['Generated from', 'The ANTLR Project. All rights reserved']


class Statistics:
    """Collect simple size statistics for the source files under ``path``."""

    def __init__(self, path):
        # Root directory that list() walks.
        self.path = path

    def list(self):
        """Walk ``self.path`` and summarise the accepted source files.

        A file is counted when its extension is in ``acceptedFiles``, its
        directory path does not contain ``'Antlr4'``, it can be read as
        UTF-8, and it contains none of the ``doNotCount`` markers.

        Returns:
            A ``(file_count, line_count, char_count)`` tuple.
            ``line_count`` is the number of non-empty lines whose stripped
            text does not start with ``//`` or ``#`` (so C/C++ preprocessor
            lines are dropped as well). ``char_count`` is the total length
            of the counted files plus one newline per file.
        """
        chunks = []
        for dirpath, _, fnames in os.walk(self.path):
            if 'Antlr4' in dirpath:
                continue
            for fname in fnames:
                if os.path.splitext(fname)[-1] not in acceptedFiles:
                    continue
                # Opening is inside the try as well: a dangling symlink or an
                # unreadable file must skip that file, not abort the walk.
                try:
                    with open(os.path.join(dirpath, fname), 'r',
                              encoding='utf-8') as f:
                        content = f.read()
                except (OSError, UnicodeDecodeError):
                    continue
                if any(marker in content for marker in doNotCount):
                    continue
                chunks.append(content)

        result = ''.join(chunk + '\n' for chunk in chunks)
        arr = [
            line for line in result.split('\n')
            if line and not line.strip().startswith(('//', '#'))
        ]
        return len(chunks), len(arr), len(result)
35 |
36 |
--------------------------------------------------------------------------------
/out.txt:
--------------------------------------------------------------------------------
1 | * Serving Flask app "app" (lazy loading)
2 | * Environment: production
3 | WARNING: This is a development server. Do not use it in a production deployment.
4 | Use a production WSGI server instead.
5 | * Debug mode: off
6 | * Running on http://0.0.0.0:80/ (Press CTRL+C to quit)
7 | Cloning into 'Compiler2020'...
8 | [2020-02-26 13:42:52,409] ERROR in app: Exception on / [GET]
9 | Traceback (most recent call last):
10 | File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 2446, in wsgi_app
11 | response = self.full_dispatch_request()
12 | File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1951, in full_dispatch_request
13 | rv = self.handle_user_exception(e)
14 | File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1820, in handle_user_exception
15 | reraise(exc_type, exc_value, tb)
16 | File "/usr/local/lib/python3.6/dist-packages/flask/_compat.py", line 39, in reraise
17 | raise value
18 | File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1949, in full_dispatch_request
19 | rv = self.dispatch_request()
20 | File "/usr/local/lib/python3.6/dist-packages/flask/app.py", line 1935, in dispatch_request
21 | return self.view_functions[rule.endpoint](**req.view_args)
22 | File "app.py", line 49, in hello_world
23 | return render_template('index.html', res=res, time=t)
24 | NameError: name 't' is not defined
25 | 175.153.169.86 - - [26/Feb/2020 13:42:52] "[35m[1mGET / HTTP/1.1[0m" 500 -
26 | fatal: unable to open /root/Spider/233536673/Compiler2020/.git/objects/pack/tmp_pack_n0kOZB: No such file or directory
27 | fatal: index-pack failed
28 |
--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | Compiler Spider
13 |
14 |
15 |
16 | {% block head %} {% endblock %}
17 |
18 |
19 |
20 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
Github repos
31 |
32 |
33 |
34 |
35 |
36 |
37 |
Sorted by the last commit time. Click on rows for commit details.
38 |
39 |
40 | {% for k, v, c in res %}
41 |
42 |
47 |
48 |
49 |
50 |
51 | {% for i in v %}
52 |
{{ i }}
53 | {% endfor %}
54 |
55 |
56 | {% endfor %}
57 |
58 |
59 |
Last updated: {{ time }}
60 |
61 |
62 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, render_template
2 | import os
3 | import _thread
4 | import time
5 | import datetime
6 | import matplotlib.pyplot as plt
7 | import gc
8 |
app = Flask(__name__)

# Repository URLs, re-read from urls.txt on every update().
a = []
# (url, commit-log lines) tuples, ordered by update(); read by the '/' view.
res = []
# Time of the last refresh. It is defined at import time so the '/' view can
# be served before the first update() finishes and when the module is loaded
# by a WSGI runner instead of being run as a script; without this the view
# raises NameError for 't'.
t = datetime.datetime.now()
13 |
def hs(s):
    """Return a polynomial hash of *s* (base 233, modulo 1000000007) as a decimal string."""
    modulus = 1000000007
    digest = 0
    for code in map(ord, s):
        digest = (digest * 233 + code) % modulus
    return str(digest)
20 |
def byStr(t):
    """Sort key for a result tuple: the first line of its commit log."""
    log_lines = t[1]
    return log_lines[0]
23 |
def square(a):
    """Return a new list holding the square root of every element of *a*."""
    return list(map(lambda value: pow(value, 0.5), a))
26 |
def updateFigure():
    """Redraw ``static/graph.png`` from the commit logs in the global ``res``.

    For every repository a translucent area shows the square root of its
    commit count per week; a blue line shows the sum of those square roots.
    The x axis has one slot per week up to the current ``%W`` week number,
    and a commit made in week ``W`` is counted in slot ``W - 1``.

    Log lines that do not start with a ``%Y-%m-%d %H:%M:%S`` timestamp
    (for example the single empty line of an empty log) and commits whose
    slot falls outside the axis are skipped instead of raising or wrapping
    around to the last slot.
    """
    fig, ax = plt.subplots(1, figsize=(9, 4))

    tot = int(time.strftime('%W', time.localtime()))
    xt = list(range(tot))
    yt = [0] * tot
    for entry in res:
        y = [0] * tot
        for itm in entry[1]:
            try:
                stamp = time.strptime(itm[0:19], '%Y-%m-%d %H:%M:%S')
            except ValueError:
                continue
            # NOTE(review): only the week number is used, the year is not
            # checked, so commits from other years share the same slots.
            week = int(time.strftime('%W', stamp)) - 1
            if 0 <= week < tot:
                y[week] += 1
        y = square(y)
        for j in range(tot):
            yt[j] += y[j]
        ax.fill_between(xt, y, 0,
                        facecolor="orange",
                        color='orange',
                        alpha=0.1)

    ax.plot(xt, yt, 'b-', label='sum of sqrt')
    ax.set_ylabel('square root of commit numbers')
    ax.set_xlabel('week number in 2020')
    ax.set_title('Commit Statistics')
    ax.legend()

    fig.savefig('static/graph.png', dpi=300, format='png')

    # pyplot keeps its own reference to every figure it creates; deleting the
    # local names does not release it, so close the figure explicitly.
    plt.close(fig)
61 |
def _repo_dir(h):
    """Return the first non-hidden sub-directory of *h*, or None if there is none."""
    if not os.path.isdir(h):
        return None
    for name in sorted(os.listdir(h)):
        candidate = os.path.join(h, name)
        if not name.startswith('.') and os.path.isdir(candidate):
            return candidate
    return None


def update():
    """Refresh every repository listed in urls.txt and rebuild ``res``.

    For each non-blank line of urls.txt the repository is cloned into a
    directory named after ``hs(url)`` if no checkout exists there yet, then
    fetched, hard-reset to ``origin/master`` and pulled. Its commit log is
    written to ``<hash>/log.txt`` and read back as a list of lines.

    Updates the globals ``a`` (URL list), ``res`` (``(url, log_lines)``
    tuples sorted by first log line, descending) and ``t`` (refresh time),
    redraws the figure, then sleeps for 60 seconds.

    git is invoked with argument lists and an explicit working directory, so
    a URL is never interpreted by a shell and no git command can run in the
    service's own directory when a checkout is missing. A repository whose
    clone failed is skipped for this round and retried on the next one.
    """
    import subprocess  # local import: only this function runs git

    global a, res, t
    with open('urls.txt', 'r') as f:
        # Tolerates blank lines and a missing trailing newline.
        a = [line.strip() for line in f if line.strip()]
    _res = []
    for url in a:
        h = hs(url)
        repo = _repo_dir(h)
        if repo is None:
            os.makedirs(h, exist_ok=True)
            subprocess.run(['git', 'clone', '--', url], cwd=h)
            repo = _repo_dir(h)
        if repo is None:
            continue
        for args in (['fetch', '--all'],
                     ['reset', '--hard', 'origin/master'],
                     ['pull']):
            subprocess.run(['git'] + args, cwd=repo)
        log_path = os.path.join(h, 'log.txt')
        with open(log_path, 'wb') as out:
            subprocess.run(
                ['git', 'log', '--pretty=format:%ad: %s',
                 '--date=format:%Y-%m-%d %H:%M:%S'],
                cwd=repo, stdout=out)
        # Undecodable bytes in a commit subject must not abort the refresh.
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            _res.append((url, f.read().split('\n')))
    t = datetime.datetime.now()
    # Order a private list first so the request thread never sees ``res``
    # half-way between sorting and reversing.
    ordered = sorted(_res, key=byStr)
    ordered.reverse()
    res = ordered

    del _res

    updateFigure()
    gc.collect()

    time.sleep(60)
85 |
def backend():
    """Run update() forever; this is the body of the background thread.

    An exception raised by one refresh round is printed and the loop goes on
    after a 60 second pause, so a single failed round does not end the
    refresh thread for the rest of the process lifetime.
    """
    import traceback  # local import: only needed for error reporting here

    while True:
        try:
            update()
        except Exception:
            traceback.print_exc()
            # update() only sleeps after a successful round; pause here too
            # so a persistent failure does not turn into a busy loop.
            time.sleep(60)
89 |
@app.route('/')
def hello_world():
    """Render index.html with the current results and the last refresh time."""
    context = {'res': res, 'time': t}
    return render_template('index.html', **context)
94 |
# The refresh loop is started whenever this module is loaded, whether it is
# run as a script or imported.
_thread.start_new_thread(backend, tuple())
if __name__ == '__main__':
    # Script mode: record a start time and serve on all interfaces, port 80.
    t = datetime.datetime.now()
    app.run(port=80, host='0.0.0.0')
99 |
--------------------------------------------------------------------------------
/spider.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, render_template
2 | import os
3 | import _thread
4 | import time
5 | import datetime
6 | import matplotlib.pyplot as plt
7 | import gc
8 | from sta import Statistics
9 |
app = Flask(__name__)

# Repository URLs, re-read from urls.txt on every update().
a = []
# (url, commit-log lines, statistics) tuples, ordered by update(); read by
# the '/' view.
res = []
# Time of the last refresh. It is defined at import time so the '/' view can
# be served before the first update() finishes and when the module is loaded
# by a WSGI runner instead of being run as a script; without this the view
# raises NameError for 't'.
t = datetime.datetime.now()
14 |
def hs(s):
    """Hash *s* with a base-233 rolling polynomial modulo 1000000007; return it as a string."""
    prime = 1000000007
    value = 0
    for ch in s:
        value = value * 233 + ord(ch)
        value %= prime
    return str(value)
21 |
def byStr(t):
    """Sort key for a result tuple: the first line of its commit log."""
    commit_lines = t[1]
    return commit_lines[0]
24 |
def square(a):
    """Return a new list holding the square root of every element of *a*."""
    roots = []
    for value in a:
        roots.append(pow(value, 0.5))
    return roots
27 |
def updateFigure():
    """Redraw ``static/graph.png`` from the commit logs in the global ``res``.

    For every repository a translucent area shows the square root of its
    commit count per week; a blue line shows the sum of those square roots.
    The x axis has one slot per week up to the current ``%W`` week number,
    and a commit made in week ``W`` is counted in slot ``W - 1``.

    Log lines that do not start with a ``%Y-%m-%d %H:%M:%S`` timestamp
    (for example the single empty line of an empty log) and commits whose
    slot falls outside the axis are skipped instead of raising or wrapping
    around to the last slot.
    """
    fig, ax = plt.subplots(1, figsize=(9, 4))

    tot = int(time.strftime('%W', time.localtime()))
    xt = list(range(tot))
    yt = [0] * tot
    for entry in res:
        y = [0] * tot
        for itm in entry[1]:
            try:
                stamp = time.strptime(itm[0:19], '%Y-%m-%d %H:%M:%S')
            except ValueError:
                continue
            # NOTE(review): only the week number is used, the year is not
            # checked, so commits from other years share the same slots.
            week = int(time.strftime('%W', stamp)) - 1
            if 0 <= week < tot:
                y[week] += 1
        y = square(y)
        for j in range(tot):
            yt[j] += y[j]
        ax.fill_between(xt, y, 0,
                        facecolor="orange",
                        color='orange',
                        alpha=0.1)

    ax.plot(xt, yt, 'b-', label='sum of sqrt')
    ax.set_ylabel('square root of commit numbers')
    ax.set_xlabel('week number in 2020')
    ax.set_title('Commit Statistics')
    ax.legend()

    fig.savefig('static/graph.png', dpi=300, format='png')

    # Close exactly the figure created above so pyplot releases it.
    plt.close(fig)
61 |
def _repo_dir(h):
    """Return the first non-hidden sub-directory of *h*, or None if there is none."""
    if not os.path.isdir(h):
        return None
    for name in sorted(os.listdir(h)):
        candidate = os.path.join(h, name)
        if not name.startswith('.') and os.path.isdir(candidate):
            return candidate
    return None


def update():
    """Refresh every repository listed in urls.txt and rebuild ``res``.

    For each non-blank line of urls.txt the repository is cloned into a
    directory named after ``hs(url)`` if no checkout exists there yet, then
    fetched, hard-reset to ``origin/master`` and pulled. Its commit log is
    written to ``<hash>/log.txt`` and read back as a list of lines, and
    ``Statistics(<hash>).list()`` is computed for the directory.

    Updates the globals ``a`` (URL list), ``res`` (``(url, log_lines,
    statistics)`` tuples sorted by first log line, descending) and ``t``
    (refresh time), redraws the figure, then sleeps for 60 seconds.

    git is invoked with argument lists and an explicit working directory, so
    a URL is never interpreted by a shell and no git command can run in the
    service's own directory when a checkout is missing. A repository whose
    clone failed is skipped for this round and retried on the next one.
    """
    import subprocess  # local import: only this function runs git

    global a, res, t
    with open('urls.txt', 'r') as f:
        # Tolerates blank lines and a missing trailing newline.
        a = [line.strip() for line in f if line.strip()]
    _res = []
    for url in a:
        h = hs(url)
        repo = _repo_dir(h)
        if repo is None:
            os.makedirs(h, exist_ok=True)
            # NOTE(review): the shell command this replaces passed
            # `-c core.askpass $echo <url>`; `$echo` is presumably an unset
            # shell variable that expanded to nothing, so the same effective
            # arguments are kept. The intent looks like suppressing
            # credential prompts - confirm, and consider setting
            # GIT_TERMINAL_PROMPT=0 in the environment instead.
            subprocess.run(
                ['git', 'clone', '-c', 'core.askpass', '--', url], cwd=h)
            repo = _repo_dir(h)
        if repo is None:
            continue
        for args in (['fetch', '--all'],
                     ['reset', '--hard', 'origin/master'],
                     ['pull']):
            subprocess.run(['git'] + args, cwd=repo)
        log_path = os.path.join(h, 'log.txt')
        with open(log_path, 'wb') as out:
            subprocess.run(
                ['git', 'log', '--pretty=format:%ad: %s',
                 '--date=format:%Y-%m-%d %H:%M:%S'],
                cwd=repo, stdout=out)
        print(h, url)
        # Undecodable bytes in a commit subject must not abort the refresh.
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            _res.append((url, f.read().split('\n'), Statistics(h).list()))
        print(_res[-1][2])
    t = datetime.datetime.now()
    # Order a private list first so the request thread never sees ``res``
    # half-way between sorting and reversing.
    ordered = sorted(_res, key=byStr)
    ordered.reverse()
    res = ordered

    del _res

    updateFigure()
    gc.collect()

    time.sleep(60)
87 |
def backend():
    """Run update() forever; this is the body of the background thread.

    An exception raised by one refresh round is printed and the loop goes on
    after a 60 second pause, so a single failed round does not end the
    refresh thread for the rest of the process lifetime.
    """
    import traceback  # local import: only needed for error reporting here

    while True:
        try:
            update()
        except Exception:
            traceback.print_exc()
            # update() only sleeps after a successful round; pause here too
            # so a persistent failure does not turn into a busy loop.
            time.sleep(60)
91 |
@app.route('/')
def hello_world():
    """Render index.html with the current results and the last refresh time."""
    context = {'res': res, 'time': t}
    return render_template('index.html', **context)
96 |
# The refresh loop is started whenever this module is loaded, whether it is
# run as a script or imported.
_thread.start_new_thread(backend, tuple())
if __name__ == '__main__':
    # Script mode: record a start time and serve on all interfaces, port 8080.
    t = datetime.datetime.now()
    app.run(port=8080, host='0.0.0.0')
101 |
--------------------------------------------------------------------------------