6 |
我也推荐神奇的dex: https://github.com/mongolab/dex
7 |
10 |
索引字段不够(需要复合索引)
11 |
xx/models.py 143-146行
12 | statuses = list(coll.find(
13 | {"is_update": False},
14 | {"uid": 1}
15 | ).sort('date', -1).limit(200))
16 |
17 | > db.status_5008fe57b3158aa4709ecd8f_201311.getIndexes()
18 | [
19 | {
20 | "v" : 1,
21 | "key" : {
22 | "_id" : 1
23 | },
24 | "ns" : "dongwm.status_5008fe57b3158aa4709ecd8f_201311",
25 | "name" : "_id_"
26 | },
27 | {
28 | "v" : 1,
29 | "key" : {
30 | "date" : -1
31 | },
32 | "ns" : "dongwm.status_5008fe57b3158aa4709ecd8f_201311",
33 | "name" : "cdate_-1"
34 | },
35 | {
36 | "v" : 1,
37 | "key" : {
38 | "idate" : -1
39 | },
40 | "ns" : "dongwm.status_5008fe57b3158aa4709ecd8f_201311",
41 | "name" : "idate_-1"
42 | }
43 | ]
44 |
45 |
46 | db["status_5008fe57b3158aa4709ecd8f_201311"].ensureIndex({"is_update": 1, "date": 1}, {"background": true})
47 |
48 |
没有索引
49 |
xx/tasks.py 551-564行
50 | pre_date = update_date - timedelta(hours=1)
51 | pre_infcoll = s_db["inf_%s_%s" % (
52 | key["_id"], pre_date.strftime("%Y%m")
53 | )]
54 | pre_ho = {}
55 | pre_inf = (
56 | pre_infcoll.find_one({
57 | "day": update_date.replace(
58 | hour=0,
59 | minute=0,
60 | second=0,
61 | microsecond=0
62 | )
63 | }) or {}).get(str(pre_date.hour), [])
64 |
65 |
66 | > db.inf_51ee2552fb0dd8d290a7749a_201311.getIndexes()
67 | [ ]
68 |
69 |
70 | db["inf_51ee2552fb0dd8d290a7749a_201311"].ensureIndex({"day": 1}, {"background": true})
71 |
72 |
索引顺序问题
74 |
xx/models.py 1110-1119行
75 | def get_o_s(uid, date):
76 | return list(db.st.find(
77 | {
78 | 'user_id': uid,
79 | 'created_at': {'$gte': date},
80 | 'rs': 0
81 | },
82 | {'rsince_timestamp': 1, 'csince_timestamp': 1, 'last_cid': 1}
83 | ))
84 |
85 |
86 | > db.status.getIndexes()
87 | [
88 | {
89 | "v" : 1,
90 | "key" : {
91 | "_id" : 1
92 | },
93 | "ns" : "dongwm.status",
94 | "name" : "_id_"
95 | }
96 | ]
97 |
98 |
99 | db["status"].ensureIndex({"user_id": 1, "rs": 1, "created_at": 1}, {"background": true})
100 |
101 |
102 |
103 |
104 | {% endblock %}
--------------------------------------------------------------------------------
/data_analysis/views.py:
--------------------------------------------------------------------------------
1 | #coding=utf-8
2 | import random
3 | import datetime
4 | from flask import (Blueprint, request, redirect, render_template, url_for,
5 | jsonify)
6 | from flask.views import MethodView
7 | from data_analysis.models import Apidist, Celery, Mongo
8 |
# Blueprint that every view in this module is registered on.
posts = Blueprint('posts', __name__, template_folder='templates')

# Palette that make_json() draws chart colours from.
color = [
    "#FF0F00", "#FF6600", "#FF9E01", "#FCD202", "#F8FF01", "#B0DE09",
    "#04D215", "#0D8ECF", "#0D52D1", "#2A0CD0", "#8A0CCF", "#CD0D74",
    "#754DEB", "#2c3e50", "#2c3e50", "#e67e22", "#e74c3c", "#ecf0f1",
    "#95a5a6", "#d35400", "#8e44ad", "#8e44ad", "#bdc3c7", "#d35400",
    "#1abc9c", "#2ecc71", "#3498db", "#9b59b6", "#34495e",
]

# Chart menu for the Celery page: view type -> [(data type, [chart kinds])].
# get_form() falls back to the first data type / first chart kind.
celery = {
    'task': [
        ('count', ["simple_column", "pie"]),
        ('cost', ["multi_column"]),
    ],
    'time': [
        ('count', ["simple_column", "pie"]),
    ],
}

# Chart menu for the Mongodb page, same layout as `celery`.
mongo = {
    'all': [
        ('getmore', ["multi_column"]),
        ('update', ["multi_column"]),
        ('insert', ["multi_column"]),
        ('command', ["multi_column"]),
        ('query', ["multi_column"]),
    ],
    'sentiment': [
        ('getmore', ["multi_column"]),
        ('update', ["multi_column"]),
        ('insert', ["multi_column"]),
        ('command', ["multi_column"]),
        ('query', ["multi_column"]),
    ],
    'qq_online': [
        ('getmore', ["multi_column"]),
        ('update', ["multi_column"]),
        ('insert', ["multi_column"]),
        ('command', ["multi_column"]),
        ('query', ["multi_column"]),
    ],
    'total': [
        ('getmore', ["simple_column", "pie"]),
        ('update', ["simple_column", "pie"]),
        ('insert', ["simple_column", "pie"]),
        ('command', ["simple_column", "pie"]),
        ('query', ["simple_column", "pie"]),
        ('all_op', ["simple_column", "pie"]),
        ('all_slow', ["simple_column", "pie"]),
    ],
}

# Placeholder; nothing in the visible part of this module fills it.
d3 = {}
38 |
39 |
def make_json(res, title, category, value, chart, key, des, total, titles=None,
              values=None):
    """Sort chart rows, colour them and wrap everything in a JSON response.

    Each row in ``res`` is a dict and is modified in place: single-series
    charts get a ``color`` entry per row, multi-series charts get the same
    ``color1``/``color2``/``color3`` triple on every row.

    :param res: iterable of row dicts.
    :param title: passed through to the response unchanged.
    :param category: passed through to the response unchanged.
    :param value: row key holding the value; also the sort key when
        ``titles`` is None.
    :param chart: chart kind, passed through to the response unchanged.
    :param key: index into ``values`` selecting the sort key when ``titles``
        is given.
    :param des: passed through to the response unchanged.
    :param total: passed through to the response unchanged.
    :param titles: series titles; ``None`` selects the single-series path.
    :param values: row keys of the series, used together with ``key``.
    :returns: the ``jsonify`` response.
    """
    if titles is not None:
        def sort_key(row):
            return row[values[key]]
    else:
        def sort_key(row):
            return row[value]
    dict_list = sorted(res, key=sort_key, reverse=True)

    if titles is None:
        wanted = len(dict_list)
        if wanted <= len(color):
            color_choice = random.sample(color, wanted)
        else:
            # random.sample() raises ValueError when asked for more items
            # than the palette holds, so shuffle the palette and repeat it.
            shuffled = random.sample(color, len(color))
            color_choice = [shuffled[pos % len(shuffled)]
                            for pos in range(wanted)]
        for row, row_color in zip(dict_list, color_choice):
            row['color'] = row_color
    else:
        first, second, third = random.sample(color, 3)
        for row in dict_list:
            row['color1'] = first
            row['color2'] = second
            row['color3'] = third

    return jsonify(result=dict_list, title=title, category=category, des=des,
                   value=value, titles=titles, values=values, chart=chart,
                   total=total)
62 |
def make_factory(change_dict):
    """Return a fresh chart description: the defaults overlaid by change_dict.

    The defaults are ``title='name'`` and ``value='call'``; any key present in
    ``change_dict`` wins. ``change_dict`` itself is not modified.
    """
    description = {'title': 'name', 'value': 'call'}
    description.update(change_dict)
    return description
67 |
class JsonView(MethodView):
    """Expose the module-level chart menus as JSON."""

    def get(self, url):
        """Return the chart menu named by ``url`` ('celery' or 'mongo').

        Any other name gets a JSON 404. Previously the method fell through
        and returned None, which Flask reports as a server error.
        """
        menus = {'celery': celery, 'mongo': mongo}
        if url not in menus:
            response = jsonify(error=u'unknown chart menu')
            response.status_code = 404
            return response
        return jsonify(menus[url])
75 |
class IndexView(MethodView):
    """Landing view that only forwards the visitor elsewhere."""

    def get(self):
        """Redirect to this blueprint's 'mongo' endpoint."""
        target = url_for('.mongo')
        return redirect(target)
80 |
class D3View(MethodView):
    """Page rendered from the posts/d3.html template."""

    def get(self):
        """Render the template with its fixed title and description."""
        context = {
            'des': u'celery任务调用次数略览',
            'title': u'Celery Task调用分布',
        }
        return render_template('posts/d3.html', **context)
86 |
class TroubleView(MethodView):
    """Page rendered from the posts/trouble.html template."""

    def get(self):
        """Render the template with its fixed title and description."""
        context = {
            'title': u'Mongodb存在问题分析',
            'des': u'总结错误索引原因和解决方案',
        }
        return render_template('posts/trouble.html', **context)
92 |
class MongoView(MethodView):
    """Mongodb statistics page (GET) and its chart-data endpoint (POST).

    get_form() calls the method named after the posted view type with the
    requested data type, so ``all``, ``sentiment``, ``qq_online`` and
    ``total`` all take that data type as their only argument.
    """

    def get(self):
        """Render the index page listing 'total' plus every stored database."""
        databases = Mongo.objects.distinct(field="database")
        # The aggregated 'all' entry is currently not offered in the menu.
        databases.insert(0, 'total')
        return render_template('posts/index.html', first_type=u'Mongodb分析',
                               title=u'Mongodb使用数据分析', l=databases,
                               des=u'数据库操作数据分析')

    def all(self, type):
        """Hourly slow/all counts of ``type`` for the 'all' pseudo-database."""
        return self.time('all', type)

    def sentiment(self, type):
        """Hourly slow/all counts of ``type`` for the 'sentiment' database."""
        return self.time('sentiment', type)

    def qq_online(self, type):
        """Hourly slow/all counts of ``type`` for the 'qq_online' database."""
        return self.time('qq_online', type)

    def time(self, database, type):
        """Build the per-hour slow-vs-all series of one operation type.

        :param database: value of the ``database`` field to look up, or
            'all'.
        :param type: operation name used as key inside each hour's ``op``
            and ``slow`` mappings.
        :returns: make_factory() dict with the 24 rows stored under ``type``
            plus ``titles``, ``values``, ``total``, ``des`` and ``category``.
        """
        if database == 'all':
            # NOTE(review): this indexes the query set itself with 'hour',
            # unlike the per-database branch below -- confirm it works before
            # re-enabling 'all' in get().
            obj = Mongo.objects()['hour']
        else:
            obj = Mongo.objects(database=database)[0]['hour']
        rows = []
        all_total = 0
        slow_total = 0
        for hour in range(24):
            hour_key = str(hour)
            row = {
                'name': u'{0}时'.format(hour_key),
                'op': obj[hour_key]['op'][type],
                'slow': obj[hour_key]['slow'][type],
            }
            all_total += row['op']
            slow_total += row['slow']
            rows.append(row)
        titles = (u'慢查询', u'全部查询')
        values = ('slow', 'op')
        # Unicode literals, like every other label in this module.
        return make_factory({
            'titles': titles,
            type: rows,
            'total': u'慢查询:{0}, 全部查询:{1}'.format(slow_total, all_total),
            'des': u'[数据库{0} 类型{1}] 总量'.format(database, type),
            'category': u'mongodb分析',
            'values': values,
        })

    def total_data(self, type):
        """Sum every operation over all documents and all 24 hours.

        :param type: 'op' or 'slow' as passed by all_op()/all_slow(); used as
            key inside each hour's mapping.
        :returns: ``(rows, grand_total)`` where rows are
            ``{'name': label, 'call': count}`` dicts, one per operation.
        """
        operations = ('query', 'insert', 'command', 'update', 'getmore')
        sums = dict.fromkeys(operations, 0)
        # One pass over the collection instead of one query per hour.
        for obj in Mongo.objects():
            hours = obj['hour']
            for hour in range(24):
                bucket = hours[str(hour)][type]
                for operation in operations:
                    sums[operation] += bucket[operation]
        labelled = ((u'查询', 'query'), (u'插入', 'insert'),
                    (u'命令', 'command'), (u'更新', 'update'),
                    ('getmore', 'getmore'))
        count = [{'name': label, 'call': sums[operation]}
                 for label, operation in labelled]
        total = sum(sums[operation] for operation in operations)
        return count, total

    def all_op(self):
        """Operation distribution over all operations."""
        rows, grand_total = self.total_data('op')
        return make_factory({'all_op': rows, 'des': u'数据库操作总量',
                             'category': u'mongodb操作分布',
                             'total': grand_total})

    def all_slow(self):
        """Operation distribution over slow operations only."""
        rows, grand_total = self.total_data('slow')
        return make_factory({'all_slow': rows, 'des': u'数据库慢查询总量',
                             'category': u'mongodb慢操作分布',
                             'total': grand_total})

    def total(self, type):
        """Hourly totals summed over every document.

        'all_op' and 'all_slow' are delegated to the distribution helpers.
        NOTE(review): for any other ``type`` the value is only used as the
        result key; the numbers always come from each hour's 'total' entry,
        so every operation type yields the same series -- confirm intended.
        """
        if type == 'all_op':
            return self.all_op()
        if type == 'all_slow':
            return self.all_slow()
        # Fetch the documents once rather than once per hour.
        documents = list(Mongo.objects())
        count = []
        call_total = 0
        for hour in range(24):
            hour_key = str(hour)
            call = sum(obj['hour'][hour_key]['total'] for obj in documents)
            count.append({'name': u'{0}时'.format(hour_key), 'call': call})
            call_total += call
        return make_factory({type: count, 'category': u'mongodb分布',
                             'des': u'数据库查询总量',
                             'total': call_total})

    def post(self):
        """Answer a chart request; multi-series rows are sorted by values[1]."""
        return get_form(request, self, mongo, key=1)
193 |
194 |
def _bad_request(message):
    """Return a JSON error response with HTTP status 400."""
    response = jsonify(error=message)
    response.status_code = 400
    return response


def get_form(request, self, type, key=2):
    """Turn a posted chart request into the JSON payload for that chart.

    Reads the form fields ``vt`` (view type), ``type`` (data type) and
    ``chart`` (chart kind), calls the method of ``self`` named by ``vt`` with
    the data type, and hands the result to make_json().

    :param request: the current Flask request.
    :param self: the view whose data methods are called.
    :param type: chart menu, ``{view type: [(data type, [chart kinds])]}``.
    :param key: index into the data's ``values`` used as sort key for
        multi-series charts.
    :returns: the make_json() response, or a JSON 400 response when a field
        does not match the menu.
    """
    form = request.form
    vt = form.get('vt', '')
    # vt is user input and is used as a method name below, so it must be one
    # of the menu's view types; otherwise any attribute could be invoked.
    if vt not in type:
        return _bad_request(u'unknown view type')
    menu = type[vt]
    charts_for = dict(menu)

    # An empty or missing field selects the first data type of the menu.
    data_type = form.get('type') or menu[0][0]
    if data_type not in charts_for:
        return _bad_request(u'unknown data type')

    # An empty or missing field selects the data type's first chart kind.
    allowed_charts = charts_for[data_type]
    chart = form.get('chart') or allowed_charts[0]
    if chart not in allowed_charts:
        return _bad_request(u'unsupported chart kind')

    data = getattr(self, vt)(data_type)
    if chart in ["simple_column", "pie"]:
        return make_json(data[data_type], data['title'], data['category'],
                         data['value'], chart, key, data['des'],
                         data['total'])
    if chart == 'multi_column':
        return make_json(data[data_type], data['title'], data['category'],
                         data['value'], chart, key, data['des'],
                         data['total'], titles=data['titles'],
                         values=data['values'])
    # A menu entry named a chart kind this function cannot build.
    return _bad_request(u'unsupported chart kind')
212 |
213 |
class CeleryView(MethodView):
    """Celery statistics page (GET) and its chart-data endpoint (POST).

    get_form() calls ``time`` or ``task`` with the requested data type; both
    accept it but do not use it.
    """

    # Start of the day that time() splits into hourly buckets.
    report_day = datetime.datetime(2013, 10, 27)

    def time(self, type):
        """Count Celery records per hour of ``report_day``.

        All 24 hours are covered. The last bucket ends at the next day's
        midnight, which ``datetime.datetime(y, m, d, 24)`` cannot express, so
        the bounds are computed with timedelta.

        :returns: make_factory() dict with the rows under ``count``.
        """
        one_hour = datetime.timedelta(hours=1)
        count = []
        total = 0
        for hour in range(24):
            begin = self.report_day + datetime.timedelta(hours=hour)
            calls = Celery.objects(time__gte=begin,
                                   time__lt=begin + one_hour).count()
            count.append({'name': u'{0}时'.format(str(hour)), 'call': calls})
            total += calls
        return make_factory(dict(count=count, category=u'celery分布',
                                 des=u'celery总量', total=total))

    def post(self):
        """Answer a chart request using the Celery chart menu."""
        return get_form(request, self, celery)

    def task(self, type):
        """Per-task call counts and min/avg/max of the ``cost`` field.

        :returns: make_factory() dict with call counts under ``count`` and
            cost statistics under ``cost``; ``titles`` and ``values`` line up
            position by position (min, avg, max).
        """
        count = []
        cost = []
        total = 0
        for name in Celery.objects.distinct(field='task'):
            data = Celery.objects(task=name)
            calls = data.count()
            count.append({'call': calls, 'name': name})
            # Plain numbers stored under the key that names them; the old
            # code had trailing commas (1-tuples) and swapped 'avg'/'min'.
            cost.append({
                'name': name,
                'min': data.order_by('cost')[0].cost,
                'avg': data.average('cost'),
                'max': data.order_by('-cost')[0].cost,
            })
            total += calls
        titles = (u'最小值', u'平均值', u'最大值')
        values = ('min', 'avg', 'max')
        return make_factory(dict(titles=titles, count=count, cost=cost,
                                 category=u'celery分析', values=values,
                                 des=u'celery总量', total=total))

    def get(self):
        """Render the index page offering the 'task' and 'time' views."""
        view_types = ['task', 'time']
        return render_template('posts/index.html', first_type=u'监控类型',
                               title=u'mongo日志分析',
                               des=u'分析mongodb全天日志整理的分布数据',
                               l=view_types)
259 |
# Register the page views on the blueprint, in this order:
# (URL rule, view class, endpoint name).
for _rule, _view_class, _endpoint in (
        ('/mongo/', MongoView, 'mongo'),
        ('/', IndexView, 'index'),
        ('/celery/', CeleryView, 'celery')):
    posts.add_url_rule(_rule, view_func=_view_class.as_view(_endpoint))
263 | posts.add_url_rule('/json/