18 |
19 | Valid Twitter API key & token are required, see https://apps.twitter.com/
20 |
21 | Usage
22 | -----
23 |
24 | usage: tweetsniff.py [-h] [-c CONFIG]
25 |
26 | Display a Tweet feed
27 |
28 | optional arguments:
29 | -h, --help show this help message and exit
30 | -c CONFIG, --config CONFIG
31 | configuration file (default: /etc/tweetsniff.conf)
32 |
33 |
34 | Expand URLs
35 | ----
36 | - Enable in config with "process_urls: True"
37 | - urllib2.url provides the same feature but is sometimes rejected by sites (403)
38 | - URLs have to be part of the twitter URL object, otherwise they do not get recognized
39 | - Expanded URLs are not yet included in the events sent by writeCEFEvent
40 | - ES object contains URL, expanded URL, MD5 and SHA1 of the (expanded) URL
41 |
42 | Todo
43 | ----
44 | - Add more statistics
--------------------------------------------------------------------------------
/tweetsniff-kibana.json:
--------------------------------------------------------------------------------
1 | {
2 | "title": "Tweetsniff",
3 | "services": {
4 | "query": {
5 | "list": {
6 | "1": {
7 | "id": 1,
8 | "color": "#7EB26D",
9 | "alias": "",
10 | "pin": false,
11 | "type": "lucene",
12 | "enable": true,
13 | "query": ""
14 | }
15 | },
16 | "ids": [
17 | 1
18 | ]
19 | },
20 | "filter": {
21 | "list": {},
22 | "ids": []
23 | }
24 | },
25 | "rows": [
26 | {
27 | "title": "Timeframe",
28 | "height": "200px",
29 | "editable": true,
30 | "collapse": false,
31 | "collapsable": true,
32 | "panels": [
33 | {
34 | "error": false,
35 | "span": 8,
36 | "editable": true,
37 | "type": "text",
38 | "loadingEditor": false,
39 | "mode": "text",
40 | "content": "Dashboard for Tweetsniff.\nSee http://blog.rootshell.be/2014/12/23/tweetsniff-py-a-python-tweets-grabber/ and https://github.com/xme/tweetsniff.",
41 | "style": {
42 | "font-size": "8pt"
43 | },
44 | "title": "Tweetsniff"
45 | },
46 | {
47 | "error": false,
48 | "span": 4,
49 | "editable": true,
50 | "type": "stats",
51 | "loadingEditor": false,
52 | "queries": {
53 | "mode": "all",
54 | "ids": [
55 | 1
56 | ]
57 | },
58 | "style": {
59 | "font-size": "14pt"
60 | },
61 | "format": "number",
62 | "mode": "count",
63 | "display_breakdown": "yes",
64 | "sort_field": "",
65 | "sort_reverse": false,
66 | "label_name": "Query",
67 | "value_name": "Value",
68 | "spyable": true,
69 | "show": {
70 | "count": true,
71 | "min": false,
72 | "max": false,
73 | "mean": false,
74 | "std_deviation": false,
75 | "sum_of_squares": false,
76 | "total": false,
77 | "variance": false
78 | },
79 | "field": "@timestamp",
80 | "title": "Tweets"
81 | },
82 | {
83 | "span": 12,
84 | "editable": true,
85 | "type": "histogram",
86 | "loadingEditor": false,
87 | "mode": "count",
88 | "time_field": "@timestamp",
89 | "value_field": null,
90 | "x-axis": true,
91 | "y-axis": true,
92 | "scale": 1,
93 | "y_format": "none",
94 | "grid": {
95 | "max": null,
96 | "min": 0
97 | },
98 | "queries": {
99 | "mode": "all",
100 | "ids": [
101 | 1
102 | ]
103 | },
104 | "annotate": {
105 | "enable": false,
106 | "query": "*",
107 | "size": 20,
108 | "field": "_type",
109 | "sort": [
110 | "_score",
111 | "desc"
112 | ]
113 | },
114 | "auto_int": false,
115 | "resolution": 100,
116 | "interval": "10m",
117 | "intervals": [
118 | "auto",
119 | "1s",
120 | "1m",
121 | "5m",
122 | "10m",
123 | "30m",
124 | "1h",
125 | "3h",
126 | "12h",
127 | "1d",
128 | "1w",
129 | "1y"
130 | ],
131 | "lines": false,
132 | "fill": 0,
133 | "linewidth": 3,
134 | "points": false,
135 | "pointradius": 5,
136 | "bars": true,
137 | "stack": true,
138 | "spyable": true,
139 | "zoomlinks": true,
140 | "options": true,
141 | "legend": true,
142 | "show_query": true,
143 | "interactive": true,
144 | "legend_counts": true,
145 | "timezone": "browser",
146 | "percentage": false,
147 | "zerofill": true,
148 | "derivative": false,
149 | "tooltip": {
150 | "value_type": "individual",
151 | "query_as_alias": true
152 | },
153 | "title": "Timeframe"
154 | }
155 | ],
156 | "notice": false
157 | },
158 | {
159 | "title": "Tweets (limited)",
160 | "height": "250px",
161 | "editable": true,
162 | "collapse": false,
163 | "collapsable": true,
164 | "panels": [
165 | {
166 | "error": false,
167 | "span": 12,
168 | "editable": true,
169 | "type": "table",
170 | "loadingEditor": false,
171 | "size": 20,
172 | "pages": 10,
173 | "offset": 0,
174 | "sort": [
175 | "@timestamp",
176 | "desc"
177 | ],
178 | "overflow": "min-height",
179 | "fields": [
180 | "@timestamp",
181 | "user.screen_name",
182 | "text"
183 | ],
184 | "highlight": [],
185 | "sortable": true,
186 | "header": true,
187 | "paging": true,
188 | "field_list": false,
189 | "all_fields": false,
190 | "trimFactor": 300,
191 | "localTime": true,
192 | "timeField": "@timestamp",
193 | "spyable": true,
194 | "queries": {
195 | "mode": "all",
196 | "ids": [
197 | 1
198 | ]
199 | },
200 | "style": {
201 | "font-size": "9pt"
202 | },
203 | "normTimes": true
204 | }
205 | ],
206 | "notice": false
207 | },
208 | {
209 | "title": "Tops",
210 | "height": "250px",
211 | "editable": true,
212 | "collapse": false,
213 | "collapsable": true,
214 | "panels": [
215 | {
216 | "error": false,
217 | "span": 6,
218 | "editable": true,
219 | "type": "terms",
220 | "loadingEditor": false,
221 | "field": "user.screen_name",
222 | "exclude": [],
223 | "missing": false,
224 | "other": false,
225 | "size": 10,
226 | "order": "count",
227 | "style": {
228 | "font-size": "10pt"
229 | },
230 | "donut": false,
231 | "tilt": false,
232 | "labels": true,
233 | "arrangement": "horizontal",
234 | "chart": "bar",
235 | "counter_pos": "above",
236 | "spyable": true,
237 | "queries": {
238 | "mode": "all",
239 | "ids": [
240 | 1
241 | ]
242 | },
243 | "tmode": "terms",
244 | "tstat": "total",
245 | "valuefield": "",
246 | "title": "Top users"
247 | },
248 | {
249 | "error": false,
250 | "span": 6,
251 | "editable": true,
252 | "type": "terms",
253 | "loadingEditor": false,
254 | "field": "hashtags",
255 | "exclude": [],
256 | "missing": false,
257 | "other": false,
258 | "size": 10,
259 | "order": "count",
260 | "style": {
261 | "font-size": "10pt"
262 | },
263 | "donut": false,
264 | "tilt": false,
265 | "labels": true,
266 | "arrangement": "horizontal",
267 | "chart": "bar",
268 | "counter_pos": "above",
269 | "spyable": true,
270 | "queries": {
271 | "mode": "all",
272 | "ids": [
273 | 1
274 | ]
275 | },
276 | "tmode": "terms",
277 | "tstat": "total",
278 | "valuefield": "",
279 | "title": "Top Hashtags"
280 | }
281 | ],
282 | "notice": false
283 | },
284 | {
285 | "title": "Tweetdata",
286 | "height": "200px",
287 | "editable": true,
288 | "collapse": false,
289 | "collapsable": true,
290 | "panels": [
291 | {
292 | "error": false,
293 | "span": 6,
294 | "editable": true,
295 | "type": "terms",
296 | "loadingEditor": false,
297 | "field": "retweet_count",
298 | "exclude": [],
299 | "missing": false,
300 | "other": false,
301 | "size": 15,
302 | "order": "count",
303 | "style": {
304 | "font-size": "10pt"
305 | },
306 | "donut": false,
307 | "tilt": false,
308 | "labels": true,
309 | "arrangement": "horizontal",
310 | "chart": "bar",
311 | "counter_pos": "above",
312 | "spyable": true,
313 | "queries": {
314 | "mode": "all",
315 | "ids": [
316 | 1
317 | ]
318 | },
319 | "tmode": "terms",
320 | "tstat": "total",
321 | "valuefield": "",
322 | "title": "Retweet count"
323 | },
324 | {
325 | "error": false,
326 | "span": 6,
327 | "editable": true,
328 | "type": "terms",
329 | "loadingEditor": false,
330 | "field": "lang",
331 | "exclude": [],
332 | "missing": false,
333 | "other": false,
334 | "size": 10,
335 | "order": "count",
336 | "style": {
337 | "font-size": "10pt"
338 | },
339 | "donut": false,
340 | "tilt": false,
341 | "labels": true,
342 | "arrangement": "horizontal",
343 | "chart": "pie",
344 | "counter_pos": "above",
345 | "spyable": true,
346 | "queries": {
347 | "mode": "all",
348 | "ids": [
349 | 1
350 | ]
351 | },
352 | "tmode": "terms",
353 | "tstat": "total",
354 | "valuefield": "",
355 | "title": "Tweet Language"
356 | }
357 | ],
358 | "notice": false
359 | },
360 | {
361 | "title": "Tweeter data",
362 | "height": "200px",
363 | "editable": true,
364 | "collapse": false,
365 | "collapsable": true,
366 | "panels": [
367 | {
368 | "error": false,
369 | "span": 4,
370 | "editable": true,
371 | "type": "terms",
372 | "loadingEditor": false,
373 | "field": "user.lang",
374 | "exclude": [],
375 | "missing": false,
376 | "other": false,
377 | "size": 15,
378 | "order": "count",
379 | "style": {
380 | "font-size": "10pt"
381 | },
382 | "donut": false,
383 | "tilt": false,
384 | "labels": false,
385 | "arrangement": "horizontal",
386 | "chart": "pie",
387 | "counter_pos": "above",
388 | "spyable": true,
389 | "queries": {
390 | "mode": "all",
391 | "ids": [
392 | 1
393 | ]
394 | },
395 | "tmode": "terms",
396 | "tstat": "total",
397 | "valuefield": "",
398 | "title": "Tweeter Language"
399 | },
400 | {
401 | "error": false,
402 | "span": 4,
403 | "editable": true,
404 | "type": "terms",
405 | "loadingEditor": false,
406 | "field": "user.location",
407 | "exclude": [],
408 | "missing": false,
409 | "other": false,
410 | "size": 15,
411 | "order": "count",
412 | "style": {
413 | "font-size": "10pt"
414 | },
415 | "donut": false,
416 | "tilt": false,
417 | "labels": true,
418 | "arrangement": "horizontal",
419 | "chart": "bar",
420 | "counter_pos": "above",
421 | "spyable": true,
422 | "queries": {
423 | "mode": "all",
424 | "ids": [
425 | 1
426 | ]
427 | },
428 | "tmode": "terms",
429 | "tstat": "total",
430 | "valuefield": "",
431 | "title": "Tweeter Location"
432 | },
433 | {
434 | "error": false,
435 | "span": 4,
436 | "editable": true,
437 | "type": "terms",
438 | "loadingEditor": false,
439 | "field": "user.time_zone",
440 | "exclude": [],
441 | "missing": false,
442 | "other": false,
443 | "size": 15,
444 | "order": "count",
445 | "style": {
446 | "font-size": "10pt"
447 | },
448 | "donut": false,
449 | "tilt": false,
450 | "labels": true,
451 | "arrangement": "horizontal",
452 | "chart": "bar",
453 | "counter_pos": "above",
454 | "spyable": true,
455 | "queries": {
456 | "mode": "all",
457 | "ids": [
458 | 1
459 | ]
460 | },
461 | "tmode": "terms",
462 | "tstat": "total",
463 | "valuefield": "",
464 | "title": "Tweeter Timezone"
465 | }
466 | ],
467 | "notice": false
468 | },
469 | {
470 | "title": "Tweeter data stats",
471 | "height": "200px",
472 | "editable": true,
473 | "collapse": false,
474 | "collapsable": true,
475 | "panels": [
476 | {
477 | "error": false,
478 | "span": 4,
479 | "editable": true,
480 | "type": "terms",
481 | "loadingEditor": false,
482 | "field": "user.followers_count",
483 | "exclude": [],
484 | "missing": false,
485 | "other": false,
486 | "size": 15,
487 | "order": "count",
488 | "style": {
489 | "font-size": "10pt"
490 | },
491 | "donut": false,
492 | "tilt": false,
493 | "labels": true,
494 | "arrangement": "horizontal",
495 | "chart": "bar",
496 | "counter_pos": "above",
497 | "spyable": true,
498 | "queries": {
499 | "mode": "all",
500 | "ids": [
501 | 1
502 | ]
503 | },
504 | "tmode": "terms",
505 | "tstat": "total",
506 | "valuefield": "",
507 | "title": "Tweeter Followers"
508 | },
509 | {
510 | "error": false,
511 | "span": 4,
512 | "editable": true,
513 | "type": "terms",
514 | "loadingEditor": false,
515 | "field": "user.favourites_count",
516 | "exclude": [],
517 | "missing": false,
518 | "other": false,
519 | "size": 15,
520 | "order": "count",
521 | "style": {
522 | "font-size": "10pt"
523 | },
524 | "donut": false,
525 | "tilt": false,
526 | "labels": true,
527 | "arrangement": "horizontal",
528 | "chart": "bar",
529 | "counter_pos": "above",
530 | "spyable": true,
531 | "queries": {
532 | "mode": "all",
533 | "ids": [
534 | 1
535 | ]
536 | },
537 | "tmode": "terms",
538 | "tstat": "total",
539 | "valuefield": "",
540 | "title": "Tweeter Favourites"
541 | }
542 | ],
543 | "notice": false
544 | },
545 | {
546 | "title": "Tweets",
547 | "height": "350px",
548 | "editable": true,
549 | "collapse": false,
550 | "collapsable": true,
551 | "panels": [
552 | {
553 | "error": false,
554 | "span": 12,
555 | "editable": true,
556 | "type": "table",
557 | "loadingEditor": false,
558 | "size": 100,
559 | "pages": 5,
560 | "offset": 0,
561 | "sort": [
562 | "_score",
563 | "desc"
564 | ],
565 | "overflow": "min-height",
566 | "fields": [],
567 | "highlight": [],
568 | "sortable": true,
569 | "header": true,
570 | "paging": true,
571 | "field_list": true,
572 | "all_fields": false,
573 | "trimFactor": 300,
574 | "localTime": false,
575 | "timeField": "@timestamp",
576 | "spyable": true,
577 | "queries": {
578 | "mode": "all",
579 | "ids": [
580 | 1
581 | ]
582 | },
583 | "style": {
584 | "font-size": "9pt"
585 | },
586 | "normTimes": true
587 | }
588 | ],
589 | "notice": false
590 | }
591 | ],
592 | "editable": true,
593 | "failover": false,
594 | "index": {
595 | "interval": "none",
596 | "pattern": "[logstash-]YYYY.MM.DD",
597 | "default": "_all",
598 | "warm_fields": false
599 | },
600 | "style": "dark",
601 | "panel_hints": true,
602 | "pulldowns": [
603 | {
604 | "type": "query",
605 | "collapse": false,
606 | "notice": false,
607 | "enable": true,
608 | "query": "*",
609 | "pinned": true,
610 | "history": [
611 | "",
612 | "equation",
613 | "malware",
614 | "malware botner"
615 | ],
616 | "remember": 10
617 | },
618 | {
619 | "type": "filtering",
620 | "collapse": false,
621 | "notice": false,
622 | "enable": true
623 | }
624 | ],
625 | "nav": [
626 | {
627 | "type": "timepicker",
628 | "collapse": false,
629 | "notice": false,
630 | "enable": true,
631 | "status": "Stable",
632 | "time_options": [
633 | "5m",
634 | "15m",
635 | "1h",
636 | "6h",
637 | "12h",
638 | "24h",
639 | "2d",
640 | "7d",
641 | "30d"
642 | ],
643 | "refresh_intervals": [
644 | "5s",
645 | "10s",
646 | "30s",
647 | "1m",
648 | "5m",
649 | "15m",
650 | "30m",
651 | "1h",
652 | "2h",
653 | "1d"
654 | ],
655 | "timefield": "@timestamp"
656 | }
657 | ],
658 | "loader": {
659 | "save_gist": false,
660 | "save_elasticsearch": true,
661 | "save_local": true,
662 | "save_default": true,
663 | "save_temp": true,
664 | "save_temp_ttl_enable": true,
665 | "save_temp_ttl": "30d",
666 | "load_gist": false,
667 | "load_elasticsearch": true,
668 | "load_elasticsearch_size": 20,
669 | "load_local": false,
670 | "hide": false
671 | },
672 | "refresh": false
673 | }
--------------------------------------------------------------------------------
/tweetsniff.conf.sample:
--------------------------------------------------------------------------------
1 | [twitterapi]
2 | consumer_key: xxxx
3 | consumer_secret: xxxx
4 | access_token_key: xxxx
5 | access_token_secret: xxxx
6 | status_file: /var/run/tweetsniff.status
7 |
8 | [highlight]
9 | color: red
10 | regex: foo
11 | security
12 |
13 | [search]
14 | color: blue
15 | keywords: drone
16 | brucon
17 | malware
18 |
19 | [elasticsearch]
20 | server: http://127.0.0.1:9200
21 | index: twitter-%Y.%m.%d
22 |
23 | [cef]
24 | server: 192.168.0.1
25 | port: 514
26 |
27 | [urls]
28 | ua: Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0
29 | timeout: 30
30 | process_urls: true
31 |
--------------------------------------------------------------------------------
/tweetsniff.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Author: Xavier Mertens
4 | # Copyright: GPLv3 (http://gplv3.fsf.org/)
5 | # Feel free to use the code, but please share the changes you've made
6 | #
7 | import argparse
8 | import errno
9 | import ConfigParser
10 | import json
11 | import logging
12 | import logging.handlers
13 | import os
14 | import re
15 | import signal
16 | import sys
17 | import time
18 |
19 | try:
20 | import twitter
21 | except:
22 | print "[ERROR]: python-twitter is required. See https://github.com/bear/python-twitter"
23 |
24 | try:
25 | import syslog
26 | except:
27 | print "[INFO]: No Syslog support, logging to console"
28 | from datetime import datetime
29 | from dateutil import parser
30 | from dateutil import tz
31 | from elasticsearch import Elasticsearch
32 | from termcolor import colored
33 |
34 | import hashlib
35 | import urllib, httplib
36 |
# Shared state: assigned by main() and read by the helper functions.
# Every name is defined here so that a disabled feature is seen as "None"
# instead of raising NameError.
api = None                     # Twitter API client
logger = None                  # CEF syslog logger, None when CEF is disabled
es = None                      # Elasticsearch client, None when indexing is disabled
esIndex = 'twitter-%Y.%m.%d'   # strftime() pattern used to name the index

# Default configuration
config = {
    'statusFile': '/var/run/tweetsniff.status',
    'esServer': '',
    'keywords': '',
    'regex': '',
    'highlightColor': 'red',
    'keywordColor': 'blue',
    'cefServer': '',
    'cefPort': '',
    # URL expansion is off unless the config file enables it
    'urls_process_urls': False,
    'urls_ua': 'tweetsniff',
    'urls_timeout': '5',
}
51 |
def sigHandler(s, f):
    """Signal handler: announce the shutdown and leave with status 0.

    s -- signal number (unused)
    f -- interrupted stack frame (unused)
    """
    print("Killed.")
    raise SystemExit(0)
58 |
def writeLog(msg):
    """Send `msg` to Syslog on POSIX hosts, or to the console elsewhere."""
    if os.name != "posix":
        print(msg)
        return

    # The log is (re)opened with the PID option and the mail facility
    # before each message is written.
    syslog.openlog(logoption=syslog.LOG_PID, facility=syslog.LOG_MAIL)
    syslog.syslog(msg)
69 |
def _cefEscape(value):
    """Escape a value for use inside a CEF extension field.

    Backslashes, equal signs and line breaks are escaped so that a value
    taken from a Tweet cannot break out of its field or split the event.
    """
    text = '%s' % (value,)
    # The backslash must be handled first, otherwise the escapes added by
    # the following replacements would be escaped a second time.
    for raw, escaped in (('\\', '\\\\'), ('=', '\\='),
                         ('\r', '\\r'), ('\n', '\\n')):
        text = text.replace(raw, escaped)
    return text


def writeCEFEvent(tweet):
    """Send a CEF event describing `tweet` to the configured Syslog logger.

    tweet -- status object exposing user.screen_name, created_at and text
    """
    cefmsg = (' CEF:0|blog.rootshell.be|tweetsniff|1.0|TwitterMsg|'
              'Received Twitter Message|0|'
              'cs1Label=TweetHandle cs1=%s cs2Label=TweetTime cs2=%s msg=%s'
              % (_cefEscape(tweet.user.screen_name),
                 _cefEscape(tweet.created_at),
                 _cefEscape(tweet.text)))
    logger.info(cefmsg)
    return
78 |
def time2Local(s):
    """Return the UTC 'created_at' date `s` as a datetime in the local zone.

    An empty or missing `s` stands for the current time.
    """
    if s:
        # Only the wall-clock fields down to the second are kept: any
        # offset present in the string is dropped and the value is then
        # labelled as UTC.
        naive = parser.parse(s).replace(microsecond=0, tzinfo=None)
    else:
        naive = datetime.utcnow()

    as_utc = naive.replace(tzinfo=tz.tzutc())
    return as_utc.astimezone(tz.tzlocal())
92 |
def indexEs(tweet, urls, tweet_message):
    """Index a new Tweet in Elasticsearch.

    tweet         -- status object exposing AsDict()
    urls          -- list of dicts as built by processURL() (may be empty)
    tweet_message -- text stored in the 'text' field of the document

    Failures are reported on the console and never propagate: indexing is
    best-effort and must not stop the feed.
    """
    doc = tweet.AsDict()
    # 'retweeted_status' is dropped for now - to be fixed later
    doc.pop('retweeted_status', None)
    # The original 'urls' entry is replaced by the flattened fields below
    doc.pop('urls', None)

    # To fix: support different timezones? (the offset is forced to +00:00)
    try:
        created = parser.parse(doc['created_at'])
    except (KeyError, TypeError, ValueError, OverflowError) as e:
        print("[Warning] Can't read the Tweet date: %s" % (e,))
        return
    doc['@timestamp'] = created.strftime("%Y-%m-%dT%H:%M:%S+00:00")
    doc['text'] = tweet_message

    if config.get('urls_process_urls') is True and urls:
        for position, url in enumerate(urls, 1):
            prefix = 'urls.%d.' % position
            for field in ('url', 'url_unshortened', 'url_md5', 'url_sha1'):
                doc[prefix + field] = url[field]

    try:
        es.index(index=time.strftime(esIndex, time.localtime()),
                 doc_type='tweet',
                 body=doc)
    except Exception as e:
        # "Exception" (not a bare except) so that the SystemExit raised by
        # the CTRL-C handler is not swallowed here.
        print("[Warning] Can't index Tweet in %s: %s" % (config['esServer'], e))

    return
126 |
def processURL(urls):
    """Expand the short URLs of a Tweet and compute their hashes.

    urls -- iterable of URL entities exposing 'url' and 'expanded_url',
            or None

    Returns a list of dicts with the keys 'url', 'url_unshortened',
    'url_md5' and 'url_sha1'. A URL that cannot be processed is skipped.
    """
    processed_urls = []
    for url in urls or []:
        try:
            url_unshortened = unshortenURL(url.expanded_url.encode("utf-8"))
            # The expanded URL may already be a byte string: encoding it a
            # second time fails on non-ASCII characters, so only text is
            # encoded before hashing.
            if isinstance(url_unshortened, bytes):
                hashed = url_unshortened
            else:
                hashed = url_unshortened.encode('utf-8')
            processed_urls.append({'url': url.url.encode('utf-8'),
                                   'url_unshortened': url_unshortened,
                                   'url_md5': hashlib.md5(hashed).hexdigest(),
                                   'url_sha1': hashlib.sha1(hashed).hexdigest()})
        except Exception:
            # Best effort: one bad URL must not hide the others. A bare
            # "except" would also swallow the exit raised on CTRL-C.
            continue
    return processed_urls
145 |
def unshortenURL(url, max_redirects=10):
    """Follow HTTP redirects with HEAD requests and return the final URL.

    Based on https://github.com/cudeso/expandurl

    url           -- URL to expand
    max_redirects -- maximum number of redirects followed, so that a
                     redirect loop cannot hang the feed

    Returns the last URL reached. If a request fails, the original `url`
    is returned unchanged.
    """
    # Python 2 standard library; imported here as the file does not import it
    from urlparse import urljoin, urlsplit

    url_ua = config['urls_ua']
    try:
        urls_timeout = int(config['urls_timeout'])
    except (KeyError, TypeError, ValueError):
        urls_timeout = 5

    connections = {'http': httplib.HTTPConnection,
                   'https': httplib.HTTPSConnection}
    currenturl = url.strip()

    for _ in range(max_redirects):
        conn = None
        try:
            httprequest = urlsplit(currenturl)
            connect = connections.get(httprequest.scheme.lower())
            if connect is None:
                # Neither http nor https: nothing more to follow
                break

            # An empty path is not a valid request target
            req = httprequest.path or '/'
            if httprequest.query:
                req = '%s?%s' % (req, httprequest.query)

            conn = connect(httprequest.netloc.lower(), timeout=urls_timeout)
            conn.request("HEAD", req, None,
                         {'User-Agent': url_ua, 'Accept': '*/*'})
            res = conn.getresponse()
            status = res.status
            location = res.getheader('Location')
        except Exception:
            # Best effort: fall back to the URL we were given
            return url
        finally:
            if conn is not None:
                conn.close()

        if status not in (301, 302, 303, 307, 308) or not location:
            break

        # urljoin() resolves relative 'Location' values against the
        # current URL and leaves absolute ones untouched
        nexturl = urljoin(currenturl, location)
        if nexturl == currenturl:
            break
        currenturl = nexturl

    return currenturl
184 |
def _twitterErrorText(e):
    """Describe a TwitterError as 'message (code)' when it has that shape."""
    try:
        detail = e.args[0][0]
        return "%s (%s)" % (detail['message'], detail['code'])
    except (AttributeError, IndexError, KeyError, TypeError):
        return "%s" % (e,)


def _highlight(text, pattern, color):
    """Colour every case-insensitive match of the regex `pattern` in `text`."""
    try:
        return re.sub('(' + pattern + ')',
                      lambda match: colored(match.group(1), color),
                      text,
                      flags=re.I)
    except re.error:
        # An invalid regex in the config file must not stop the feed
        return text


def _publishTweet(t, highlighted):
    """Display one Tweet and forward it to Elasticsearch / CEF if enabled.

    t           -- status object received from Twitter
    highlighted -- Tweet text with the console colours applied
    """
    shown = highlighted.encode("utf-8")
    # The indexed text is built from the raw Tweet so that no terminal
    # colour codes end up in Elasticsearch.
    stored = t.text.encode("utf-8")

    if config.get('urls_process_urls') is True:
        urls = processURL(t.urls)
        for url in urls:
            shown = shown.replace(url['url'], url['url_unshortened'])
            stored = stored.replace(url['url'], url['url_unshortened'])
    else:
        urls = []

    print("%s | %15s | %s" % (time2Local(t.created_at).strftime("%H:%M:%S"),
                              t.user.screen_name.encode("utf-8"),
                              shown))

    if es:
        indexEs(t, urls, stored)

    if logger:
        writeCEFEvent(t)


def updateTimeline(timeline_id):
    """Get new Tweets from twitter.com.

    timeline_id -- ID of the last Tweet already processed

    Returns the highest Tweet ID seen, or `timeline_id` when nothing new
    was received or Twitter returned an error.
    """
    try:
        timeline = api.GetHomeTimeline(since_id=timeline_id)
    except twitter.error.TwitterError as e:
        print("[Error] Twitter returned: %s" % _twitterErrorText(e))
        return timeline_id

    if not timeline:
        return timeline_id

    last_id = timeline_id
    # Oldest first, so that the console reads chronologically
    for t in reversed(timeline):
        text = t.text
        for r in config['regex']:
            if r:
                text = _highlight(text, r, config['highlightColor'])

        _publishTweet(t, text)

        if int(t.id) > int(last_id):
            last_id = t.id
    return last_id


def updateSearch(search_id):
    """Get new Tweets containing specific keywords.

    search_id -- ID of the last search result already processed

    Returns the highest Tweet ID seen over all keywords, or `search_id`
    when Twitter returned an error.
    """
    last_id = search_id
    for keyword in config['keywords']:
        if not keyword:
            continue
        try:
            tweets = api.GetSearch(term=keyword, since_id=search_id)
        except twitter.error.TwitterError as e:
            print("[Error] Twitter returned: %s" % _twitterErrorText(e))
            return search_id

        if not tweets:
            continue

        for t in reversed(tweets):
            # The keyword is a literal string, not a regex: escape it
            text = _highlight(t.text, re.escape(keyword),
                              config['keywordColor'])

            for r in config['regex']:
                if r:
                    text = _highlight(text, r, config['highlightColor'])

            _publishTweet(t, text)

            if int(t.id) > int(last_id):
                last_id = t.id
                print("[DEBUG] last_id = %s" % last_id)
    return last_id
279 |
def _readOption(cfg, section, option, default=''):
    """Return `option` from `section`, or `default` if either is missing."""
    if cfg.has_option(section, option):
        return cfg.get(section, option)
    return default


def _loadStatus(path):
    """Return the (timeline_id, search_id) pair saved in the status file.

    (0, 0) is returned when the file does not exist or does not contain
    two comma-separated numeric IDs.
    """
    if not os.path.isfile(path):
        print("DEBUG: Status file not found, starting new feed")
        return 0, 0

    with open(path, 'r') as fd:
        data = [field.strip() for field in fd.read().split(',')]

    if len(data) < 2 or not (data[0].isdigit() and data[1].isdigit()):
        print("[Warning] Invalid status file %s, starting new feed" % path)
        return 0, 0

    print("DEBUG: Restarting feed from ID %s/%s" % (data[0], data[1]))
    return data[0], data[1]


def main():
    """Read the configuration, connect to the services and poll Twitter.

    The function loops until the process is interrupted. It exits with
    status 1 when the configuration cannot be used or Twitter cannot be
    reached.
    """
    global api
    global config
    global es
    global esIndex
    global logger

    signal.signal(signal.SIGINT, sigHandler)

    argParser = argparse.ArgumentParser(
        description='Display a Tweet feed')
    argParser.add_argument('-c', '--config',
                           dest='configFile',
                           help='configuration file (default: /etc/tweetsniff.conf)',
                           metavar='CONFIG')
    args = argParser.parse_args()

    if not args.configFile:
        args.configFile = '/etc/tweetsniff.conf'

    try:
        c = ConfigParser.ConfigParser()
        # read() does not raise on a missing or unreadable file: it returns
        # the list of files that were actually parsed
        if not c.read(args.configFile):
            message = 'Cannot read config file %s' % args.configFile
            print("[Error] %s" % message)
            writeLog(message)
            sys.exit(1)

        # Twitter config (mandatory)
        consumerKey = c.get('twitterapi', 'consumer_key')
        consumerSecret = c.get('twitterapi', 'consumer_secret')
        accessTokenKey = c.get('twitterapi', 'access_token_key')
        accessTokenSecret = c.get('twitterapi', 'access_token_secret')
        config['statusFile'] = _readOption(c, 'twitterapi', 'status_file',
                                           config['statusFile'])
        # Highlights
        config['highlightColor'] = _readOption(c, 'highlight', 'color',
                                               config['highlightColor'])
        highlightRegex = _readOption(c, 'highlight', 'regex')
        # Search
        searchKeywords = _readOption(c, 'search', 'keywords')
        config['keywordColor'] = _readOption(c, 'search', 'color',
                                             config['keywordColor'])
        # URLs (optional). Quotes around the User-Agent are kept by
        # ConfigParser and would be sent as part of the header.
        config['urls_ua'] = _readOption(c, 'urls', 'ua', 'tweetsniff').strip().strip('"')
        config['urls_timeout'] = _readOption(c, 'urls', 'timeout', '5')
        # 'urls_process_urls' is the name used by the sample config file
        processUrls = _readOption(c, 'urls', 'process_urls',
                                  _readOption(c, 'urls', 'urls_process_urls', 'false'))
        config['urls_process_urls'] = (processUrls.strip().lower() == 'true')
        # Elasticsearch config (optional)
        config['esServer'] = _readOption(c, 'elasticsearch', 'server')
        esIndex = _readOption(c, 'elasticsearch', 'index', 'twitter-%Y.%m.%d')
        # CEF config (optional)
        config['cefServer'] = _readOption(c, 'cef', 'server')
        config['cefPort'] = _readOption(c, 'cef', 'port')
    except ConfigParser.Error as e:
        message = 'Invalid config file %s: %s' % (args.configFile, e)
        print("[Error] %s" % message)
        writeLog(message)
        sys.exit(1)

    # The API key and tokens are secrets: they are never printed
    print("DEBUG: Regex: %s" % highlightRegex)

    if searchKeywords:
        config['keywords'] = searchKeywords.split('\n')
        print("DEBUG: keywords = %s" % config['keywords'])

    if highlightRegex:
        config['regex'] = highlightRegex.split('\n')

    try:
        api = twitter.Api(consumer_key=consumerKey,
                          consumer_secret=consumerSecret,
                          access_token_key=accessTokenKey,
                          access_token_secret=accessTokenSecret)
    except Exception:
        print("[Error] Can't connect to twitter.com")
        sys.exit(1)

    # Always defined, so that the feed can test it when indexing is off
    es = None
    if config['esServer']:
        try:
            es = Elasticsearch([config['esServer']])
        except Exception:
            print("[Warning] Can't connect to %s" % config['esServer'])

    if config['cefServer']:
        try:
            cefPort = int(config['cefPort']) if config['cefPort'] else 514
            handler = logging.handlers.SysLogHandler(
                address=(config['cefServer'], cefPort))
            cefLogger = logging.getLogger('tweetsniff')
            cefLogger.setLevel(logging.INFO)
            cefLogger.addHandler(handler)
            # Only publish the logger once it is fully configured
            logger = cefLogger
        except Exception:
            logger = None
            print("[Warning] Can't configure CEF destination: %s:%s"
                  % (config['cefServer'], config['cefPort']))

    timeline_id, search_id = _loadStatus(config['statusFile'])

    # Used until Twitter has told us how long to wait between two polls
    sleep_home = sleep_search = 60

    while True:
        try:
            timeline_id = updateTimeline(timeline_id)
            search_id = updateSearch(search_id)
        except AttributeError:
            print("[Error] Can't connect to twitter.com")
            sys.exit(1)

        with open(config['statusFile'], 'w') as fd:
            fd.write("%s,%s" % (timeline_id, search_id))

        try:
            sleep_home = api.GetAverageSleepTime('statuses/home_timeline')
            sleep_search = api.GetAverageSleepTime('search/tweets')
        except twitter.error.TwitterError as e:
            # The previous delays are reused
            print("[Error] Twitter returned: %s" % (e,))

        print("DEBUG: Sleep = %s / %s" % (sleep_home, sleep_search))
        time.sleep(max(sleep_home, sleep_search))
408 |
# Script entry point: the feed only starts when the file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    sys.exit(main())
411 |
--------------------------------------------------------------------------------