├── .gitignore
├── ActivityInfo.py
├── DoutuFeatures.txt
├── DoutuProcessor.py
├── DoutuRobot
    ├── README.md
    ├── dat
    │   └── gifs
    │   │   ├── 170403-210016.gif
    │   │   ├── 170403-232221.gif
    │   │   ├── 170404-021026.gif
    │   │   ├── 170404-033135.gif
    │   │   ├── 170404-051440.gif
    │   │   ├── 170404-062843.gif
    │   │   ├── 170404-065246.gif
    │   │   ├── 170404-081520.gif
    │   │   ├── 170404-205424.gif
    │   │   ├── 170404-210537.gif
    │   │   ├── 170404-234147.gif
    │   │   ├── 170404-235954.gif
    │   │   ├── 170405-020917.gif
    │   │   ├── 170405-021924.gif
    │   │   ├── 170405-062442.gif
    │   │   ├── 170405-071029.gif
    │   │   ├── 170405-092427.gif
    │   │   └── 170405-092558.gif
    ├── dedupAndCopy.sh
    ├── extractFeatures.py
    ├── incrementalExtractFeatures.sh
    └── installCaffe.sh
├── GaTextHook.py
├── GlobalTextHook.py
├── GroupMessageForwarder.py
├── GroupTagCloud.py
├── HistoryRecorder.py
├── ImageSearcher.py
├── PaiDuiHook.py
├── ProcessInterface.py
├── README.md
├── deploy.sh
├── launchWechatBot.sh
├── main.py
├── requirements.txt
└── utilities.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 | *.swo
4 | .Python
5 | .DS_Store
6 | 


--------------------------------------------------------------------------------
/ActivityInfo.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from utilities import *
 3 | from itchat.content import *
 4 | from ProcessInterface import ProcessInterface
 5 | from pymongo import DESCENDING
 6 | import matplotlib
 7 | matplotlib.use('Agg')
 8 | import matplotlib.pyplot as pp
 9 | from matplotlib.font_manager import FontProperties
10 | from matplotlib.dates import HourLocator, DateFormatter
11 | import numpy as np
12 | from time import time
13 | from collections import Counter
14 | from datetime import datetime
15 | import itchat
16 | import random
17 | import re
18 | import os
19 | import logging
20 | 
class ActivityInfo(ProcessInterface):
    """Answer the ``/activity`` command with a chart of recent group activity.

    The chart has two panels built from the message-history collection: the
    number of messages over the last ``timestampSubtract`` seconds split into
    24 bins, and a bar chart of the most active senders in that window.
    """
    timestampSubtract = 3600 * 24  # 1 day
    maxActivityInfoCount = 10      # at most this many senders in the bar chart
    imgDir = 'activityInfo'        # directory the generated PNG files go to

    def __init__(self, fontPath):
        """Create the image directory and bind the history collection.

        fontPath: path of the font file used to render the (Chinese) labels.

        NOTE(review): ``client``, ``dbName`` and ``collName`` are not defined
        in this file; presumably they come from ``from utilities import *``.
        """
        if not os.path.exists(self.imgDir):
            os.mkdir(self.imgDir)
        self.client = client
        self.coll = self.client[dbName][collName]
        self.prop = FontProperties(fname=fontPath)
        logging.info('ActivityInfo initialized.')

    def process(self, msg, type):
        """Handle one message; only the exact text ``/activity`` triggers a reply.

        The chart is sent to the chatroom the message belongs to: the sender
        id when it contains ``@@``, otherwise the receiver id.
        """
        if type != TEXT:
            return
        if msg['Content'] == '/activity':
            logging.info('Generating activity info for {0}.'.format(msg['User']['NickName']))
            fn = self.generateActivityInfoForGroup(msg['User']['NickName'])
            destinationChatroomId = msg['FromUserName'] if re.search('@@', msg['FromUserName']) else msg['ToUserName']
            logging.info('Sending activity file {0} to {1}.'.format(fn, destinationChatroomId))
            itchat.send('@img@{0}'.format(fn), destinationChatroomId)

    def generateActivityInfoForGroup(self, groupName):
        """Render the activity chart for ``groupName`` and return the PNG path.

        Records are selected by their ``to`` field (group name) and a
        ``timestamp`` newer than ``timestampSubtract`` seconds ago; the
        ``from`` field of each record identifies the sender.
        """
        timestampNow = int(time())
        timestampYesterday = timestampNow - self.timestampSubtract
        records = list(self.coll.find({ 'to': groupName, 'timestamp': { '$gt': timestampYesterday } }).sort([ ('timestamp', DESCENDING) ]))
        fn = self.generateTmpFileName()
        # Get histogram for activity
        hist, bins = np.histogram([ x['timestamp'] for x in records ], bins=24)
        center = (bins[:-1] + bins[1:]) / 2
        # fromtimestamp() converts using the local time zone of this machine.
        datex = [ datetime.fromtimestamp(x) for x in center ]
        fig = pp.figure(figsize=(6,14))
        try:
            ax = pp.subplot(2, 1, 1)
            pp.plot_date(datex, hist, '.-')
            pp.gcf().autofmt_xdate()
            pp.xlabel(u'美国西部时间', fontproperties=self.prop)
            pp.ylabel(u'每小时消息数', fontproperties=self.prop)
            ax.xaxis.set_major_formatter(DateFormatter('%m-%d %H:%M'))
            # Get bar chart for active users
            pieDat = Counter([ x['from'] for x in records ])
            pieDatSorted = sorted([ (k, pieDat[k]) for k in pieDat ],key=lambda x: x[1], reverse=True)
            if len(pieDatSorted) > self.maxActivityInfoCount:
                pieDatSorted = pieDatSorted[:self.maxActivityInfoCount]
            ax = pp.subplot(2, 1, 2)
            width = 0.7
            x = np.arange(len(pieDatSorted)) + width
            xText = [ xx[0] for xx in pieDatSorted ]
            y = [ xx[1] for xx in pieDatSorted ]
            pp.bar(x, y, width)
            a = pp.gca()
            a.set_xticklabels(a.get_xticks(), { 'fontProperties': self.prop })
            pp.xticks(x, xText, rotation='vertical')
            pp.xlabel(u'用户', fontproperties=self.prop)
            pp.ylabel(u'24小时消息数', fontproperties=self.prop)
            ax.set_xlim([ 0, len(xText) + 1 - width ])
            pp.margins(0.2)
            pp.savefig(fn)
        finally:
            # pyplot keeps every figure alive until it is closed explicitly;
            # without this each request would leak one figure.
            pp.close(fig)
        return fn

    def generateTmpFileName(self):
        """Return a fresh PNG path in ``imgDir`` (millisecond time plus a random number)."""
        return '{0}/{1}-{2}.png'.format(self.imgDir, int(time() * 1000), random.randint(0, 10000))
83 | 
if __name__ == '__main__':
    # Manual smoke run: build one chart for a test group using a local font.
    fontFile = '/usr/share/fonts/truetype/wqy/wqy-microhei.ttc'
    activityInfo = ActivityInfo(fontFile)
    activityInfo.generateActivityInfoForGroup('TestGroup')
87 | 


--------------------------------------------------------------------------------
/DoutuProcessor.py:
--------------------------------------------------------------------------------
 1 | from time import time
 2 | from datetime import datetime
 3 | from ProcessInterface import ProcessInterface
 4 | from utilities import *
 5 | from itchat.content import *
 6 | from subprocess import call
 7 | from ImageSearcher import ImageSearcher
 8 | from threading import Thread
 9 | from time import time, sleep
10 | import logging
11 | import itchat
12 | import re
13 | import os
14 | 
def DoutuEnd(destinationChatroomId):
    """Wait for one doutu interval, then announce in the chatroom that it ended.

    Blocks the calling thread for ``DoutuProcessor.doutuTimeInterval`` seconds,
    so it is meant to run on its own thread.
    """
    waitSeconds = DoutuProcessor.doutuTimeInterval
    sleep(waitSeconds)
    endNotice = '时间到， 斗图结束。'
    itchat.send(endNotice, destinationChatroomId)
18 | 
class DoutuProcessor(ProcessInterface):
    """Reply to pictures in a group with a similar image from the gif library.

    A group is answered when it is in the whitelist, or for
    ``doutuTimeInterval`` seconds after someone sends ``/doutu``.
    """
    doutuTimeInterval = 5 * 60   # seconds

    def __init__(self, doutuFeatureFn, whitelist=[]):
        """Set up folders and the image searcher.

        doutuFeatureFn: feature file handed to ``ImageSearcher``.
        whitelist: group names that are always answered (copied into a set,
            so the default list is never mutated).
        """
        # The library lives in the repository's ``DoutuRobot`` directory; the
        # spelling must match exactly on case-sensitive file systems.
        self.imgFolder = 'DoutuRobot/dat/gifs/'
        self.doutuFolder = 'DoutuImages'
        self.whitelist = set(whitelist)
        # group name -> timestamp until which doutu mode is active
        self.activationTime = {}
        if not os.path.exists(self.doutuFolder):
            os.mkdir(self.doutuFolder)
        self.imageSearcher = ImageSearcher(doutuFeatureFn)
        logging.info('DoutuProcessor initialized.')

    def process(self, msg, type):
        """Handle the ``/doutu`` command and answer pictures while active."""
        # Mode management
        groupName = msg['User']['NickName']
        destinationChatroomId = msg['FromUserName'] if re.search('@@', msg['FromUserName']) else msg['ToUserName']
        if type == TEXT and msg['Content'] == '/doutu':
            # Control mode
            self.activationTime[groupName] = time() + self.doutuTimeInterval
            itchat.send('鸭哥进入斗图模式！ {0}分钟内群里所有照片和表情（除了商城表情），鸭哥都会回复斗图！'.format(int(self.doutuTimeInterval / 60)), destinationChatroomId)
            # A background thread announces the end of the session.
            Thread(target=DoutuEnd, args=[destinationChatroomId]).start()
            return
        if type != PICTURE:
            return
        # If in whitelist. skip the mode check. Otherwise check the activation time.
        if groupName not in self.whitelist:
            if groupName not in self.activationTime or self.activationTime[groupName] <= time():
                return

        logging.info('[Doutu] Begin processing...')
        fn = msg['FileName']
        newfn = os.path.join(self.doutuFolder, fn)
        # msg['Text'] is called with a file name to save the picture, which is
        # then moved into the doutu folder.
        msg['Text'](fn)
        os.rename(fn, newfn)
        newfnjpg = newfn + '.jpg'
        # "[0]" selects the first frame so animated images become a single jpg.
        call(['convert', '{0}[0]'.format(newfn), newfnjpg])
        if os.path.exists(newfnjpg):
            logging.info('[Doutu] imagemagick succeeded.')
        else:
            itchat.send('鸭哥没办法和腾讯表情商城的表情斗图。。', destinationChatroomId)
            logging.info('[Doutu] imagemagick failed.')
            return

        doutufn = self.imageSearcher.search(newfnjpg)
        doutufn = os.path.join(self.imgFolder, doutufn)
        itchat.send('@img@{0}'.format(doutufn), destinationChatroomId)
        logging.info('Doutu! {0} => {1}.'.format(newfn, doutufn))
67 | 
if __name__ == '__main__':
    # Manual smoke run: only checks that the processor can be constructed.
    featureFile = './DoutuFeatures.txt'
    processor = DoutuProcessor(featureFile)
70 | 


--------------------------------------------------------------------------------
/DoutuRobot/README.md:
--------------------------------------------------------------------------------
 1 | ## 斗图功能
 2 | 
 3 | 目前对这个功能并没有官方支持。
 4 | 这个文档只是为感兴趣的读者做一个参考。
 5 | 要想部署使用这个系统，需要一些深度学习的知识和经验，并且需要读一下代码。
 6 | 
 7 | 系统的基本框架是，用Caffe把所有图片的feature抽出来，构成一个数据库。
 8 | 新的图片进来以后，抽feature，在这个数据库里面进行检索。
 9 | 最接近的几个图里面随机挑选一个返回。
10 | 
11 | ### 训练
12 | 
13 | * 安装Caffe。`installCaffe.sh`可以作为一个参考。几个要点：不要忘了`make pycaffe`；用OpenBLAS启用多线程可以减小Latency；如果有GPU的话可以大幅加速。
14 | * 执行`dedupAndCopy.sh`转换文件格式。
15 | * 执行`incrementalExtractFeatures.sh`抽取feature。
16 | * 把生成的feature文件`featuresall.txt`拷贝到父目录，并且在main.py里面指定`DoutuProcessor`的文件路径。
17 | 


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170403-210016.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170403-210016.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170403-232221.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170403-232221.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170404-021026.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-021026.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170404-033135.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-033135.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170404-051440.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-051440.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170404-062843.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-062843.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170404-065246.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-065246.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170404-081520.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-081520.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170404-205424.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-205424.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170404-210537.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-210537.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170404-234147.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-234147.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170404-235954.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-235954.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170405-020917.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-020917.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170405-021924.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-021924.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170405-062442.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-062442.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170405-071029.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-071029.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170405-092427.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-092427.gif


--------------------------------------------------------------------------------
/DoutuRobot/dat/gifs/170405-092558.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-092558.gif


--------------------------------------------------------------------------------
/DoutuRobot/dedupAndCopy.sh:
--------------------------------------------------------------------------------
 1 | DedupAndCopyFile() {
 2 |     LC_ALL=C md5sum $1/*.gif | sort -k1,1 -u | awk '{print $2;}' | sed 's/^.*\///' > files.txt
 3 |     rsync -av --files-from=files.txt $1 dat/gifs
 4 |     rm files.txt
 5 | }
 6 | 
 7 | DedupAndCopyFile '../HistoryImages'
 8 | 
 9 | # Convert to jpg for training use
10 | ls ./dat/gifs | xargs -n1 -I{} -P4 bash -c 'echo {}; if [ ! -e "dat/jpgs/{}.jpg" ]; then convert "dat/gifs/{}[0]" "dat/jpgs/{}.jpg"; fi'
11 | # Generate files list for Caffe use
12 | find dat/jpgs > files.txt
13 | 


--------------------------------------------------------------------------------
/DoutuRobot/extractFeatures.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import os, sys, getopt
 3 | import hashlib
 4 | 
# Main path to your caffe installation (relative to the working directory;
# the trailing slash matters because the paths below are built by concatenation)
caffe_root = './caffe/'

# Model prototxt file
model_prototxt = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
#model_prototxt = caffe_root + 'models/bvlc_googlenet/deploy.prototxt'

# Model caffemodel file
model_trained = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
#model_trained = caffe_root + 'models/bvlc_googlenet/bvlc_googlenet.caffemodel'

# File containing the class labels
imagenet_labels = caffe_root + 'data/ilsvrc12/synset_words.txt'

# Path to the mean image (used for input processing)
mean_path = caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy'

# Name of the layer we want to extract
#layer_name = 'pool5/7x7_s1'
layer_name = 'fc8'

# The caffe Python bindings are imported from the local checkout, so the
# search path has to be extended before the import below.
sys.path.insert(0, caffe_root + 'python')
import caffe
28 | 
def main(argv):
    """Extract one feature vector per listed image and write them as TSV.

    argv: command-line arguments without the program name. Recognised
        options are ``-h``, ``-i``/``--ifile`` (text file with one image path
        per line) and ``-o``/``--ofile`` (output file, overwritten).

    Each output line is ``<image path>\\t<sha224 of the file>\\t<features>``,
    the features being the space-separated values of layer ``layer_name``.
    Images that fail for any reason are reported and skipped. Exits with
    status 2 on invalid options.
    """
    usage = 'caffe_feature_extractor.py -i <inputfile> -o <outputfile>'
    inputfile = ''
    outputfile = ''

    try:
        opts, args = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        # Real tuples: `opt in ("-i")` was a substring test against the string
        # "-i" and never matched the declared long options.
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg

    print('Reading images from "', inputfile)
    print('Writing vectors to "', outputfile)

    # Setting this to CPU, but feel free to use GPU if you have CUDA installed
    caffe.set_mode_cpu()
    # Loading the Caffe model, setting preprocessing parameters
    net = caffe.Classifier(model_prototxt, model_trained,
                           mean=np.load(mean_path).mean(1).mean(1),
                           channel_swap=(2,1,0),
                           raw_scale=255,
                           image_dims=(256, 256))

    # Loading class labels
    with open(imagenet_labels) as f:
        labels = f.readlines()

    # This prints information about the network layers (names and sizes)
    # You can uncomment this, to have a look inside the network and choose which layer to print
    #print [(k, v.data.shape) for k, v in net.blobs.items()]
    #exit()

    # Processing one image at a time, printing predictions and writing the vector to a file.
    # Opening with 'w' already truncates the output file.
    with open(inputfile, 'r') as reader:
        with open(outputfile, 'w') as writer:
            for image_path in reader:
                try:
                    image_path = image_path.strip()
                    # Hash of the raw file content, stored next to the features.
                    with open(image_path, 'rb') as fp:
                        cachekey = hashlib.sha224(fp.read()).hexdigest()
                    input_image = caffe.io.load_image(image_path)
                    prediction = net.predict([input_image], oversample=False)
                    best = prediction[0].argmax()
                    print(os.path.basename(image_path), ' : ' , labels[best].strip() , ' (', prediction[0][best] , ')')
                    # The forward pass above leaves the layer activations in net.blobs.
                    feature = net.blobs[layer_name].data[0].reshape(1,-1)
                    featureTxt = ' '.join([ str(x) for x in feature.tolist()[0] ])
                    writer.write('{0}\t{1}\t{2}\n'.format(image_path, cachekey, featureTxt))
                except Exception as e:
                    # Best effort: one unreadable image must not abort the whole run.
                    print(e)
                    print('ERROR: skip {0}.'.format(image_path))
87 | 
if __name__ == "__main__":
    # Hand everything after the program name to main().
    cliArguments = sys.argv[1:]
    main(cliArguments)
90 | 


--------------------------------------------------------------------------------
/DoutuRobot/incrementalExtractFeatures.sh:
--------------------------------------------------------------------------------
 1 | # Generate files.txt
 2 | awk '{ printf("dat/jpgs/%s.jpg\n", $1); }' < ./featuresall.txt | sort > extractedFiles.txt
 3 | find dat/jpgs -type f | sort > allFiles.txt
 4 | comm -23 allFiles.txt extractedFiles.txt > files.txt
 5 | rm allFiles.txt
 6 | rm extractedFiles.txt
 7 | lines=`wc -l files.txt`
 8 | echo files.txt generated, with $lines lines.
 9 | 
10 | # Invoke Caffe to extract features
11 | python3 -u ./extractFeatures.py -i files.txt -o newFeatures.txt
12 | sed -i 's/dat\/jpgs\///' newFeatures.txt
13 | sed -i 's/\.jpg\t/\t/' newFeatures.txt
14 | cp featuresall.txt featuresall.txt.bak
15 | cat newFeatures.txt >> featuresall.txt
16 | 


--------------------------------------------------------------------------------
/DoutuRobot/installCaffe.sh:
--------------------------------------------------------------------------------
 1 | #Dependencies
 2 | sudo apt-get install -y libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
 3 | sudo apt-get install -y --no-install-recommends libboost-all-dev
 4 | sudo apt-get install -y libatlas-base-dev
 5 | sudo apt-get install -y python3-dev
 6 | sudo apt-get install -y libgoogle-glog-dev liblmdb-dev
 7 | 
 8 | # Caffe
 9 | git clone https://github.com/BVLC/caffe
10 | cd caffe
11 | cp Makefile.config.example Makefile.config
12 | echo "ALSO NEED TO MODIFY THE FILE IF YOU WANT CPU_ONLY"
13 | read
14 | 
15 | # Debian only
16 | echo 'INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial/' >> Makefile.config
17 | echo 'LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config
18 | 
19 | make all -j4
20 | 


--------------------------------------------------------------------------------
/GaTextHook.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from itchat.content import *
 4 | from ProcessInterface import ProcessInterface
 5 | from utilities import *
 6 | from time import time, sleep
 7 | from threading import Timer
 8 | from datetime import datetime
 9 | import itchat
10 | import re
11 | import logging
12 |     
def clearGaNumDict():
    """Reset all per-group ga counters and schedule the next reset.

    Clears both the in-memory ``GaTextHook.gaNumDict`` and the persisted
    documents of the ga collection.
    """
    GaTextHook.gaNumDict = {}
    gaCollection = client[dbName][gaCollName]
    gaCollection.remove({}, {'multi': True})
    logMessage = 'GaNumDict cleared. GaNumDict = {0}.'.format(GaTextHook.gaNumDict)
    logging.info(logMessage)
    # Re-arm the timer so the reset repeats.
    scheduleTimerToClearGaNumDict()
18 | 
def _secondsUntilNextClear(now):
    """Return the whole seconds (plus one) from ``now`` to the next 9:00 local time."""
    from datetime import timedelta
    nextClear = now.replace(hour=9, minute=0, second=0, microsecond=0)
    if nextClear <= now:
        # Today's 9:00 has passed. Adding a timedelta rolls over month and
        # year ends correctly, whereas replace(day=day + 1) raised ValueError
        # on the last day of a month.
        nextClear += timedelta(days=1)
    return int((nextClear - now).total_seconds()) + 1


def scheduleTimerToClearGaNumDict():
    """Start a timer that calls ``clearGaNumDict`` shortly after the next 9:00 local time.

    9:00 local time is meant to be 0:00 in China.
    NOTE(review): that only holds for a specific server time zone - confirm.
    """
    secs = _secondsUntilNextClear(datetime.today())
    Timer(secs, clearGaNumDict).start()
25 | 
26 | # The logic is getting more complicated, so we make it a separate processor.
class GaTextHook(ProcessInterface):
    """Reply to mentions of the trigger text with a counted "ga".

    Two triggers exist: the normal one, counted per group up to ``gaNumMax``
    and persisted in the ga collection, and a "force" trigger that is rate
    limited per group by ``forceTriggerInterval``.
    """
    # group name -> number of normal triggers counted since the last reset
    gaNumDict = {}

    def __init__(self, blacklist=[]):
        """Restore the counters from the database and arm the reset timer.

        blacklist: regular expressions; groups whose name matches any of them
            are ignored.
        """
        self.blacklist = blacklist
        self.client = client
        self.gaColl = self.client[dbName][gaCollName]
        restoredCounts = {}
        for doc in self.gaColl.find():
            restoredCounts[doc['GroupName']] = doc['CurrentGaNum']
        GaTextHook.gaNumDict = restoredCounts
        self.gaNumMax = 100
        self.triggerText = '鸭哥'
        self.gaText = '嘎？'
        self.forceTriggerText = '鸭哥嘎一个'
        # group name -> earliest timestamp at which the force trigger works again
        self.forceTriggerNextTimestamp = {}
        self.forceTriggerInterval = 5 * 60 # 5 minutes
        self.forceTriggerGaText = '强力嘎！'
        # Set up the clear timer
        scheduleTimerToClearGaNumDict()

        logging.info('GaTextHook initialized.')

    def process(self, msg, type):
        """Handle one message; only text messages of non-blacklisted groups count."""
        if type != TEXT:
            return
        groupName = msg['User']['NickName']
        toSend = None
        blacklistHits = [ re.search(pattern, groupName) is not None for pattern in self.blacklist ]
        if any(blacklistHits):
            return

        # The force trigger is checked first: its text contains the normal trigger.
        if re.search(self.forceTriggerText, msg['Content']):
            now = time()
            allowedFrom = self.forceTriggerNextTimestamp.get(groupName, 0)
            if now < allowedFrom:
                logging.info("Don't force Ga because time {0} < NextTime {1} for group {2}.".format(now, allowedFrom, groupName))
                return
            self.forceTriggerNextTimestamp[groupName] = now + self.forceTriggerInterval
            toSend = self.forceTriggerGaText
            logging.info('{0} => {1}'.format(msg['Content'], toSend))
            itchat.send(toSend, msg['FromUserName'])
            return

        if not re.search(self.triggerText, msg['Content']):
            return
        # Check the ga time: count the trigger and persist the new value
        # before deciding whether to answer.
        newCount = GaTextHook.gaNumDict.get(groupName, 0) + 1
        GaTextHook.gaNumDict[groupName] = newCount
        self.gaColl.update({'GroupName': groupName}, {'$set': { 'CurrentGaNum': GaTextHook.gaNumDict[groupName] } }, upsert=True)
        if GaTextHook.gaNumDict[groupName] > self.gaNumMax:
            logging.info("Don't Ga because GaNum {0} exceeds max {1} for group {2}.".format(GaTextHook.gaNumDict[groupName], self.gaNumMax, groupName))
            return
        toSend = '{0} x{1}'.format(self.gaText, GaTextHook.gaNumDict[groupName])
        logging.info('{0} => {1}'.format(msg['Content'], toSend))
        itchat.send(toSend, msg['FromUserName'])
76 | 
if __name__ == '__main__':
    # Manual smoke run: configure logging, then construct the hook once.
    logFormat = '%(asctime)s : %(levelname)s : %(message)s'
    logging.basicConfig(format=logFormat, level=logging.INFO)
    hook = GaTextHook()
80 | 


--------------------------------------------------------------------------------
/GlobalTextHook.py:
--------------------------------------------------------------------------------
 1 | from itchat.content import *
 2 | from ProcessInterface import ProcessInterface
 3 | import itchat
 4 | import re
 5 | import logging
 6 | 
 7 | class GlobalTextHook(ProcessInterface):
 8 |     def __init__(self, subdict={}, blacklist=[]):
 9 |         self.dict = subdict
10 |         self.blacklist = blacklist
11 |         logging.info('GlobalTextHook initialized.')
12 | 
13 |     def process(self, msg, type):
14 |         if type != TEXT:
15 |             return
16 |         if any([ re.search(x, msg['User']['NickName']) is not None for x in self.blacklist ]):
17 |             return
18 |         for k in self.dict:
19 |             if re.search(k, msg['Content']):
20 |                 v = self.dict[k]
21 |                 logging.info('{0} => {1}'.format(msg['Content'], v))
22 |                 itchat.send(v, msg['FromUserName'])
23 | 


--------------------------------------------------------------------------------
/GroupMessageForwarder.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from utilities import *
 3 | from itchat.content import *
 4 | from ProcessInterface import ProcessInterface
 5 | import os
 6 | import itchat
 7 | 
 8 | class GroupMessageForwarder(ProcessInterface):
 9 |     def __init__(self, chatroomDisplayNames, chatroomNames, isBidirectional=True):
10 |         self.isInitialized = False
11 |         self.chatroomDisplayNames = chatroomDisplayNames
12 |         self.chatroomNames = chatroomNames
13 |         self.isBidirectional = isBidirectional
14 |         chatrooms = itchat.get_chatrooms()
15 |         self.chatroomObjs = [ getChatroomByName(chatrooms, x) for x in chatroomNames ]
16 |         if len([ x for x in self.chatroomObjs if x is None ]) != 0:
17 |             logging.info('Cannot find chatrooms for {0}'.format(chatroomNames))
18 |             return
19 |         self.chatroomIds = [ x['UserName'] for x in self.chatroomObjs ]
20 |         self.nickNameLookup = NickNameLookup(self.chatroomObjs)
21 |         self.fileFolder = 'ForwarderFiles'
22 |         if not os.path.exists(self.fileFolder):
23 |             os.mkdir(self.fileFolder)
24 |         logging.info('Fetched user ids for the chatrooms {0}.'.format(chatroomNames))
25 |         self.isInitialized = True
26 | 
27 |     def process(self, msg, type):
28 |         if not self.isInitialized:
29 |             logging.error('The forwarder was not properly initialized. Please send a message in the groups you want to connect and try again.')
30 |             return
31 |         shallSendObj = self.shallSend(msg)
32 |         if not shallSendObj['shallSend']:
33 |             return
34 |         if type == TEXT:
35 |             fromText = '[{0}]'.format(self.chatroomDisplayNames[shallSendObj['fromChatroom']])
36 |             destinationChatroomId = self.chatroomIds[not shallSendObj['fromChatroom']]
37 |             content = '{0} {1}: {2}'.format(fromText, msg['ActualNickName'], msg['Content'])
38 |             logging.info(content)
39 |             itchat.send(content, destinationChatroomId)
40 |         elif type == PICTURE:
41 |             fn = msg['FileName']
42 |             newfn = os.path.join(self.fileFolder, fn)
43 |             msg['Text'](fn)
44 |             os.rename(fn, newfn)
45 |             type = {'Picture': 'img', 'Video': 'vid'}.get(msg['Type'], 'fil')
46 |             typeText = {'Picture': '图片', 'Video': '视频'}.get(msg['Type'], '文件')
47 |             fromText = '[{0}]'.format(self.chatroomDisplayNames[shallSendObj['fromChatroom']])
48 |             destinationChatroomId = self.chatroomIds[not shallSendObj['fromChatroom']]
49 |             content = '{0} {1} 发送了{2}:'.format(fromText, self.nickNameLookup.lookupNickName(msg), typeText)
50 |             itchat.send(content, destinationChatroomId)
51 |             logging.info(content)
52 |             itchat.send('@{0}@{1}'.format(type, newfn), destinationChatroomId)
53 |         elif type == SHARING:
54 |             fromText = '[{0}]'.format(self.chatroomDisplayNames[shallSendObj['fromChatroom']])
55 |             destinationChatroomId = self.chatroomIds[not shallSendObj['fromChatroom']]
56 |             content = '{0} {1} 分享了链接: {2} {3}'.format(fromText, self.nickNameLookup.lookupNickName(msg), msg['Text'], msg['Url'])
57 |             logging.info(content)
58 |             itchat.send(content, destinationChatroomId)
59 |         else:
60 |             logging.info('Unknown type encoutered.')
61 |         pass
62 | 
63 |     def shallSend(self, msg):
64 |         result = False
65 |         for i in range(len(self.chatroomIds)):
66 |             result = result or extractToUserName(msg) == self.chatroomIds[i] or extractFromUserName(msg) == self.chatroomIds[i]
67 |             if result:
68 |                 return { 'shallSend': True, 'fromChatroom': i }
69 |             if not self.isBidirectional:
70 |                 break
71 |         return { 'shallSend': False }
72 | 


--------------------------------------------------------------------------------
/GroupTagCloud.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from utilities import *
  3 | from itchat.content import *
  4 | from collections import Counter
  5 | from pymongo import MongoClient, DESCENDING
  6 | from wordcloud import WordCloud
  7 | from ProcessInterface import ProcessInterface
  8 | import itertools
  9 | import gensim
 10 | import os
 11 | import itchat
 12 | import re
 13 | import random
 14 | import time
 15 | import logging
 16 | import jieba
 17 | 
 18 | class GroupTagCloud(ProcessInterface):
 19 |     recordMaxNum = 500
 20 |     maxFrequency = 40
 21 |     imgDir = 'TagCloud'
 22 | 
 23 |     def __init__(self, fontPath):
 24 |         self.client = MongoClient()
 25 |         self.coll = self.client[dbName][collName]
 26 |         self.fontPath = fontPath
 27 |         self.wordCloud = WordCloud(font_path=self.fontPath, width=400, height=400, max_words=100)
 28 |         if not os.path.exists(self.imgDir):
 29 |             os.mkdir(self.imgDir)
 30 |         logging.info('GroupTagCloud connected to MongoDB.')
 31 | 
 32 |     def process(self, msg, type):
 33 |         shallRunObj = self.isRun(msg, type)
 34 |         if shallRunObj['shallRun']:
 35 |             toLog = 'Generating tag cloud for {0}.'.format(shallRunObj['groupName'])
 36 |             if shallRunObj['userName']:
 37 |                 toLog = '{0} Username {1}.'.format(toLog, shallRunObj['userName'])
 38 |             logging.info(toLog)
 39 |             fn = self.generateTagCloudForGroupV2(shallRunObj['groupName'], shallRunObj['userName'])
 40 |             destinationChatroomId = msg['FromUserName'] if re.search('@@', msg['FromUserName']) else msg['ToUserName']
 41 |             logging.info('Sending tag cloud file {0} to {1}.'.format(fn, destinationChatroomId))
 42 |             itchat.send('@img@{0}'.format(fn), destinationChatroomId)
 43 | 
 44 |     # Generate a tag cloud image from the latest self.recordMaxNum records, based on TF-IDF. Return the file name.
 45 |     def generateTagCloudForGroupV2(self, groupName, userName=None):
 46 |         records = None
 47 |         if userName is None:
 48 |             records = self.coll.find({ 'to': groupName }).sort([ ('timestamp', DESCENDING) ]).limit(self.recordMaxNum)
 49 |             allRecords = self.coll.find({ 'to': { '$ne': groupName } }).sort([ ('timestamp', DESCENDING) ]).limit(self.recordMaxNum * 5)
 50 |             allRecordsGroup = sorted(allRecords, key=lambda x: x['to'])
 51 |         else:
 52 |             records = self.coll.find({ 'from': userName, 'to': groupName }).sort([ ('timestamp', DESCENDING) ]).limit(self.recordMaxNum)
 53 |             allRecords = self.coll.find({ 'from': { '$ne': userName }, 'to': groupName }).sort([ ('timestamp', DESCENDING) ]).limit(self.recordMaxNum * 5)
 54 |             allRecordsGroup = sorted(allRecords, key=lambda x: x['from'])
 55 |         docThisGroup = list(jieba.cut(' '.join([ r['content'] for r in records if re.match('<<<IMG', r['content']) is None])))  # remove the image records
 56 |         allRecordsGroup = itertools.groupby(allRecordsGroup, lambda x: x['to'])
 57 |         docsOtherGroups = [ list(jieba.cut(' '.join([x['content'] for x in list(g) if re.match('<<<IMG', x['content']) is None]))) for k, g in allRecordsGroup ]
 58 |         docs = [ docThisGroup ] + docsOtherGroups
 59 |         dictionary = gensim.corpora.Dictionary(docs)
 60 |         docs = [ dictionary.doc2bow(doc) for doc in docs ]
 61 |         id2token = { v: k for k, v in dictionary.token2id.items() }
 62 |         tfidf = gensim.models.tfidfmodel.TfidfModel(corpus=docs)
 63 |         tagCloudFrequencies = { id2token[x[0]]: x[1] for x in tfidf[docs[0]] }
 64 | 
 65 |         img = self.wordCloud.generate_from_frequencies(tagCloudFrequencies).to_image()
 66 |         fn = self.generateTmpFileName()
 67 |         img.save(fn)
 68 |         return fn
 69 | 
 70 |     # Generate a tag cloud image from the latest self.recordMaxNum messages. Return the file name.
 71 |     def generateTagCloudForGroup(self, groupName, userName=None):
 72 |         records = None
 73 |         if userName is None:
 74 |             records = self.coll.find({ 'to': groupName }).sort([ ('timestamp', DESCENDING) ]).limit(self.recordMaxNum)
 75 |         else:
 76 |             records = self.coll.find({ 'from': userName, 'to': groupName }).sort([ ('timestamp', DESCENDING) ]).limit(self.recordMaxNum)
 77 |         texts = [ r['content'] for r in records ]
 78 |         frequencies = Counter([ w for text in texts for w in jieba.cut(text, cut_all=False) if len(w) > 1 ])
 79 |         frequencies = { k: min(self.maxFrequency, frequencies[k]) for k in frequencies }
 80 |         img = self.wordCloud.generate_from_frequencies(frequencies).to_image()
 81 |         fn = self.generateTmpFileName()
 82 |         img.save(fn)
 83 |         return fn
 84 | 
 85 |     def isRun(self, msg, type):
 86 |         if type != TEXT or 'Content' not in msg:
 87 |             return { 'shallRun': False }
 88 |         if re.search(r'^\s*/tagcloud$', msg['Content']):
 89 |             return { 'shallRun': True, 'userName': None, 'groupName': msg['User']['NickName'] }
 90 |         if re.search(r'^\s*/mytag$', msg['Content']):
 91 |             return { 'shallRun': True, 'userName': msg['ActualNickName'], 'groupName': msg['User']['NickName'] }
 92 |         return { 'shallRun': False }
 93 | 
 94 |     def generateTmpFileName(self):
 95 |         return '{0}/{1}-{2}.png'.format(self.imgDir, int(time.time() * 1000), random.randint(0, 10000))
 96 | 
 97 | if __name__ == '__main__':
 98 |     groupTagCloud = GroupTagCloud('/usr/share/fonts/truetype/wqy/wqy-microhei.ttc')
 99 |     groupTagCloud.generateTagCloudForGroup('TestGroup', '鸭哥')
100 |     groupTagCloud.generateTagCloudForGroupV2('TestGroup', '鸭哥')
101 | 


--------------------------------------------------------------------------------
/HistoryRecorder.py:
--------------------------------------------------------------------------------
 1 | from time import time
 2 | from datetime import datetime
 3 | from ProcessInterface import ProcessInterface
 4 | from utilities import *
 5 | from itchat.content import *
 6 | import os
 7 | 
 8 | class HistoryRecorder(ProcessInterface):
 9 |     def __init__(self):
10 |         self.client = client
11 |         self.coll = self.client[dbName][collName]
12 |         self.imgFolder = 'HistoryImages'
13 |         if not os.path.exists(self.imgFolder):
14 |             os.mkdir(self.imgFolder)
15 |         logging.info('HistoryRecorder initialized.')
16 | 
17 |     # Record an itchat message to mongodb
18 |     # Currently only support text messages in group chats
19 |     def process(self, msg, type):
20 |         if type == PICTURE:
21 |             fn = msg['FileName']
22 |             newfn = os.path.join(self.imgFolder, fn)
23 |             msg['Text'](fn)
24 |             os.rename(fn, newfn)
25 |             msg['Content'] = '<<<IMG:{0}>>>'.format(newfn)
26 |         if type == TEXT or type == PICTURE:
27 |             timestamp = time()
28 |             rtime = datetime.utcfromtimestamp(timestamp)
29 |             r = {
30 |                 'content': msg['Content'],
31 |                 'from': msg['ActualNickName'],
32 |                 'fromId': msg['ToUserName'],
33 |                 'to': msg['User']['NickName'] if 'User' in msg and 'UserName' in msg['User'] else 'N/A',
34 |                 'timestamp': timestamp,
35 |                 'time': rtime
36 |                 }
37 |             self.coll.insert(r)
38 | 


--------------------------------------------------------------------------------
/ImageSearcher.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import scipy
  3 | import sys
  4 | import logging
  5 | import hashlib
  6 | import sys
  7 | 
# Caffe related configurations
# Root of the Caffe checkout; every model/data path below is built relative to it.
# NOTE(review): the repository folder is spelled 'DoutuRobot' while this path says
# 'DouTuRobot' -- confirm it resolves on a case-sensitive file system.
caffe_root = './DouTuRobot/caffe/'

# Old model: googlenet
#model_prototxt = caffe_root + 'models/bvlc_googlenet/deploy.prototxt'
#model_trained = caffe_root + 'models/bvlc_googlenet/bvlc_googlenet.caffemodel'
#layer_name = 'pool5/7x7_s1'
# New model: caffenet
model_prototxt = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_trained = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
# Name of the network blob whose activations are read back as the image feature
layer_name = 'fc8'

# NOTE(review): imagenet_labels is not referenced by the code visible in this module.
imagenet_labels = caffe_root + 'data/ilsvrc12/synset_words.txt'
mean_path = caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy'
# The Python bindings are imported from the checkout, so the path has to be
# registered before the import below.
sys.path.insert(0, caffe_root + 'python')
import caffe
caffe.set_mode_cpu()
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
 26 | 
 27 | class ImageSearcher:
 28 |     def __init__(self, featurefn):
 29 |         self.resultNum = 5
 30 |         self.net = caffe.Classifier(model_prototxt, model_trained,
 31 |             mean=np.load(mean_path).mean(1).mean(1),
 32 |             channel_swap=(2,1,0),
 33 |             raw_scale=255,
 34 |             image_dims=(256, 256))
 35 |         self.parseFeature(featurefn)
 36 |         logging.info('Caffe net initialized. Loading cache...')
 37 |         #self.buildCache()
 38 |         self.loadCache('./featureCache.tsv')
 39 |         logging.info('Cache built.')
 40 | 
 41 |     def loadCache(self, fn):
 42 |         lines = [ x.strip().split('\t') for x in open(fn) ]
 43 |         self.resultCache = { x[0]: x[1].split(',') for x in lines }
 44 | 
 45 |     # Precompute the SHA-SearchResult cache
 46 |     def buildCache(self):
 47 |         for i in range(self.features.shape[0]):
 48 |             qfeature = self.features[i, :]
 49 |             results = self.searchWithFeature(qfeature)
 50 |             self.resultCache[self.hashes[i]] = results
 51 |             sys.stderr.write('.')
 52 |         sys.stderr.write('\n')
 53 |     
 54 |     # The feature file is expected to have three columns: id (usually file name), md5, and DNN features
 55 |     def parseFeature(self, fn):
 56 |         features = []
 57 |         self.imgfns = []
 58 |         self.hashes = []
 59 |         for line in open(fn):
 60 |             imgfn, hash, imgfeatures = line.split('\t')
 61 |             features.append([ float(x) for x in imgfeatures.split(' ') ])
 62 |             self.hashes.append(hash)
 63 |             self.imgfns.append(imgfn)
 64 |         self.features = np.asarray(features)
 65 |         self.resultCache = {}
 66 | 
 67 |     # Return the features extracted from the file
 68 |     def extractFeatures(self, fn):
 69 |         input_image = caffe.io.load_image(fn)
 70 |         self.net.predict([input_image], oversample=False)
 71 |         feature = self.net.blobs[layer_name].data[0]
 72 |         return feature
 73 | 
 74 |     def searchWithFeature(self, queryFeature):
 75 |         disp = self.features - queryFeature
 76 |         distances = (disp * disp).sum(1)
 77 |         indices = np.argsort(distances)
 78 |         if distances[indices[0]] < 0.02:
 79 |             # very close. We don't want to send dups
 80 |             index = indices[1: 1 + self.resultNum]
 81 |         else:
 82 |             index = indices[0: self.resultNum]
 83 |         return [ self.imgfns[i] for i in index ]
 84 | 
 85 |     # Search for the most similar image to the given query
 86 |     def search(self, fn):
 87 |         # First check the cache
 88 |         with open(fn, 'rb') as fp:
 89 |             cachekey = hashlib.sha224(fp.read()).hexdigest()
 90 |         if cachekey in self.resultCache:
 91 |             logging.info('Cache hit! Directly return.')
 92 |             return self.resultCache[cachekey]
 93 |         # Cache miss. Search and update cache
 94 |         queryFeature = self.extractFeatures(fn).reshape(1, -1)
 95 |         result = self.searchWithFeature(queryFeature)
 96 |         self.resultCache[cachekey] = result
 97 |         return result
 98 | 
 99 | if __name__ == '__main__':
100 |     imageSearcher = ImageSearcher('./DoutuFeatures.txt')
101 |     print(imageSearcher.search('./DouTuRobot/dat/jpgs/170405-013811.gif.jpg'))
102 |     print(imageSearcher.search('./DouTuRobot/dat/jpgs/170405-013811.gif.jpg'))
103 | 


--------------------------------------------------------------------------------
/PaiDuiHook.py:
--------------------------------------------------------------------------------
 1 | from itchat.content import *
 2 | from ProcessInterface import ProcessInterface
 3 | import itchat
 4 | import re
 5 | import logging
 6 | 
 7 | class PaiDuiHook(ProcessInterface):
 8 |     groupContentCacheMaxCapacity = 5
 9 |     maxSelfPaiDuiTTL = 15
10 | 
11 |     def __init__(self, blacklist=[]):
12 |         self.blacklist = blacklist
13 |         self.groupLastMsgsDict = {}
14 |         # A dictionary controlling not pai dui for more than one time
15 |         # Key: (groupName, content), Value: TTL (0 or non-exist means OK to paidui)
16 |         self.selfPaiDuiTTL = {}   
17 |         logging.info('PaiduiHook initialized.')
18 | 
19 |     def WhatToPaiDui(self, groupName):
20 |         msgCount = {}
21 |         msgs = self.groupLastMsgsDict[groupName]
22 |         for msg in msgs:
23 |             if msg['Content'] not in msgCount:
24 |                 msgCount[msg['Content']] = 0
25 |             msgCount[msg['Content']] += 1
26 |         contentToPaiDui = [ x for x in msgCount if msgCount[x] > 1 ]
27 |         if len(contentToPaiDui) == 0:
28 |             # No dui to pai
29 |             return
30 |         # it's possible that two duis are formed at the same time, but only one can pass the TTL check
31 |         for content in contentToPaiDui:
32 |             if (groupName, content) not in self.selfPaiDuiTTL or self.selfPaiDuiTTL == 0:
33 |                 self.selfPaiDuiTTL[(groupName, content)] = self.maxSelfPaiDuiTTL
34 |                 yield content  # We use yield here because we still need to conitnue managing the TTL
35 |             else:
36 |                 self.selfPaiDuiTTL[(groupName, content)] -= 1
37 | 
38 |     def isFromSelf(self, msg):
39 |         if re.search('^@@', msg['ToUserName']):
40 |             return True
41 |         else:
42 |             return False
43 | 
44 |     def updateGroupContentCache(self, msg, groupName):
45 |         if groupName not in self.groupLastMsgsDict:
46 |             self.groupLastMsgsDict[groupName] = []
47 |         if len(self.groupLastMsgsDict[groupName]) >= self.groupContentCacheMaxCapacity:
48 |             self.groupLastMsgsDict[groupName].pop(0)
49 |         self.groupLastMsgsDict[groupName].append({ 'Content': msg['Content'] })
50 | 
51 |     def process(self, msg, type):
52 |         if type != TEXT:
53 |             return
54 |         groupName = msg['User']['NickName']
55 |         if any([ re.search(x, groupName) is not None for x in self.blacklist ]):
56 |             return
57 |         if re.search('^/', msg['Content']):
58 |             return
59 |         if self.isFromSelf(msg):
60 |             # Stop processing if the message is from myself
61 |             return
62 | 
63 |         self.updateGroupContentCache(msg, groupName)
64 |         contentToPaiDui = list(self.WhatToPaiDui(groupName))
65 |         if len(contentToPaiDui) > 1:
66 |             logging.error('Multiple duis detected.')
67 |         if len(contentToPaiDui) != 0:
68 |             # Pai dui!
69 |             itchat.send(msg['Content'], msg['FromUserName'])
70 |             logging.info('Pai Dui! {0}.'.format(msg['Content']))
71 |             # Update data structure to avoid Pai dui for multiple times.
72 |             self.updateGroupContentCache({ 'Content': msg['Content'], 'FromSelf': True }, groupName)
73 | 


--------------------------------------------------------------------------------
/ProcessInterface.py:
--------------------------------------------------------------------------------
class ProcessInterface:
    """Base class of the message plugins; subclasses override process()."""

    def process(self, msg, type):
        """Handle one message of the given type. Always raises here."""
        reason = "ProcessInterface is an abstract interface"
        raise NotImplementedError(reason)
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 微信群机器人
 2 | 
 3 | 目前支持的功能：
 4 | 
 5 | * 响应/help，显示帮助信息。
 6 | * 在两个群中间转发文本，图片，自定义动画表情，链接。微信群有500人的容量限制，一个消息转发机器人可以通过把几个群接起来相互转发消息来突破这样的限制。
 7 | * 用正则表达式来自定义回复。
 8 | * 响应/tagcloud，生成整个群的标签云，或者响应/mytag，生成某个用户的标签云。
 9 | * 响应/activity，生成群的活跃度时间图，以及活跃用户饼图。
10 | * 自动排队。如果群里出现了二传，自动三传。
11 | * 斗图。/doutu进入斗图模式，用深度学习确定回复什么图，进行斗图。
12 | * 兼容Windows和Linux。如果在Windows下运行，请更改字体路径。
13 | 
14 | 目前不支持的功能：
15 | 
16 | * 转发表情商城中的表情（即使表情是免费的）
17 | * 转发红包（想啥呢你）
18 | * 多于两个群的转发（但更改应该很简单）
19 | 
20 | ## 部署
21 | 
22 | * 运行`deploy.sh`。或者手工安装python，mongodb，然后安装`requirements.txt`中的库。
23 | * 依赖于[itchat](https://itchat.readthedocs.io/zh/latest/), [mongodb](https://docs.mongodb.com/manual/administration/install-community/).
24 | 
25 | ## 运行
26 | 
27 | * 这个小工具不是针对最终用户的，所以现在需要改code（`main.py`）来设置一些参数，尤其是群名，黑白名单等等。
28 | * `python3 -u main.py`。会弹出一个二维码扫码登录。
29 | * 如果在Linux VPS上运行，ssh进去的时候记得加上X转发，这样才能看到二维码。如果不用X转发的话也可以手工下载文件或者改用命令行二维码。
30 | 
31 | ## 已知问题
32 | 
33 | * 如果启动时候说无法找到群，请在群里说句话。这是微信接口的限制所致。
34 | * 如果启动的时候说连接27017端口connection refused，这通常是因为mongodb没有安装或者没有启动。安装并启动mongodb可以解决这个问题。
35 | * 如果出来的标签云里面都是框框，这是字体没有配置好所致。请去`main.py`里面更改字体路径。
36 | * 斗图的功能需要一些深度学习的基础知识和Caffe的相关经验。目前不提供支持。但DoutuRobot文件夹下面有一些文档可以参考。
37 | 


--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
 1 | # Install mongodb and python
 2 | sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 0C49F3730359A14518585931BC711F9BA15703C6
 3 | # We assume it's 16.04. Check out https://docs.mongodb.com/manual/tutorial/install-mongodb-on-ubuntu/ for other versions.
 4 | echo "deb [ arch=amd64,arm64 ] http://repo.mongodb.org/apt/ubuntu xenial/mongodb-org/3.4 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.4.list
 5 | sudo apt-get update
 6 | sudo apt-get install -y mongodb-org
 7 | sudo apt-get install -y python3 python3-pip
 8 | 
 9 | # Install python dependencies
10 | sudo pip3 install -r requirements.txt
11 | 


--------------------------------------------------------------------------------
/launchWechatBot.sh:
--------------------------------------------------------------------------------
1 | # Execute this to launch mongodb
2 | # mongod --dbpath=~/Documents/temp/WechatHistoryMongoDB &
3 | # Launch python script
4 | python3 -u main.py 2>&1 | tee log.txt
5 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import itchat, time, re
 3 | from itchat.content import *
 4 | from utilities import *
 5 | from sys import argv, exit
 6 | from GlobalTextHook import GlobalTextHook
 7 | from GaTextHook import GaTextHook
 8 | from PaiDuiHook import PaiDuiHook
 9 | from HistoryRecorder import HistoryRecorder
10 | from GroupTagCloud import GroupTagCloud
11 | from GroupMessageForwarder import GroupMessageForwarder
12 | from ProcessInterface import ProcessInterface
13 | from ActivityInfo import ActivityInfo
14 | from DoutuProcessor import DoutuProcessor
15 | import logging
16 | logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
17 | 
# Some global switches for debugging use only
isDebug = False

# Component initialization
# Log in before the plugins are built; the plugin list below is created afterwards.
itchat.auto_login(True)
helpText = """鸭哥调戏指南：
/activity: 查看本群活动和话唠排名
/tagcloud: 查看本群所有发言标签云
/mytag: 查看自己的消息标签云
/doutu: 启动斗图模式，机器人会对每一个非商城表情斗图。持续5分钟。
此外，鸭哥是只有节操的鸭，每天只嘎100次。可以用鸭哥嘎一个来无视100次限制强行召唤鸭哥，但有5分钟技能冷却"""
fontPath = '/usr/share/fonts/truetype/wqy/wqy-microhei.ttc'
plugins = [
    GlobalTextHook({ '^/help$': helpText }),
    GaTextHook(),
    PaiDuiHook(),
    HistoryRecorder(),
    GroupTagCloud(fontPath),
    ActivityInfo(fontPath),
    GroupMessageForwarder([ '二群', '三群' ], [ '科大AI二群测试中', '科大AI三群供测试' ]),
    #DoutuProcessor('./DoutuFeatures.txt')  # Uncomment to enable Dou Tu
]
# Refuse to start when something in the list does not implement the plugin interface.
if not all(isinstance(plugin, ProcessInterface) for plugin in plugins):
    logging.error('One of the plugins are not a subclass of ProcessInterface.')
    exit(-1)
# Core message loops
def _dispatchToPlugins(msg, msgType):
    """Hand msg to every plugin, labelled with msgType.

    A failing plugin is logged together with its traceback and does not stop
    the remaining plugins from running.
    """
    if isDebug:
        logging.info(msg)
    for plugin in plugins:
        try:
            plugin.process(msg, msgType)
        except Exception:
            # so that one plugin's failure won't prevent others from being executed
            logging.exception('Plugin %s failed to process a %s message.', type(plugin).__name__, msgType)

# NOTE(review): recordings, attachments and videos are all dispatched as PICTURE -- confirm this is intended.
@itchat.msg_register([PICTURE, RECORDING, ATTACHMENT, VIDEO], isGroupChat=True)
def picture_reply(msg):
    """Group chat handler for media messages."""
    _dispatchToPlugins(msg, PICTURE)

@itchat.msg_register([SHARING], isGroupChat=True)
def sharing_reply(msg):
    """Group chat handler for shared links."""
    _dispatchToPlugins(msg, SHARING)

@itchat.msg_register([TEXT], isGroupChat=True)
def text_reply(msg):
    """Group chat handler for text messages."""
    _dispatchToPlugins(msg, TEXT)

if __name__ == '__main__':
    itchat.run()
76 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | itchat
2 | pymongo
3 | jieba
4 | wordcloud
5 | numpy
6 | matplotlib
7 | gensim
8 | 


--------------------------------------------------------------------------------
/utilities.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Some utility functions
 4 | """
 5 | from pymongo import MongoClient
 6 | import logging
 7 | 
# Some global configuration
# Name of the MongoDB database used by the bot
dbName = 'WechatHistory'
# Collection that holds the recorded chat history
collName = 'history'
# Second collection name; presumably used by GaTextHook -- not referenced in this module
gaCollName = 'GaNum'
# Shared client, created at import time for a MongoDB server on localhost:27017
client = MongoClient('localhost', 27017)
13 | 
class NickNameLookup:
    """Maps member user names of the given chatrooms to a name that can be shown."""

    def __init__(self, chatrooms):
        """Index every member of every chatroom by its UserName.

        The DisplayName is stored unless it is the empty string, in which case
        the NickName is stored.
        """
        self.dict = {}
        members = (member for chatroom in chatrooms for member in chatroom['MemberList'])
        for member in members:
            if member['DisplayName'] != '':
                shownName = member['DisplayName']
            else:
                shownName = member['NickName']
            self.dict[member['UserName']] = shownName

    def lookupNickName(self, msg):
        """Return the sender name of msg.

        A non-empty ActualNickName of the message wins; otherwise the name
        indexed for ActualUserName, or '未知昵称' when that user is unknown.
        """
        if msg.get('ActualNickName', '') != '':
            return msg['ActualNickName']
        return self.dict.get(msg['ActualUserName'], '未知昵称')
29 | 
def extractFromUserName(msg):
    """Return msg['ToUserName'], or None when it is missing or empty."""
    # It's messy that if the sender is yourself, the group name will appear in the ToUserName
    toUserName = msg.get('ToUserName', '')
    return toUserName if toUserName != '' else None
34 | 
def extractToUserName(msg):
    """Return the first non-empty of msg['FromUserName'] and msg['User']['UserName'].

    Falls back to '未知昵称' when neither is available.
    """
    # For group chat FromUserName is the group Id
    fromUserName = msg.get('FromUserName', '')
    if fromUserName != '':
        return fromUserName
    if 'User' in msg:
        userName = msg['User']['UserName']
        if userName != '':
            return userName
    return '未知昵称'
43 | 
def getChatroomByName(chatrooms, name):
    """Return the first chatroom whose NickName equals name.

    Logs an error and returns None when no chatroom matches.
    """
    matches = list(filter(lambda chatroom: chatroom['NickName'] == name, chatrooms))
    if matches:
        return matches[0]
    logging.error('Cannot find the chatroom named {0}.'.format(name))
    return None
50 | 
def getNameForChatroomDisplayName(name):
    """Return name with every '&' replaced by '&amp;'."""
    pieces = name.split('&')
    return '&amp;'.join(pieces)
53 | 


--------------------------------------------------------------------------------