├── .gitignore ├── ActivityInfo.py ├── DoutuFeatures.txt ├── DoutuProcessor.py ├── DoutuRobot ├── README.md ├── dat │ └── gifs │ │ ├── 170403-210016.gif │ │ ├── 170403-232221.gif │ │ ├── 170404-021026.gif │ │ ├── 170404-033135.gif │ │ ├── 170404-051440.gif │ │ ├── 170404-062843.gif │ │ ├── 170404-065246.gif │ │ ├── 170404-081520.gif │ │ ├── 170404-205424.gif │ │ ├── 170404-210537.gif │ │ ├── 170404-234147.gif │ │ ├── 170404-235954.gif │ │ ├── 170405-020917.gif │ │ ├── 170405-021924.gif │ │ ├── 170405-062442.gif │ │ ├── 170405-071029.gif │ │ ├── 170405-092427.gif │ │ └── 170405-092558.gif ├── dedupAndCopy.sh ├── extractFeatures.py ├── incrementalExtractFeatures.sh └── installCaffe.sh ├── GaTextHook.py ├── GlobalTextHook.py ├── GroupMessageForwarder.py ├── GroupTagCloud.py ├── HistoryRecorder.py ├── ImageSearcher.py ├── PaiDuiHook.py ├── ProcessInterface.py ├── README.md ├── deploy.sh ├── launchWechatBot.sh ├── main.py ├── requirements.txt └── utilities.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | *.swo 4 | .Python 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /ActivityInfo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from utilities import * 3 | from itchat.content import * 4 | from ProcessInterface import ProcessInterface 5 | from pymongo import DESCENDING 6 | import matplotlib 7 | matplotlib.use('Agg') 8 | import matplotlib.pyplot as pp 9 | from matplotlib.font_manager import FontProperties 10 | from matplotlib.dates import HourLocator, DateFormatter 11 | import numpy as np 12 | from time import time 13 | from collections import Counter 14 | from datetime import datetime 15 | import itchat 16 | import random 17 | import re 18 | import os 19 | import logging 20 | 21 | class ActivityInfo(ProcessInterface): 22 | timestampSubtract = 3600 * 24 # 1 day 23 | maxActivityInfoCount = 10 24 | imgDir = 'activityInfo' 25 | 26 | def __init__(self, fontPath): 27 | if not os.path.exists(self.imgDir): 28 | os.mkdir(self.imgDir) 29 | self.client = client 30 | self.coll = self.client[dbName][collName] 31 | self.prop = FontProperties(fname=fontPath) 32 | logging.info('ActivityInfo initialized.') 33 | 34 | def process(self, msg, type): 35 | if type != TEXT: 36 | return 37 | if msg['Content'] == '/activity': 38 | logging.info('Generating activity info for {0}.'.format(msg['User']['NickName'])) 39 | fn = self.generateActivityInfoForGroup(msg['User']['NickName']) 40 | destinationChatroomId = msg['FromUserName'] if re.search('@@', msg['FromUserName']) else msg['ToUserName'] 41 | logging.info('Sending activity file {0} to {1}.'.format(fn, destinationChatroomId)) 42 | itchat.send('@img@{0}'.format(fn), destinationChatroomId) 43 | 44 | def generateActivityInfoForGroup(self, groupName): 45 | timestampNow = int(time()) 46 | timestampYesterday = timestampNow - self.timestampSubtract 47 | records = list(self.coll.find({ 'to': groupName, 'timestamp': { '$gt': timestampYesterday } }).sort([ ('timestamp', DESCENDING) ])) 48 | fn = self.generateTmpFileName() 49 | # Get histogram for activity 50 | hist, bins = np.histogram([ x['timestamp'] for x in records ], bins=24) 51 | center = (bins[:-1] + bins[1:]) / 2 52 | datex = [ datetime.fromtimestamp(x) for x in center ] 53 | pp.figure(figsize=(6,14)) 54 | ax = pp.subplot(2, 1, 1) 55 | pp.plot_date(datex, hist, '.-') 56 | pp.gcf().autofmt_xdate() 57 | pp.xlabel(u'美国西部时间', fontproperties=self.prop) 58 | pp.ylabel(u'每小时消息数', fontproperties=self.prop) 59 | ax.xaxis.set_major_formatter(DateFormatter('%m-%d %H:%M')) 60 | # Get bar chart for active users 61 | pieDat = Counter([ x['from'] for x in records ]) 62 | pieDatSorted = sorted([ (k, pieDat[k]) for k in pieDat ],key=lambda x: x[1], reverse=True) 63 | if len(pieDatSorted) > self.maxActivityInfoCount: 64 | pieDatSorted = pieDatSorted[:self.maxActivityInfoCount] 65 | ax = pp.subplot(2, 1, 2) 66 | width = 0.7 67 | x = np.arange(len(pieDatSorted)) + width 68 | xText = [ xx[0] for xx in pieDatSorted ] 69 | y = [ xx[1] for xx in pieDatSorted ] 70 | pp.bar(x, y, width) 71 | a = pp.gca() 72 | a.set_xticklabels(a.get_xticks(), { 'fontProperties': self.prop }) 73 | pp.xticks(x, xText, rotation='vertical') 74 | pp.xlabel(u'用户', fontproperties=self.prop) 75 | pp.ylabel(u'24小时消息数', fontproperties=self.prop) 76 | ax.set_xlim([ 0, len(xText) + 1 - width ]) 77 | pp.margins(0.2) 78 | pp.savefig(fn) 79 | return fn 80 | 81 | def generateTmpFileName(self): 82 | return '{0}/{1}-{2}.png'.format(self.imgDir, int(time() * 1000), random.randint(0, 10000)) 83 | 84 | if __name__ == '__main__': 85 | ai = ActivityInfo('/usr/share/fonts/truetype/wqy/wqy-microhei.ttc') 86 | ai.generateActivityInfoForGroup('TestGroup') 87 | -------------------------------------------------------------------------------- /DoutuProcessor.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | from datetime import datetime 3 | from ProcessInterface import ProcessInterface 4 | from utilities import * 5 | from itchat.content import * 6 | from subprocess import call 7 | from ImageSearcher import ImageSearcher 8 | from threading import Thread 9 | from time import time, sleep 10 | import logging 11 | import itchat 12 | import re 13 | import os 14 | 15 | def DoutuEnd(destinationChatroomId): 16 | sleep(DoutuProcessor.doutuTimeInterval) 17 | itchat.send('时间到, 斗图结束。', destinationChatroomId) 18 | 19 | class DoutuProcessor(ProcessInterface): 20 | doutuTimeInterval = 5 * 60 # seconds 21 | 22 | def __init__(self, doutuFeatureFn, whitelist=[]): 23 | self.imgFolder = 'DouTuRobot/dat/gifs/' 24 | self.doutuFolder = 'DoutuImages' 25 | self.whitelist = set(whitelist) 26 | self.activationTime = {} 27 | if not os.path.exists(self.doutuFolder): 28 | os.mkdir(self.doutuFolder) 29 | self.imageSearcher = ImageSearcher(doutuFeatureFn) 30 | logging.info('DoutuProcessor initialized.') 31 | 32 | def process(self, msg, type): 33 | # Mode management 34 | groupName = msg['User']['NickName'] 35 | destinationChatroomId = msg['FromUserName'] if re.search('@@', msg['FromUserName']) else msg['ToUserName'] 36 | if type == TEXT and msg['Content'] == '/doutu': 37 | # Control mode 38 | self.activationTime[groupName] = time() + self.doutuTimeInterval 39 | itchat.send('鸭哥进入斗图模式! {0}分钟内群里所有照片和表情(除了商城表情),鸭哥都会回复斗图!'.format(int(self.doutuTimeInterval / 60)), destinationChatroomId) 40 | Thread(target=DoutuEnd, args=[destinationChatroomId]).start() 41 | return 42 | if type != PICTURE: 43 | return 44 | # If in whitelist. skip the mode check. Otherwise check the activation time. 45 | if groupName not in self.whitelist: 46 | if groupName not in self.activationTime or self.activationTime [groupName] <= time(): 47 | return 48 | 49 | logging.info('[Doutu] Begin processing...') 50 | fn = msg['FileName'] 51 | newfn = os.path.join(self.doutuFolder, fn) 52 | msg['Text'](fn) 53 | os.rename(fn, newfn) 54 | newfnjpg = newfn + '.jpg' 55 | call(['convert', '{0}[0]'.format(newfn), newfnjpg]) 56 | if os.path.exists(newfnjpg): 57 | logging.info('[Doutu] imagemagick succeeded.') 58 | else: 59 | itchat.send('鸭哥没办法和腾讯表情商城的表情斗图。。', destinationChatroomId) 60 | logging.info('[Doutu] imagemagick failed.') 61 | return 62 | 63 | doutufn = self.imageSearcher.search(newfnjpg) 64 | doutufn = os.path.join(self.imgFolder, doutufn) 65 | itchat.send('@img@{0}'.format(doutufn), destinationChatroomId) 66 | logging.info('Doutu! {0} => {1}.'.format(newfn, doutufn)) 67 | 68 | if __name__ == '__main__': 69 | processor = DoutuProcessor('./DoutuFeatures.txt') 70 | -------------------------------------------------------------------------------- /DoutuRobot/README.md: -------------------------------------------------------------------------------- 1 | ## 斗图功能 2 | 3 | 目前对这个功能并没有官方支持。 4 | 这个文档只是为感兴趣的读者做一个参考。 5 | 要想部署使用这个系统,需要一些深度学习的知识和经验,并且需要读一下代码。 6 | 7 | 系统的基本框架是,用Caffe把所有图片的feature抽出来,构成一个数据库。 8 | 新的图片进来以后,抽feature,在这个数据库里面进行检索。 9 | 最接近的几个图里面随机挑选一个返回。 10 | 11 | ### 训练 12 | 13 | * 安装Caffe。`installCaffe.sh`可以作为一个参考。几个要点:不要忘了`make pycaffe`;用OpenBLAS启用多线程可以减小Latency;如果有GPU的话可以大幅加速。 14 | * 执行`dedupAndCopy.sh`转换文件格式。 15 | * 执行`incrementalExtractFeatures.sh`抽取feature。 16 | * 把生成的feature文件`featuresall.txt`拷贝到父目录,并且在main.py里面指定`DoutuProcessor`的文件路径。 17 | -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170403-210016.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170403-210016.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170403-232221.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170403-232221.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170404-021026.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-021026.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170404-033135.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-033135.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170404-051440.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-051440.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170404-062843.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-062843.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170404-065246.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-065246.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170404-081520.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-081520.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170404-205424.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-205424.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170404-210537.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-210537.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170404-234147.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-234147.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170404-235954.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170404-235954.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170405-020917.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-020917.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170405-021924.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-021924.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170405-062442.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-062442.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170405-071029.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-071029.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170405-092427.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-092427.gif -------------------------------------------------------------------------------- /DoutuRobot/dat/gifs/170405-092558.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/grapeot/WechatForwardBot/6d9d1d7b12f1730ea77c654c5e500a4f83579785/DoutuRobot/dat/gifs/170405-092558.gif -------------------------------------------------------------------------------- /DoutuRobot/dedupAndCopy.sh: -------------------------------------------------------------------------------- 1 | DedupAndCopyFile() { 2 | LC_ALL=C md5sum $1/*.gif | sort -k1,1 -u | awk '{print $2;}' | sed 's/^.*\///' > files.txt 3 | rsync -av --files-from=files.txt $1 dat/gifs 4 | rm files.txt 5 | } 6 | 7 | DedupAndCopyFile '../HistoryImages' 8 | 9 | # Convert to jpg for training use 10 | ls ./dat/gifs | xargs -n1 -I{} -P4 bash -c 'echo {}; if [ ! -e "dat/jpgs/{}.jpg" ]; then convert "dat/gifs/{}[0]" "dat/jpgs/{}.jpg"; fi' 11 | # Generate files list for Caffe use 12 | find dat/jpgs > files.txt 13 | -------------------------------------------------------------------------------- /DoutuRobot/extractFeatures.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os, sys, getopt 3 | import hashlib 4 | 5 | # Main path to your caffe installation 6 | caffe_root = './caffe/' 7 | 8 | # Model prototxt file 9 | model_prototxt = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt' 10 | #model_prototxt = caffe_root + 'models/bvlc_googlenet/deploy.prototxt' 11 | 12 | # Model caffemodel file 13 | model_trained = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel' 14 | #model_trained = caffe_root + 'models/bvlc_googlenet/bvlc_googlenet.caffemodel' 15 | 16 | # File containing the class labels 17 | imagenet_labels = caffe_root + 'data/ilsvrc12/synset_words.txt' 18 | 19 | # Path to the mean image (used for input processing) 20 | mean_path = caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy' 21 | 22 | # Name of the layer we want to extract 23 | #layer_name = 'pool5/7x7_s1' 24 | layer_name = 'fc8' 25 | 26 | sys.path.insert(0, caffe_root + 'python') 27 | import caffe 28 | 29 | def main(argv): 30 | inputfile = '' 31 | outputfile = '' 32 | 33 | try: 34 | opts, args = getopt.getopt(argv,"hi:o:",["ifile=","ofile="]) 35 | except getopt.GetoptError: 36 | print('caffe_feature_extractor.py -i -o ') 37 | sys.exit(2) 38 | 39 | for opt, arg in opts: 40 | if opt == '-h': 41 | print('caffe_feature_extractor.py -i -o ') 42 | sys.exit() 43 | elif opt in ("-i"): 44 | inputfile = arg 45 | elif opt in ("-o"): 46 | outputfile = arg 47 | 48 | print('Reading images from "', inputfile) 49 | print('Writing vectors to "', outputfile) 50 | 51 | # Setting this to CPU, but feel free to use GPU if you have CUDA installed 52 | caffe.set_mode_cpu() 53 | # Loading the Caffe model, setting preprocessing parameters 54 | net = caffe.Classifier(model_prototxt, model_trained, 55 | mean=np.load(mean_path).mean(1).mean(1), 56 | channel_swap=(2,1,0), 57 | raw_scale=255, 58 | image_dims=(256, 256)) 59 | 60 | # Loading class labels 61 | with open(imagenet_labels) as f: 62 | labels = f.readlines() 63 | 64 | # This prints information about the network layers (names and sizes) 65 | # You can uncomment this, to have a look inside the network and choose which layer to print 66 | #print [(k, v.data.shape) for k, v in net.blobs.items()] 67 | #exit() 68 | 69 | # Processing one image at a time, printint predictions and writing the vector to a file 70 | with open(inputfile, 'r') as reader: 71 | with open(outputfile, 'w') as writer: 72 | writer.truncate() 73 | for image_path in reader: 74 | try: 75 | image_path = image_path.strip() 76 | with open(image_path, 'rb') as fp: 77 | cachekey = hashlib.sha224(fp.read()).hexdigest() 78 | input_image = caffe.io.load_image(image_path) 79 | prediction = net.predict([input_image], oversample=False) 80 | print(os.path.basename(image_path), ' : ' , labels[prediction[0].argmax()].strip() , ' (', prediction[0][prediction[0].argmax()] , ')') 81 | feature = net.blobs[layer_name].data[0].reshape(1,-1) 82 | featureTxt = ' '.join([ str(x) for x in feature.tolist()[0] ]) 83 | writer.write('{0}\t{1}\t{2}\n'.format(image_path, cachekey, featureTxt)) 84 | except Exception as e: 85 | print(e) 86 | print('ERROR: skip {0}.'.format(image_path)) 87 | 88 | if __name__ == "__main__": 89 | main(sys.argv[1:]) 90 | -------------------------------------------------------------------------------- /DoutuRobot/incrementalExtractFeatures.sh: -------------------------------------------------------------------------------- 1 | # Generate files.txt 2 | awk '{ printf("dat/jpgs/%s.jpg\n", $1); }' < ./featuresall.txt | sort > extractedFiles.txt 3 | find dat/jpgs -type f | sort > allFiles.txt 4 | comm -23 allFiles.txt extractedFiles.txt > files.txt 5 | rm allFiles.txt 6 | rm extractedFiles.txt 7 | lines=`wc -l files.txt` 8 | echo files.txt generated, with $lines lines. 9 | 10 | # Invoke Caffe to extract features 11 | python3 -u ./extractFeatures.py -i files.txt -o newFeatures.txt 12 | sed -i 's/dat\/jpgs\///' newFeatures.txt 13 | sed -i 's/\.jpg\t/\t/' newFeatures.txt 14 | cp featuresall.txt featuresall.txt.bak 15 | cat newFeatures.txt >> featuresall.txt 16 | -------------------------------------------------------------------------------- /DoutuRobot/installCaffe.sh: -------------------------------------------------------------------------------- 1 | #Dependencies 2 | sudo apt-get install -y libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler 3 | sudo apt-get install -y --no-install-recommends libboost-all-dev 4 | sudo apt-get install -y libatlas-base-dev 5 | sudo apt-get install -y python3-dev 6 | sudo apt-get install -y libgoogle-glog-dev liblmdb-dev 7 | 8 | # Caffe 9 | git clone https://github.com/BVLC/caffe 10 | cd caffe 11 | cp Makefile.config.example Makefile.config 12 | echo "ALSO NEED TO MODIFY THE FILE IF YOU WANT CPU_ONLY" 13 | read 14 | 15 | # Debian only 16 | echo 'INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial/' >> Makefile.config 17 | echo 'LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config 18 | 19 | make all -j4 20 | -------------------------------------------------------------------------------- /GaTextHook.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from itchat.content import * 4 | from ProcessInterface import ProcessInterface 5 | from utilities import * 6 | from time import time, sleep 7 | from threading import Timer 8 | from datetime import datetime 9 | import itchat 10 | import re 11 | import logging 12 | 13 | def clearGaNumDict(): 14 | GaTextHook.gaNumDict = {} 15 | client[dbName][gaCollName].remove({}, {'multi': True}) 16 | logging.info('GaNumDict cleared. GaNumDict = {0}.'.format(GaTextHook.gaNumDict)) 17 | scheduleTimerToClearGaNumDict() 18 | 19 | def scheduleTimerToClearGaNumDict(): 20 | t = datetime.today() 21 | t2 = t.replace(day=t.day+1, hour=9, minute=0, second=0, microsecond=0) # 0:00 in China 22 | deltaT = t2 - t 23 | secs = deltaT.seconds + 1 24 | Timer(secs, clearGaNumDict).start() 25 | 26 | # The logic is getting more complicated. We make it a seprate processor 27 | class GaTextHook(ProcessInterface): 28 | gaNumDict = {} 29 | def __init__(self, blacklist=[]): 30 | self.blacklist = blacklist 31 | self.client = client 32 | self.gaColl = self.client[dbName][gaCollName] 33 | GaTextHook.gaNumDict = { x['GroupName']: x['CurrentGaNum'] for x in self.gaColl.find() } 34 | self.gaNumMax = 100 35 | self.triggerText = '鸭哥' 36 | self.gaText = '嘎?' 37 | self.forceTriggerText = '鸭哥嘎一个' 38 | self.forceTriggerNextTimestamp = {} 39 | self.forceTriggerInterval = 5 * 60 # 5 minutes 40 | self.forceTriggerGaText = '强力嘎!' 41 | scheduleTimerToClearGaNumDict() 42 | 43 | # Set up the clear timer 44 | logging.info('GaTextHook initialized.') 45 | 46 | def process(self, msg, type): 47 | if type != TEXT: 48 | return 49 | groupName = msg['User']['NickName'] 50 | toSend = None 51 | if any([ re.search(x, groupName) is not None for x in self.blacklist ]): 52 | return 53 | if re.search(self.forceTriggerText, msg['Content']): 54 | currentTime = time() 55 | gaNextTime = self.forceTriggerNextTimestamp.get(groupName, 0) 56 | if currentTime < gaNextTime: 57 | logging.info("Don't force Ga because time {0} < NextTime {1} for group {2}.".format(currentTime, gaNextTime, groupName)) 58 | return; 59 | self.forceTriggerNextTimestamp[groupName] = currentTime + self.forceTriggerInterval 60 | toSend = self.forceTriggerGaText 61 | logging.info('{0} => {1}'.format(msg['Content'], toSend)) 62 | itchat.send(toSend, msg['FromUserName']) 63 | return 64 | if re.search(self.triggerText, msg['Content']): 65 | # Check the ga time 66 | if groupName not in GaTextHook.gaNumDict: 67 | GaTextHook.gaNumDict[groupName] = 0 68 | GaTextHook.gaNumDict[groupName] += 1 69 | self.gaColl.update({'GroupName': groupName}, {'$set': { 'CurrentGaNum': GaTextHook.gaNumDict[groupName] } }, upsert=True) 70 | if GaTextHook.gaNumDict[groupName] > self.gaNumMax: 71 | logging.info("Don't Ga because GaNum {0} exceeds max {1} for group {2}.".format(GaTextHook.gaNumDict[groupName], self.gaNumMax, groupName)) 72 | return 73 | toSend = '{0} x{1}'.format(self.gaText, GaTextHook.gaNumDict[groupName]) 74 | logging.info('{0} => {1}'.format(msg['Content'], toSend)) 75 | itchat.send(toSend, msg['FromUserName']) 76 | 77 | if __name__ == '__main__': 78 | logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) 79 | hook = GaTextHook() 80 | -------------------------------------------------------------------------------- /GlobalTextHook.py: -------------------------------------------------------------------------------- 1 | from itchat.content import * 2 | from ProcessInterface import ProcessInterface 3 | import itchat 4 | import re 5 | import logging 6 | 7 | class GlobalTextHook(ProcessInterface): 8 | def __init__(self, subdict={}, blacklist=[]): 9 | self.dict = subdict 10 | self.blacklist = blacklist 11 | logging.info('GlobalTextHook initialized.') 12 | 13 | def process(self, msg, type): 14 | if type != TEXT: 15 | return 16 | if any([ re.search(x, msg['User']['NickName']) is not None for x in self.blacklist ]): 17 | return 18 | for k in self.dict: 19 | if re.search(k, msg['Content']): 20 | v = self.dict[k] 21 | logging.info('{0} => {1}'.format(msg['Content'], v)) 22 | itchat.send(v, msg['FromUserName']) 23 | -------------------------------------------------------------------------------- /GroupMessageForwarder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from utilities import * 3 | from itchat.content import * 4 | from ProcessInterface import ProcessInterface 5 | import os 6 | import itchat 7 | 8 | class GroupMessageForwarder(ProcessInterface): 9 | def __init__(self, chatroomDisplayNames, chatroomNames, isBidirectional=True): 10 | self.isInitialized = False 11 | self.chatroomDisplayNames = chatroomDisplayNames 12 | self.chatroomNames = chatroomNames 13 | self.isBidirectional = isBidirectional 14 | chatrooms = itchat.get_chatrooms() 15 | self.chatroomObjs = [ getChatroomByName(chatrooms, x) for x in chatroomNames ] 16 | if len([ x for x in self.chatroomObjs if x is None ]) != 0: 17 | logging.info('Cannot find chatrooms for {0}'.format(chatroomNames)) 18 | return 19 | self.chatroomIds = [ x['UserName'] for x in self.chatroomObjs ] 20 | self.nickNameLookup = NickNameLookup(self.chatroomObjs) 21 | self.fileFolder = 'ForwarderFiles' 22 | if not os.path.exists(self.fileFolder): 23 | os.mkdir(self.fileFolder) 24 | logging.info('Fetched user ids for the chatrooms {0}.'.format(chatroomNames)) 25 | self.isInitialized = True 26 | 27 | def process(self, msg, type): 28 | if not self.isInitialized: 29 | logging.error('The forwarder was not properly initialized. Please send a message in the groups you want to connect and try again.') 30 | return 31 | shallSendObj = self.shallSend(msg) 32 | if not shallSendObj['shallSend']: 33 | return 34 | if type == TEXT: 35 | fromText = '[{0}]'.format(self.chatroomDisplayNames[shallSendObj['fromChatroom']]) 36 | destinationChatroomId = self.chatroomIds[not shallSendObj['fromChatroom']] 37 | content = '{0} {1}: {2}'.format(fromText, msg['ActualNickName'], msg['Content']) 38 | logging.info(content) 39 | itchat.send(content, destinationChatroomId) 40 | elif type == PICTURE: 41 | fn = msg['FileName'] 42 | newfn = os.path.join(self.fileFolder, fn) 43 | msg['Text'](fn) 44 | os.rename(fn, newfn) 45 | type = {'Picture': 'img', 'Video': 'vid'}.get(msg['Type'], 'fil') 46 | typeText = {'Picture': '图片', 'Video': '视频'}.get(msg['Type'], '文件') 47 | fromText = '[{0}]'.format(self.chatroomDisplayNames[shallSendObj['fromChatroom']]) 48 | destinationChatroomId = self.chatroomIds[not shallSendObj['fromChatroom']] 49 | content = '{0} {1} 发送了{2}:'.format(fromText, self.nickNameLookup.lookupNickName(msg), typeText) 50 | itchat.send(content, destinationChatroomId) 51 | logging.info(content) 52 | itchat.send('@{0}@{1}'.format(type, newfn), destinationChatroomId) 53 | elif type == SHARING: 54 | fromText = '[{0}]'.format(self.chatroomDisplayNames[shallSendObj['fromChatroom']]) 55 | destinationChatroomId = self.chatroomIds[not shallSendObj['fromChatroom']] 56 | content = '{0} {1} 分享了链接: {2} {3}'.format(fromText, self.nickNameLookup.lookupNickName(msg), msg['Text'], msg['Url']) 57 | logging.info(content) 58 | itchat.send(content, destinationChatroomId) 59 | else: 60 | logging.info('Unknown type encoutered.') 61 | pass 62 | 63 | def shallSend(self, msg): 64 | result = False 65 | for i in range(len(self.chatroomIds)): 66 | result = result or extractToUserName(msg) == self.chatroomIds[i] or extractFromUserName(msg) == self.chatroomIds[i] 67 | if result: 68 | return { 'shallSend': True, 'fromChatroom': i } 69 | if not self.isBidirectional: 70 | break 71 | return { 'shallSend': False } 72 | -------------------------------------------------------------------------------- /GroupTagCloud.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from utilities import * 3 | from itchat.content import * 4 | from collections import Counter 5 | from pymongo import MongoClient, DESCENDING 6 | from wordcloud import WordCloud 7 | from ProcessInterface import ProcessInterface 8 | import itertools 9 | import gensim 10 | import os 11 | import itchat 12 | import re 13 | import random 14 | import time 15 | import logging 16 | import jieba 17 | 18 | class GroupTagCloud(ProcessInterface): 19 | recordMaxNum = 500 20 | maxFrequency = 40 21 | imgDir = 'TagCloud' 22 | 23 | def __init__(self, fontPath): 24 | self.client = MongoClient() 25 | self.coll = self.client[dbName][collName] 26 | self.fontPath = fontPath 27 | self.wordCloud = WordCloud(font_path=self.fontPath, width=400, height=400, max_words=100) 28 | if not os.path.exists(self.imgDir): 29 | os.mkdir(self.imgDir) 30 | logging.info('GroupTagCloud connected to MongoDB.') 31 | 32 | def process(self, msg, type): 33 | shallRunObj = self.isRun(msg, type) 34 | if shallRunObj['shallRun']: 35 | toLog = 'Generating tag cloud for {0}.'.format(shallRunObj['groupName']) 36 | if shallRunObj['userName']: 37 | toLog = '{0} Username {1}.'.format(toLog, shallRunObj['userName']) 38 | logging.info(toLog) 39 | fn = self.generateTagCloudForGroupV2(shallRunObj['groupName'], shallRunObj['userName']) 40 | destinationChatroomId = msg['FromUserName'] if re.search('@@', msg['FromUserName']) else msg['ToUserName'] 41 | logging.info('Sending tag cloud file {0} to {1}.'.format(fn, destinationChatroomId)) 42 | itchat.send('@img@{0}'.format(fn), destinationChatroomId) 43 | 44 | # Generate a tag cloud image from the latest self.recordMaxNum records, based on TF-IDF. Return the file name. 45 | def generateTagCloudForGroupV2(self, groupName, userName=None): 46 | records = None 47 | if userName is None: 48 | records = self.coll.find({ 'to': groupName }).sort([ ('timestamp', DESCENDING) ]).limit(self.recordMaxNum) 49 | allRecords = self.coll.find({ 'to': { '$ne': groupName } }).sort([ ('timestamp', DESCENDING) ]).limit(self.recordMaxNum * 5) 50 | allRecordsGroup = sorted(allRecords, key=lambda x: x['to']) 51 | else: 52 | records = self.coll.find({ 'from': userName, 'to': groupName }).sort([ ('timestamp', DESCENDING) ]).limit(self.recordMaxNum) 53 | allRecords = self.coll.find({ 'from': { '$ne': userName }, 'to': groupName }).sort([ ('timestamp', DESCENDING) ]).limit(self.recordMaxNum * 5) 54 | allRecordsGroup = sorted(allRecords, key=lambda x: x['from']) 55 | docThisGroup = list(jieba.cut(' '.join([ r['content'] for r in records if re.match('<< 1 ]) 79 | frequencies = { k: min(self.maxFrequency, frequencies[k]) for k in frequencies } 80 | img = self.wordCloud.generate_from_frequencies(frequencies).to_image() 81 | fn = self.generateTmpFileName() 82 | img.save(fn) 83 | return fn 84 | 85 | def isRun(self, msg, type): 86 | if type != TEXT or 'Content' not in msg: 87 | return { 'shallRun': False } 88 | if re.search(r'^\s*/tagcloud$', msg['Content']): 89 | return { 'shallRun': True, 'userName': None, 'groupName': msg['User']['NickName'] } 90 | if re.search(r'^\s*/mytag$', msg['Content']): 91 | return { 'shallRun': True, 'userName': msg['ActualNickName'], 'groupName': msg['User']['NickName'] } 92 | return { 'shallRun': False } 93 | 94 | def generateTmpFileName(self): 95 | return '{0}/{1}-{2}.png'.format(self.imgDir, int(time.time() * 1000), random.randint(0, 10000)) 96 | 97 | if __name__ == '__main__': 98 | groupTagCloud = GroupTagCloud('/usr/share/fonts/truetype/wqy/wqy-microhei.ttc') 99 | groupTagCloud.generateTagCloudForGroup('TestGroup', '鸭哥') 100 | groupTagCloud.generateTagCloudForGroupV2('TestGroup', '鸭哥') 101 | -------------------------------------------------------------------------------- /HistoryRecorder.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | from datetime import datetime 3 | from ProcessInterface import ProcessInterface 4 | from utilities import * 5 | from itchat.content import * 6 | import os 7 | 8 | class HistoryRecorder(ProcessInterface): 9 | def __init__(self): 10 | self.client = client 11 | self.coll = self.client[dbName][collName] 12 | self.imgFolder = 'HistoryImages' 13 | if not os.path.exists(self.imgFolder): 14 | os.mkdir(self.imgFolder) 15 | logging.info('HistoryRecorder initialized.') 16 | 17 | # Record an itchat message to mongodb 18 | # Currently only support text messages in group chats 19 | def process(self, msg, type): 20 | if type == PICTURE: 21 | fn = msg['FileName'] 22 | newfn = os.path.join(self.imgFolder, fn) 23 | msg['Text'](fn) 24 | os.rename(fn, newfn) 25 | msg['Content'] = '<<>>'.format(newfn) 26 | if type == TEXT or type == PICTURE: 27 | timestamp = time() 28 | rtime = datetime.utcfromtimestamp(timestamp) 29 | r = { 30 | 'content': msg['Content'], 31 | 'from': msg['ActualNickName'], 32 | 'fromId': msg['ToUserName'], 33 | 'to': msg['User']['NickName'] if 'User' in msg and 'UserName' in msg['User'] else 'N/A', 34 | 'timestamp': timestamp, 35 | 'time': rtime 36 | } 37 | self.coll.insert(r) 38 | -------------------------------------------------------------------------------- /ImageSearcher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | import sys 4 | import logging 5 | import hashlib 6 | import sys 7 | 8 | # Caffe related configurations 9 | caffe_root = './DouTuRobot/caffe/' 10 | 11 | # Old model: googlenet 12 | #model_prototxt = caffe_root + 'models/bvlc_googlenet/deploy.prototxt' 13 | #model_trained = caffe_root + 'models/bvlc_googlenet/bvlc_googlenet.caffemodel' 14 | #layer_name = 'pool5/7x7_s1' 15 | # New model: caffenet 16 | model_prototxt = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt' 17 | model_trained = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel' 18 | layer_name = 'fc8' 19 | 20 | imagenet_labels = caffe_root + 'data/ilsvrc12/synset_words.txt' 21 | mean_path = caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy' 22 | sys.path.insert(0, caffe_root + 'python') 23 | import caffe 24 | caffe.set_mode_cpu() 25 | logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) 26 | 27 | class ImageSearcher: 28 | def __init__(self, featurefn): 29 | self.resultNum = 5 30 | self.net = caffe.Classifier(model_prototxt, model_trained, 31 | mean=np.load(mean_path).mean(1).mean(1), 32 | channel_swap=(2,1,0), 33 | raw_scale=255, 34 | image_dims=(256, 256)) 35 | self.parseFeature(featurefn) 36 | logging.info('Caffe net initialized. Loading cache...') 37 | #self.buildCache() 38 | self.loadCache('./featureCache.tsv') 39 | logging.info('Cache built.') 40 | 41 | def loadCache(self, fn): 42 | lines = [ x.strip().split('\t') for x in open(fn) ] 43 | self.resultCache = { x[0]: x[1].split(',') for x in lines } 44 | 45 | # Precompute the SHA-SearchResult cache 46 | def buildCache(self): 47 | for i in range(self.features.shape[0]): 48 | qfeature = self.features[i, :] 49 | results = self.searchWithFeature(qfeature) 50 | self.resultCache[self.hashes[i]] = results 51 | sys.stderr.write('.') 52 | sys.stderr.write('\n') 53 | 54 | # The feature file is expected to have three columns: id (usually file name), md5, and DNN features 55 | def parseFeature(self, fn): 56 | features = [] 57 | self.imgfns = [] 58 | self.hashes = [] 59 | for line in open(fn): 60 | imgfn, hash, imgfeatures = line.split('\t') 61 | features.append([ float(x) for x in imgfeatures.split(' ') ]) 62 | self.hashes.append(hash) 63 | self.imgfns.append(imgfn) 64 | self.features = np.asarray(features) 65 | self.resultCache = {} 66 | 67 | # Return the features extracted from the file 68 | def extractFeatures(self, fn): 69 | input_image = caffe.io.load_image(fn) 70 | self.net.predict([input_image], oversample=False) 71 | feature = self.net.blobs[layer_name].data[0] 72 | return feature 73 | 74 | def searchWithFeature(self, queryFeature): 75 | disp = self.features - queryFeature 76 | distances = (disp * disp).sum(1) 77 | indices = np.argsort(distances) 78 | if distances[indices[0]] < 0.02: 79 | # very close. We don't want to send dups 80 | index = indices[1: 1 + self.resultNum] 81 | else: 82 | index = indices[0: self.resultNum] 83 | return [ self.imgfns[i] for i in index ] 84 | 85 | # Search for the most similar image to the given query 86 | def search(self, fn): 87 | # First check the cache 88 | with open(fn, 'rb') as fp: 89 | cachekey = hashlib.sha224(fp.read()).hexdigest() 90 | if cachekey in self.resultCache: 91 | logging.info('Cache hit! Directly return.') 92 | return self.resultCache[cachekey] 93 | # Cache miss. Search and update cache 94 | queryFeature = self.extractFeatures(fn).reshape(1, -1) 95 | result = self.searchWithFeature(queryFeature) 96 | self.resultCache[cachekey] = result 97 | return result 98 | 99 | if __name__ == '__main__': 100 | imageSearcher = ImageSearcher('./DoutuFeatures.txt') 101 | print(imageSearcher.search('./DouTuRobot/dat/jpgs/170405-013811.gif.jpg')) 102 | print(imageSearcher.search('./DouTuRobot/dat/jpgs/170405-013811.gif.jpg')) 103 | -------------------------------------------------------------------------------- /PaiDuiHook.py: -------------------------------------------------------------------------------- 1 | from itchat.content import * 2 | from ProcessInterface import ProcessInterface 3 | import itchat 4 | import re 5 | import logging 6 | 7 | class PaiDuiHook(ProcessInterface): 8 | groupContentCacheMaxCapacity = 5 9 | maxSelfPaiDuiTTL = 15 10 | 11 | def __init__(self, blacklist=[]): 12 | self.blacklist = blacklist 13 | self.groupLastMsgsDict = {} 14 | # A dictionary controlling not pai dui for more than one time 15 | # Key: (groupName, content), Value: TTL (0 or non-exist means OK to paidui) 16 | self.selfPaiDuiTTL = {} 17 | logging.info('PaiduiHook initialized.') 18 | 19 | def WhatToPaiDui(self, groupName): 20 | msgCount = {} 21 | msgs = self.groupLastMsgsDict[groupName] 22 | for msg in msgs: 23 | if msg['Content'] not in msgCount: 24 | msgCount[msg['Content']] = 0 25 | msgCount[msg['Content']] += 1 26 | contentToPaiDui = [ x for x in msgCount if msgCount[x] > 1 ] 27 | if len(contentToPaiDui) == 0: 28 | # No dui to pai 29 | return 30 | # it's possible that two duis are formed at the same time, but only one can pass the TTL check 31 | for content in contentToPaiDui: 32 | if (groupName, content) not in self.selfPaiDuiTTL or self.selfPaiDuiTTL == 0: 33 | self.selfPaiDuiTTL[(groupName, content)] = self.maxSelfPaiDuiTTL 34 | yield content # We use yield here because we still need to conitnue managing the TTL 35 | else: 36 | self.selfPaiDuiTTL[(groupName, content)] -= 1 37 | 38 | def isFromSelf(self, msg): 39 | if re.search('^@@', msg['ToUserName']): 40 | return True 41 | else: 42 | return False 43 | 44 | def updateGroupContentCache(self, msg, groupName): 45 | if groupName not in self.groupLastMsgsDict: 46 | self.groupLastMsgsDict[groupName] = [] 47 | if len(self.groupLastMsgsDict[groupName]) >= self.groupContentCacheMaxCapacity: 48 | self.groupLastMsgsDict[groupName].pop(0) 49 | self.groupLastMsgsDict[groupName].append({ 'Content': msg['Content'] }) 50 | 51 | def process(self, msg, type): 52 | if type != TEXT: 53 | return 54 | groupName = msg['User']['NickName'] 55 | if any([ re.search(x, groupName) is not None for x in self.blacklist ]): 56 | return 57 | if re.search('^/', msg['Content']): 58 | return 59 | if self.isFromSelf(msg): 60 | # Stop processing if the message is from myself 61 | return 62 | 63 | self.updateGroupContentCache(msg, groupName) 64 | contentToPaiDui = list(self.WhatToPaiDui(groupName)) 65 | if len(contentToPaiDui) > 1: 66 | logging.error('Multiple duis detected.') 67 | if len(contentToPaiDui) != 0: 68 | # Pai dui! 69 | itchat.send(msg['Content'], msg['FromUserName']) 70 | logging.info('Pai Dui! {0}.'.format(msg['Content'])) 71 | # Update data structure to avoid Pai dui for multiple times. 72 | self.updateGroupContentCache({ 'Content': msg['Content'], 'FromSelf': True }, groupName) 73 | -------------------------------------------------------------------------------- /ProcessInterface.py: -------------------------------------------------------------------------------- 1 | class ProcessInterface: 2 | def process(self, msg, type): 3 | raise NotImplementedError("ProcessInterface is an abstract interface") 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 微信群机器人 2 | 3 | 目前支持的功能: 4 | 5 | * 响应/help,显示帮助信息。 6 | * 在两个群中间转发文本,图片,自定义动画表情,链接。微信群有500人的容量限制,一个消息转发机器人可以通过把几个群接起来相互转发消息来突破这样的限制。 7 | * 用正则表达式来自定义回复。 8 | * 响应/tagcloud,生成整个群的标签云,或者响应/mytag,生成某个用户的标签云。 9 | * 响应/activity,生成群的活跃度时间图,以及活跃用户饼图。 10 | * 自动排队。如果群里出现了二传,自动三传。 11 | * 斗图。/doutu进入斗图模式,用深度学习确定回复什么图,进行斗图。 12 | * 兼容Windows和Linux。如果在Windows下运行,请更改字体路径。 13 | 14 | 目前不支持的功能: 15 | 16 | * 转发表情商城中的表情(即使表情是免费的) 17 | * 转发红包(想啥呢你) 18 | * 多于两个群的转发(但更改应该很简单) 19 | 20 | ## 部署 21 | 22 | * 运行`deploy.sh`。或者手工安装python,mongodb,然后安装`requirements.txt`中的库。 23 | * 依赖于[itchat](https://itchat.readthedocs.io/zh/latest/), [mongodb](https://docs.mongodb.com/manual/administration/install-community/). 24 | 25 | ## 运行 26 | 27 | * 这个小工具不是针对最终用户的,所以现在需要改code(`main.py`)来设置一些参数,尤其是群名,黑白名单等等。 28 | * `python3 -u main.py`。会弹出一个二维码扫码登录。 29 | * 如果在Linux VPS上运行,ssh进去的时候记得加上X转发,这样才能看到二维码。如果不用X转发的话也可以手工下载文件或者改用命令行二维码。 30 | 31 | ## 已知问题 32 | 33 | * 如果启动时候说无法找到群,请在群里说句话。这是微信接口的限制所致。 34 | * 如果启动的时候说连接27017端口connection refused,这是因为你没有安装mongodb。安装mongodb可以解决这个问题。 35 | * 如果出来的标签云里面都是框框,这是字体没有配置好所致。请去`main.py`里面更改字体路径。 36 | * 斗图的功能需要一些深度学习的基础知识和Caffe的相关经验。目前不提供支持。但DoutuRobot文件夹下面有一些文档可以参考。 37 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | # Install mongodb and python 2 | sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 0C49F3730359A14518585931BC711F9BA15703C6 3 | # We assume it's 16.04. Check out https://docs.mongodb.com/manual/tutorial/install-mongodb-on-ubuntu/ for other versions. 4 | echo "deb [ arch=amd64,arm64 ] http://repo.mongodb.org/apt/ubuntu xenial/mongodb-org/3.4 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.4.list 5 | sudo apt-get update 6 | sudo apt-get install -y mongodb-org 7 | sudo apt-get install -y python3 python3-pip 8 | 9 | # Install python dependencies 10 | sudo pip3 install -r requirements.txt 11 | -------------------------------------------------------------------------------- /launchWechatBot.sh: -------------------------------------------------------------------------------- 1 | # Execute this to launch mongodb 2 | # mongod --dbpath=~/Documents/temp/WechatHistoryMongoDB & 3 | # Launch python script 4 | python3 -u main.py 2>&1 | tee log.txt 5 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import itchat, time, re 3 | from itchat.content import * 4 | from utilities import * 5 | from sys import argv, exit 6 | from GlobalTextHook import GlobalTextHook 7 | from GaTextHook import GaTextHook 8 | from PaiDuiHook import PaiDuiHook 9 | from HistoryRecorder import HistoryRecorder 10 | from GroupTagCloud import GroupTagCloud 11 | from GroupMessageForwarder import GroupMessageForwarder 12 | from ProcessInterface import ProcessInterface 13 | from ActivityInfo import ActivityInfo 14 | from DoutuProcessor import DoutuProcessor 15 | import logging 16 | logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) 17 | 18 | # Some global switches for debugging use only 19 | isDebug = not True 20 | 21 | # Component initialization 22 | itchat.auto_login(True) 23 | plugins = [ 24 | GlobalTextHook({ '^/help$': """鸭哥调戏指南: 25 | /activity: 查看本群活动和话唠排名 26 | /tagcloud: 查看本群所有发言标签云 27 | /mytag: 查看自己的消息标签云 28 | /doutu: 启动斗图模式,机器人会对每一个非商城表情斗图。持续5分钟。 29 | 此外,鸭哥是只有节操的鸭,每天只嘎100次。可以用鸭哥嘎一个来无视100次限制强行召唤鸭哥,但有5分钟技能冷却"""}), 30 | GaTextHook(), 31 | PaiDuiHook(), 32 | HistoryRecorder(), 33 | GroupTagCloud('/usr/share/fonts/truetype/wqy/wqy-microhei.ttc'), 34 | ActivityInfo('/usr/share/fonts/truetype/wqy/wqy-microhei.ttc'), 35 | GroupMessageForwarder([ '二群', '三群' ], [ '科大AI二群测试中', '科大AI三群供测试' ]), 36 | #DoutuProcessor('./DoutuFeatures.txt') # Uncomment to enable Dou Tu 37 | ] 38 | for plugin in plugins: 39 | if not isinstance(plugin, ProcessInterface): 40 | logging.error('One of the plugins are not a subclass of ProcessInterface.') 41 | exit(-1) 42 | 43 | # Core message loops 44 | @itchat.msg_register([PICTURE, RECORDING, ATTACHMENT, VIDEO], isGroupChat=True) 45 | def picture_reply(msg): 46 | if isDebug: 47 | logging.info(msg) 48 | for plugin in plugins: 49 | try: 50 | plugin.process(msg, PICTURE) 51 | except Exception as e: 52 | logging.error(e) # so that one plug's failure won't prevent others from being executed 53 | 54 | @itchat.msg_register([SHARING], isGroupChat=True) 55 | def sharing_reply(msg): 56 | if isDebug: 57 | logging.info(msg) 58 | for plugin in plugins: 59 | try: 60 | plugin.process(msg, SHARING) 61 | except Exception as e: 62 | logging.error(e) # so that one plug's failure won't prevent others from being executed 63 | 64 | @itchat.msg_register([TEXT], isGroupChat=True) 65 | def text_reply(msg): 66 | if isDebug: 67 | logging.info(msg) 68 | for plugin in plugins: 69 | try: 70 | plugin.process(msg, TEXT) 71 | except Exception as e: 72 | logging.error(e) # so that one plug's failure won't prevent others from being executed 73 | 74 | if __name__ == '__main__': 75 | itchat.run() 76 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | itchat 2 | pymongo 3 | jieba 4 | wordcloud 5 | numpy 6 | matplotlib 7 | gensim 8 | -------------------------------------------------------------------------------- /utilities.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Some utility functions 4 | """ 5 | from pymongo import MongoClient 6 | import logging 7 | 8 | # Some global configuration 9 | dbName = 'WechatHistory' 10 | collName = 'history' 11 | gaCollName = 'GaNum' 12 | client = MongoClient('localhost', 27017) 13 | 14 | class NickNameLookup: 15 | def __init__(self, chatrooms): 16 | self.dict = {} 17 | for chatroom in chatrooms: 18 | for member in chatroom['MemberList']: 19 | self.dict[member['UserName']] = member['DisplayName'] if member['DisplayName'] != '' else member['NickName'] 20 | 21 | def lookupNickName(self, msg): 22 | if 'ActualNickName' in msg and msg['ActualNickName'] != '': 23 | return msg['ActualNickName'] 24 | username = msg['ActualUserName'] 25 | if username in self.dict: 26 | return self.dict[username] 27 | else: 28 | return '未知昵称' 29 | 30 | def extractFromUserName(msg): 31 | # It's messy that if the sender is yourself, the group name will appear in the ToUserName 32 | if 'ToUserName' in msg and msg['ToUserName'] != '': 33 | return msg['ToUserName'] 34 | 35 | def extractToUserName(msg): 36 | # For group chat FromUserName is the group Id 37 | if 'FromUserName' in msg and msg['FromUserName'] != '': 38 | return msg['FromUserName'] 39 | elif 'User' in msg and msg['User']['UserName'] != '': 40 | return msg['User']['UserName'] 41 | else: 42 | return '未知昵称' 43 | 44 | def getChatroomByName(chatrooms, name): 45 | groups = [ x for x in chatrooms if x['NickName'] == name ] 46 | if len(groups) == 0: 47 | logging.error('Cannot find the chatroom named {0}.'.format(name)) 48 | return None 49 | return groups[0] 50 | 51 | def getNameForChatroomDisplayName(name): 52 | return name.replace('&', '&') 53 | --------------------------------------------------------------------------------