├── CaptchaRecognise ├── 3.jpeg ├── icon │ ├── 0 │ │ ├── 0_1.jpeg │ │ └── 0_2.jpeg │ ├── 1 │ │ ├── 23.jpeg │ │ └── 56.jpeg │ ├── 2 │ │ ├── 15.jpeg │ │ └── 22.jpeg │ ├── 3 │ │ ├── 21.jpeg │ │ └── 52.jpeg │ ├── 4 │ │ ├── 13.jpeg │ │ ├── 14.jpeg │ │ ├── 3.jpeg │ │ └── 8.jpeg │ ├── 5 │ │ ├── 33.jpeg │ │ └── 7.jpeg │ ├── 6 │ │ ├── 0.jpeg │ │ └── 1.jpeg │ ├── 7 │ │ ├── 17.jpeg │ │ ├── 20.jpeg │ │ ├── 4.jpeg │ │ └── 9.jpeg │ ├── 8 │ │ ├── 12.jpeg │ │ └── 7.jpeg │ └── 9 │ │ ├── 25.jpeg │ │ └── 26.jpeg ├── login.py └── recognise.py ├── DocFilter ├── docclass.py ├── news_title_traindata.json ├── newstitleclassifier.py └── traindata.json ├── FaceRecognize ├── camera.py ├── camera_function.py ├── camera_ui.py └── facerecognize.py ├── README.md ├── hack ├── SVNDigger │ ├── Licence.txt │ ├── ReadMe.txt │ ├── all-dirs.txt │ ├── all-extensionless.txt │ ├── all.txt │ ├── cat │ │ ├── Conf │ │ │ ├── conf.txt │ │ │ ├── config.txt │ │ │ ├── htaccess.txt │ │ │ └── properties.txt │ │ ├── Database │ │ │ ├── inc.txt │ │ │ ├── ini.txt │ │ │ ├── mdb.txt │ │ │ ├── mdf.txt │ │ │ ├── sql.txt │ │ │ └── xml.txt │ │ ├── Language │ │ │ ├── ascx.txt │ │ │ ├── asp.txt │ │ │ ├── aspx.txt │ │ │ ├── c.txt │ │ │ ├── cfm.txt │ │ │ ├── cpp.txt │ │ │ ├── cs.txt │ │ │ ├── css.txt │ │ │ ├── html.txt │ │ │ ├── jar.txt │ │ │ ├── java.txt │ │ │ ├── js.txt │ │ │ ├── jsp.txt │ │ │ ├── jspf.txt │ │ │ ├── php.txt │ │ │ ├── php3.txt │ │ │ ├── php5.txt │ │ │ ├── phpt.txt │ │ │ ├── pl.txt │ │ │ ├── py.txt │ │ │ ├── rb.txt │ │ │ ├── sh.txt │ │ │ ├── swf.txt │ │ │ ├── tpl.txt │ │ │ ├── vb.txt │ │ │ └── wsdl.txt │ │ └── Project │ │ │ ├── csproj.txt │ │ │ ├── pdb.txt │ │ │ ├── resx.txt │ │ │ ├── sln.txt │ │ │ ├── suo.txt │ │ │ └── vbproj.txt │ ├── context │ │ ├── admin.txt │ │ ├── debug.txt │ │ ├── error.txt │ │ ├── help.txt │ │ ├── index.txt │ │ ├── install.txt │ │ ├── log.txt │ │ ├── readme.txt │ │ ├── root.txt │ │ ├── setup.txt │ │ └── test.txt │ └── test.txt ├── pcap_parser.py ├── scanner.py ├── sniffer.py └── 
webappmapper.py ├── participle ├── WordFrequency.py ├── dict │ └── dict.txt ├── functionwords ├── ui_participle.py └── wordfrequency.py └── proxyip ├── getproxies.py ├── proxy ├── db.sqlite3 ├── index │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-34.pyc │ │ └── views.cpython-34.pyc │ ├── admin.py │ ├── migrations │ │ └── __init__.py │ ├── models.py │ ├── tests.py │ └── views.py ├── manage.py ├── proxy │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-34.pyc │ │ ├── settings.cpython-34.pyc │ │ ├── urls.cpython-34.pyc │ │ └── wsgi.cpython-34.pyc │ ├── settings.py │ ├── urls.py │ └── wsgi.py └── sqldb.db └── verify.py /CaptchaRecognise/3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/3.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/0/0_1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/0/0_1.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/0/0_2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/0/0_2.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/1/23.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/1/23.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/1/56.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/1/56.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/2/15.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/2/15.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/2/22.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/2/22.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/3/21.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/3/21.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/3/52.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/3/52.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/4/13.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/4/13.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/4/14.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/4/14.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/4/3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/4/3.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/4/8.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/4/8.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/5/33.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/5/33.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/5/7.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/5/7.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/6/0.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/6/0.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/6/1.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/6/1.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/7/17.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/7/17.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/7/20.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/7/20.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/7/4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/7/4.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/7/9.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/7/9.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/8/12.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/8/12.jpeg -------------------------------------------------------------------------------- /CaptchaRecognise/icon/8/7.jpeg: 
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/8/7.jpeg
# --------------------------------------------------------------------------------
# /CaptchaRecognise/icon/9/25.jpeg:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/9/25.jpeg
# --------------------------------------------------------------------------------
# /CaptchaRecognise/icon/9/26.jpeg:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/zuojx1013/NyPython/afb7a89656e292bae68716ec92ee37b3a8c296ce/CaptchaRecognise/icon/9/26.jpeg
# --------------------------------------------------------------------------------
# /CaptchaRecognise/login.py:
# --------------------------------------------------------------------------------
#coding:utf-8

import requests
from recognise import *
from PIL import Image
import base64
import getpass


def login(username, passwd):
    """Log in to wsxk.hust.edu.cn, solving the captcha automatically.

    The captcha image is downloaded through the same session, saved to
    ``captcha.jpeg`` in the current directory, recognised with
    ``CaptchaRecognize`` and posted together with the credentials.  The
    recognised captcha text and the HTML of ``select.jsp`` are printed.

    Args:
        username: account name, sent as the ``username`` form field.
        passwd: password exactly as it should be sent in the ``password``
            form field (``main`` passes it base64-encoded).

    Returns:
        The ``requests`` session that was used for the login attempt.
    """
    session = requests.session()
    # Visit the login page before asking for the captcha (presumably so the
    # session obtains its cookies first -- the response body is not used).
    session.get('http://wsxk.hust.edu.cn/login.jsp')
    img = session.get('http://wsxk.hust.edu.cn/randomImage.action').content
    with open('captcha.jpeg', 'wb') as imgfile:
        imgfile.write(img)

    imageRecognize = CaptchaRecognize()
    image = Image.open('captcha.jpeg')
    result = imageRecognize.recognise(image)
    # Each result entry is (score, character); keep only the characters.
    string = ''.join(item[1] for item in result)
    print(string)

    data = {
        'usertype': "xs",
        'username': username,
        'password': passwd,
        'rand': string,
        'sm1': "",
        'ln': "app610.dc.hust.edu.cn"
    }
    headers = {
        'Host': "wsxk.hust.edu.cn",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.5",
        "Connection": "keep-alive",
        'Referer': "http://wsxk.hust.edu.cn/login.jsp",
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:39.0) Gecko/20100101 Firefox/39.0"}
    session.post('http://wsxk.hust.edu.cn/hublogin.action', data=data, headers=headers)
    html = session.get('http://wsxk.hust.edu.cn/select.jsp', headers=headers).text
    print(html)
    return session


def main():
    """Prompt for the credentials on the terminal and attempt a login."""
    username = input('username:')
    # The password is read without echo and base64-encoded before sending.
    passwd = base64.b64encode(getpass.getpass('Passwd:').encode()).decode()
    login(username, passwd)


# Only prompt for credentials when run as a script, not when imported.
if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------------
# /CaptchaRecognise/recognise.py:
# --------------------------------------------------------------------------------
#coding:utf-8

import os
import requests
from PIL import Image
import math


def imagesget():
    """Download 100 sample captchas into ``images/0.jpeg`` .. ``images/99.jpeg``.

    The ``images`` directory is created when missing; an existing directory
    is reused and files with the same names are overwritten.
    """
    os.makedirs('images', exist_ok=True)
    for count in range(100):
        img = requests.get('http://wsxk.hust.edu.cn/randomImage.action').content
        with open('images/%s.jpeg' % count, 'wb') as imgfile:
            imgfile.write(img)


def convert_image(image, threshold=120):
    """Return a black-and-white copy of *image*.

    The image is converted to 8-bit greyscale; pixels darker than
    *threshold* become 0 (black) and all others become 255 (white).

    Args:
        image: a PIL image.
        threshold: grey level below which a pixel counts as ink.

    Returns:
        A new mode ``'L'`` image of the same size containing only 0 and 255.
    """
    return image.convert('L').point(lambda pix: 0 if pix < threshold else 255)


def cut_image(image):
    """Split a binarised captcha into one image per glyph.

    Columns are scanned from left to right.  A glyph starts at the first
    column containing a black (0) pixel and ends at the next column that
    contains none.

    Args:
        image: a mode ``'L'`` image as produced by ``convert_image``.

    Returns:
        A list of full-height crops, one per glyph, in left-to-right order.
    """
    width, height = image.size
    letters = []
    start = None
    for x in range(width):
        has_ink = any(image.getpixel((x, y)) == 0 for y in range(height))
        if has_ink:
            if start is None:
                start = x
        elif start is not None:
            letters.append((start, x))
            start = None
    # A glyph touching the right edge has no blank column after it; close it
    # at the image border instead of dropping it.
    if start is not None:
        letters.append((start, width))
    return [image.crop((left, 0, right, height)) for left, right in letters]


def buildvector(image):
    """Return the pixels of *image* as a ``{pixel_index: value}`` dict."""
    return dict(enumerate(image.getdata()))


class CaptchaRecognize:
    """Digit captcha recogniser based on cosine similarity to sample icons.

    Sample images are read from ``./icon/<digit>/`` relative to the current
    working directory when the object is created.
    """

    def __init__(self):
        self.letters = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
        self.loadSet()

    def loadSet(self):
        """Load every sample icon into ``self.imgset``.

        ``self.imgset`` becomes a list with one ``{digit: [vector, ...]}``
        dict per entry of ``self.letters``.
        """
        self.imgset = []
        for letter in self.letters:
            temp = []
            for img in os.listdir('./icon/%s' % (letter)):
                # Close each icon file as soon as its pixels have been read.
                with Image.open('./icon/%s/%s' % (letter, img)) as icon:
                    temp.append(buildvector(icon))
            self.imgset.append({letter: temp})

    def magnitude(self, concordance):
        """Return the Euclidean length of the vector *concordance*."""
        return math.sqrt(sum(count ** 2 for count in concordance.values()))

    def relation(self, concordance1, concordance2):
        """Return the cosine of the angle between two pixel vectors.

        Only indices present in both vectors contribute to the dot product.
        Returns 0.0 when either vector has zero length, for which the
        cosine is undefined.
        """
        topvalue = sum(count * concordance2[word]
                       for word, count in concordance1.items()
                       if word in concordance2)
        denominator = self.magnitude(concordance1) * self.magnitude(concordance2)
        if denominator == 0:
            return 0.0
        return topvalue / denominator

    def recognise(self, image):
        """Recognise the digits in a captcha image.

        Args:
            image: the captcha as a PIL image.

        Returns:
            A list with one ``(score, digit)`` tuple per glyph found, in
            left-to-right order.  ``score`` is the mean cosine similarity
            between the glyph and the samples of the best-matching digit.
        """
        image = convert_image(image)
        result = []
        for piece in cut_image(image):
            vector = buildvector(piece)
            guess = []
            for entry in self.imgset:
                for letter, temp in entry.items():
                    if not temp:
                        # No samples for this digit: nothing to compare with.
                        continue
                    relevance = sum(self.relation(vector, img) for img in temp) / len(temp)
                    guess.append((relevance, letter))
            if guess:
                result.append(max(guess))
        return result


if __name__ == '__main__':
    imageRecognize = CaptchaRecognize()
    image = Image.open('0.jpeg')
    result = imageRecognize.recognise(image)
    string = [''.join(item[1]) for item in result]
    print(string)
-------------------------------------------------------------------------------- /DocFilter/docclass.py: -------------------------------------------------------------------------------- 1 | import re 2 | import math 3 | import json 4 | import jieba 5 | 6 | def participle(data): 7 | result=jieba.cut(data,cut_all=True) 8 | return result 9 | 10 | def get_engwords(doc): 11 | splitter=re.compile('\\W*') 12 | words=[word.lower() for word in splitter.split(doc) if len(word) >2 and len(word)<20] 13 | return dict([(word,1) for word in words]) 14 | 15 | def get_chinese_words(text): 16 | sub_re='[a-zA-Z]+|[\s+\.\!\/_,$%^*\(\d+\"\']+|[+—;—!:\(\):《》,。?、~@#¥%……&*()%~\[\]\|\?\·【】“”;-]+' 17 | text=re.sub(sub_re,'',text) 18 | words=participle(text) 19 | return dict([(word,1) for word in words]) 20 | 21 | class Classifier: 22 | def __init__(self,getfeatures,filename=None): 23 | self.thresholds={} 24 | self.fc={} 25 | self.cc={} 26 | self.getfeatures=getfeatures 27 | 28 | def incf(self,feature,cat):#增加对特征/分类组合的计数值 29 | self.fc.setdefault(feature,{}) 30 | self.fc[feature].setdefault(cat,0) 31 | self.fc[feature][cat]+=1 32 | 33 | def incc(self,cat): 34 | self.cc.setdefault(cat,0) 35 | self.cc[cat]+=1 36 | 37 | def set_threshold(self,cat,threshold_count): 38 | self.thresholds[cat]=threshold_count 39 | 40 | def get_threshold(self,cat): 41 | if cat not in self.thresholds: 42 | return 1 43 | return self.thresholds[cat] 44 | 45 | def feature_count(self,feature,cat): 46 | if feature in self.fc and cat in self.fc[feature]: 47 | return float(self.fc[feature][cat]) 48 | return 0.0 49 | 50 | def cat_count(self,cat): 51 | if cat in self.cc: 52 | return float(self.cc[cat]) 53 | return 0.0 54 | 55 | def total_count(self): 56 | return sum(self.cc.values()) 57 | 58 | def categaries(self): 59 | return [key for key in self.cc] 60 | 61 | def train(self,item,cat): 62 | features=self.getfeatures(item) 63 | for feature in features: 64 | self.incf(feature,cat) 65 | self.incc(cat) 66 | 67 | def 
feature_prob(self,feature,categarie): 68 | if self.cat_count(categarie)==0: 69 | return 0 70 | return self.feature_count(feature,categarie)/self.cat_count(categarie) 71 | 72 | def weighted_prob(self,feature,categarie,prf,weight=1.0,ap=0.5): 73 | basicprob=prf(feature,categarie) 74 | totals=sum([self.feature_count(feature,cat) for cat in self.categaries()]) 75 | bp=((weight*ap)+(totals*basicprob))/(weight+totals) 76 | return bp 77 | 78 | def load_traineddata(self,filename='traindata.json'): 79 | try: 80 | data=json.loads(open(filename,'r',encoding='utf-8').read()) 81 | except: 82 | print('load train data failed') 83 | return 84 | self.fc=data['fc'] 85 | self.cc=data['cc'] 86 | 87 | def save_traindata(self,filename='traindata.json'): 88 | data={} 89 | data['fc']=self.fc 90 | data['cc']=self.cc 91 | f=open(filename,'w',encoding='utf-8') 92 | json.dump(data,f) 93 | f.close() 94 | 95 | class NaiveBayes(Classifier): 96 | def docprob(self,item,cat): 97 | features=self.getfeatures(item) 98 | p=1 99 | for feature in features: 100 | p*=self.weighted_prob(feature,cat,self.feature_prob) 101 | return p 102 | 103 | def prob(self,item,cat): 104 | catprob=self.cat_count(cat)/self.total_count() 105 | docprob=self.docprob(item,cat) 106 | return docprob*catprob 107 | 108 | def classify(self,item,default=None): 109 | max=0 110 | probs={} 111 | for cat in self.categaries(): 112 | probs[cat]=self.prob(item,cat) 113 | if probs[cat]>max: 114 | max=probs[cat] 115 | best=cat 116 | for cat in probs: 117 | if cat==best: 118 | continue 119 | if probs[cat]*self.get_threshold(best)>probs[best]: 120 | return default 121 | print(probs) 122 | return best 123 | 124 | 125 | class FisherClassifier(Classifier): 126 | def __init__(self,getfeatures): 127 | Classifier.__init__(self,getfeatures) 128 | self.minimums={} 129 | 130 | def setminimum(self,cat,minnum): 131 | self.minimums[cat]=minnum 132 | 133 | def getminimum(self,cat): 134 | if cat in self.minimums: 135 | return self.minimums[cat] 136 | return 
0 137 | 138 | def cprob(self,feature,cat): 139 | clf=self.feature_prob(feature,cat) 140 | if clf==0: 141 | return 0 142 | freqnum=sum([self.feature_prob(feature,cat) for cat in self.categaries()]) 143 | p=clf/freqnum 144 | return p 145 | 146 | def fisherProb(self,item,cat): 147 | p=1 148 | features=self.getfeatures(item) 149 | for feature in features: 150 | p*=self.weighted_prob(feature,cat,self.cprob) 151 | fscore=-2*math.log(p) 152 | return self.invchi2(fscore,len(features)*2) 153 | 154 | def invchi2(self,chi,df):#倒置对数卡方函数 155 | m=chi/2 156 | sumcount=term=math.exp(-m) 157 | for i in range(1,int(df/2)): 158 | term*=m/i 159 | sumcount+=term 160 | return min(sumcount,1) 161 | 162 | def classify(self,item,default=None):#分类 163 | best=default 164 | maxnum=0 165 | for cat in self.categaries(): 166 | p=self.fisherProb(item,cat) 167 | if p>self.getminimum(cat) and p>maxnum: 168 | best=cat 169 | maxnum=p 170 | return best 171 | -------------------------------------------------------------------------------- /DocFilter/newstitleclassifier.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import docclass 3 | import re 4 | 5 | headers = { 6 | 'User-Agent': 'Mozilla/5.0 (Android 5.1; Mobile; rv:47.0) Gecko/47.0 Firefox/47.0', 7 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 8 | 'Accept-Language': 'en-US,en;q=0.5', 9 | 'Accept-Encoding': 'gzip, deflate'} 10 | 11 | def train(): 12 | docfilter=docclass.FisherClassifier(docclass.get_chinese_words) 13 | url={'体育':'http://channel.chinanews.com/cns/s/channel:ty.shtml?pager=%s&pagenum=20', 14 | '娱乐':'http://channel.chinanews.com/cns/s/channel:yl.shtml?pager=%s&pagenum=20', 15 | '金融':'http://channel.chinanews.com/cns/s/channel:fortune.shtml?pager=%s&pagenum=20', 16 | '财经':'http://channel.chinanews.com/cns/s/channel:cj.shtml?pager=%s&pagenum=20', 17 | '军事':'http://channel.chinanews.com/cns/s/channel:mil.shtml?pager=%s&pagenum=20', 18 | 
'国际':'http://channel.chinanews.com/cns/s/channel:gj.shtml?pager=%s&pagenum=20', 19 | '社会':'http://channel.chinanews.com/cns/s/channel:sh.shtml?pager=%s&pagenum=20'} 20 | for key in url: 21 | startpage=1 22 | while startpage<100: 23 | try: 24 | html=requests.get(url[key]%(startpage),headers=headers).text.replace(' ','') 25 | except: 26 | break 27 | result=re.findall('"title":"(.*?)"',html) 28 | for item in result: 29 | docfilter.train(item,key) 30 | print(key,startpage,'ok') 31 | startpage+=1 32 | docfilter.save_traindata()#('news_title_traindata.json') 33 | 34 | def test(): 35 | docfilter=docclass.FisherClassifier(docclass.get_chinese_words) 36 | docfilter.load_traineddata('news_title_traindata.json') 37 | while True: 38 | title=input('news title:') 39 | categarie=docfilter.classify(title) 40 | print(categarie) 41 | 42 | test() 43 | -------------------------------------------------------------------------------- /FaceRecognize/camera.py: -------------------------------------------------------------------------------- 1 | from camera_ui import Ui_MainWindow 2 | import pygame 3 | import pygame.camera 4 | from pygame.locals import * 5 | import PIL 6 | from PIL import Image,ImageQt,ImageDraw,ImageFont 7 | import time 8 | from PyQt5 import QtCore, QtGui, QtWidgets 9 | from PyQt5.QtCore import * 10 | from PyQt5.QtGui import QPixmap 11 | import numpy as np 12 | import math 13 | from facerecognize import FaceRecognize 14 | import time 15 | 16 | 17 | class CameraImage(): 18 | def __init__(self): 19 | pygame.camera.init() 20 | self.cam=pygame.camera.Camera(pygame.camera.list_cameras()[0],(640,480)) 21 | self.cam.start() 22 | 23 | def get_PIL_image(self): 24 | webcamImage = self.cam.get_image() 25 | pil_string_image = pygame.image.tostring(webcamImage,"RGBA",False) 26 | img=Image.frombytes("RGBA",(640,480),pil_string_image) 27 | return img 28 | 29 | class Camera(QtWidgets.QMainWindow,Ui_MainWindow): 30 | def __init__(self): 31 | super(Camera,self).__init__() 32 | 
self.setupUi(self) 33 | self.timeid=self.startTimer(200) 34 | self.camera_image=CameraImage() 35 | self.setting_init() 36 | self.font=ImageFont.truetype('GB2312.ttf',24) 37 | self.facerecognize=FaceRecognize() 38 | self.facerecognize.load_trained_data('facedata.json') 39 | 40 | def setting_init(self): 41 | self.action.triggered.connect(self.appclose) 42 | 43 | #计算该像素是肤色的概率 44 | def calculate(self,x): 45 | x=np.matrix(x) 46 | m=np.matrix([156.5599,117.4361]) 47 | C=np.matrix([[299.4574,12.1430],[12.1430,160.130]]) 48 | result=(-0.5)*(x-m)*(C.I)*(x-m).T 49 | return math.exp(result) 50 | 51 | #需要改进 52 | def get_facearea(self,x_value,y_value): 53 | x_min=255 54 | x_max=0 55 | y_min=255 56 | y_max=0 57 | for key in x_value: 58 | if x_value[key]<10: 59 | continue 60 | if keyx_max: 63 | x_max=key 64 | 65 | for key in y_value: 66 | if y_value[key]<10: 67 | continue 68 | if key>y_max: 69 | y_max=key 70 | if key0.4: 86 | try: 87 | x_value[x]+=1 88 | except: 89 | x_value[x]=1 90 | try: 91 | y_value[y]+=1 92 | except: 93 | y_value[y]=1 94 | area=self.get_facearea(x_value,y_value) 95 | face=image.crop([x*10 for x in area]) 96 | ''' 97 | face.save('faces/%s.jpg'%(time.strftime("%Y%m%d_%H%M%S",time.localtime())))4 98 | ''' 99 | recog_result=self.facerecognize.compare(face) 100 | result="你谁呀?" 
101 | for key in recog_result: 102 | if recog_result[key]<500: 103 | result=key 104 | draw=ImageDraw.Draw(image) 105 | draw.text((int((area[0]+area[2])/2)*10,area[3]*10-50),result,font=self.font,fill='#111111') 106 | draw.rectangle([x*10 for x in area]) 107 | return image 108 | 109 | def train(self): 110 | import os 111 | for filename in os.listdir('faces'): 112 | image=Image.open('faces/%s'%filename) 113 | self.facerecognize.train(image,'boss') 114 | self.facerecognize.save_trained_data('facedata.json') 115 | 116 | def timerEvent(self,event): 117 | image=self.camera_image.get_PIL_image() 118 | image=image.transpose(Image.FLIP_LEFT_RIGHT)#左右镜像 119 | image=self.get_face(image) 120 | image=ImageQt.ImageQt(image) 121 | self.imagelabel.setPixmap(QPixmap.fromImage(image)) 122 | 123 | def appclose(self): 124 | self.camera_image.cam.stop() 125 | self.close() 126 | 127 | if __name__=='__main__': 128 | import sys 129 | app=QtWidgets.QApplication(sys.argv) 130 | cam=Camera() 131 | cam.show() 132 | sys.exit(app.exec_()) 133 | -------------------------------------------------------------------------------- /FaceRecognize/camera_function.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pygame.camera 3 | from pygame.locals import * 4 | import PIL 5 | from PIL import Image 6 | import time 7 | 8 | 9 | def camstream(): 10 | DEVICE = '/dev/video0' 11 | SIZE = (640, 480) 12 | FILENAME = 'capture.png' 13 | pygame.init() 14 | pygame.camera.init() 15 | display = pygame.display.set_mode(SIZE, 0) 16 | camera = pygame.camera.Camera(DEVICE, SIZE) 17 | camera.start() 18 | screen = pygame.surface.Surface(SIZE, 0, display) 19 | capture = True 20 | while capture: 21 | screen = camera.get_image(screen) 22 | pil_string_image = pygame.image.tostring(screen,"RGBA",False) 23 | im=Image.frombytes("RGBA",(640,480),pil_string_image) 24 | im.show() 25 | time.sleep(0.5) 26 | im.close() 27 | display.blit(screen, (0,0)) 28 | 
pygame.display.flip() 29 | for event in pygame.event.get(): 30 | if event.type == QUIT: 31 | capture = False 32 | elif event.type == KEYDOWN and event.key == K_s: 33 | pygame.image.save(screen, FILENAME) 34 | camera.stop() 35 | pygame.quit() 36 | return 37 | 38 | class Capture(object): 39 | def __init__(self): 40 | pygame.init() 41 | pygame.camera.init() 42 | self.size = (640,480) 43 | # create a display surface. standard pygame stuff 44 | self.display = pygame.display.set_mode(self.size, 0) 45 | 46 | # this is the same as what we saw before 47 | self.clist = pygame.camera.list_cameras() 48 | if not self.clist: 49 | raise ValueError("Sorry, no cameras detected.") 50 | self.cam = pygame.camera.Camera(self.clist[0], self.size) 51 | self.cam.start() 52 | 53 | # create a surface to capture to. for performance purposes 54 | # bit depth is the same as that of the display surface. 55 | self.snapshot = pygame.surface.Surface(self.size, 0, self.display) 56 | 57 | def get_and_flip(self): 58 | # if you don't want to tie the framerate to the camera, you can check 59 | # if the camera has an image ready. note that while this works 60 | # on most cameras, some will never return true. 61 | if self.cam.query_image(): 62 | self.snapshot = self.cam.get_image(self.snapshot) 63 | 64 | # blit it to the display surface. simple! 
65 | self.display.blit(self.snapshot, (0,0)) 66 | pygame.display.flip() 67 | 68 | def main(self): 69 | going = True 70 | while going: 71 | events = pygame.event.get() 72 | for e in events: 73 | if e.type == QUIT or (e.type == KEYDOWN and e.key == K_ESCAPE): 74 | self.cam.stop() 75 | going = False 76 | self.get_and_flip() 77 | 78 | class CameraImage(): 79 | def __init__(self): 80 | pygame.camera.init() 81 | self.cam=pygame.camera.Camera(pygame.camera.list_cameras()[0],(640,480)) 82 | self.cam.start() 83 | 84 | def get_PIL_image(self): 85 | webcamImage = self.cam.get_image() 86 | pil_string_image = pygame.image.tostring(webcamImage,"RGBA",False) 87 | img=Image.frombytes("RGBA",(640,480),pil_string_image) 88 | return img 89 | 90 | if __name__ == '__main__': 91 | #camstream() 92 | #cam=Capture() 93 | #cam.main() 94 | -------------------------------------------------------------------------------- /FaceRecognize/camera_ui.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Form implementation generated from reading ui file 'mainwindow.ui' 4 | # 5 | # Created by: PyQt5 UI code generator 5.4.1 6 | # 7 | # WARNING! All changes made in this file will be lost! 
8 | 9 | from PyQt5 import QtCore, QtGui, QtWidgets 10 | 11 | class Ui_MainWindow(object): 12 | def setupUi(self, MainWindow): 13 | MainWindow.setObjectName("MainWindow") 14 | MainWindow.resize(767, 580) 15 | self.centralWidget = QtWidgets.QWidget(MainWindow) 16 | self.centralWidget.setObjectName("centralWidget") 17 | self.imagelabel = QtWidgets.QLabel(self.centralWidget) 18 | self.imagelabel.setGeometry(QtCore.QRect(10, 10, 571, 451)) 19 | self.imagelabel.setText("") 20 | self.imagelabel.setObjectName("imagelabel") 21 | MainWindow.setCentralWidget(self.centralWidget) 22 | self.menuBar = QtWidgets.QMenuBar(MainWindow) 23 | self.menuBar.setGeometry(QtCore.QRect(0, 0, 767, 22)) 24 | self.menuBar.setObjectName("menuBar") 25 | self.menu = QtWidgets.QMenu(self.menuBar) 26 | self.menu.setObjectName("menu") 27 | MainWindow.setMenuBar(self.menuBar) 28 | self.action = QtWidgets.QAction(MainWindow) 29 | self.action.setObjectName("action") 30 | self.menu.addAction(self.action) 31 | self.menuBar.addAction(self.menu.menuAction()) 32 | 33 | self.retranslateUi(MainWindow) 34 | QtCore.QMetaObject.connectSlotsByName(MainWindow) 35 | 36 | def retranslateUi(self, MainWindow): 37 | _translate = QtCore.QCoreApplication.translate 38 | MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow")) 39 | self.menu.setTitle(_translate("MainWindow", "菜单")) 40 | self.action.setText(_translate("MainWindow", "退出")) 41 | -------------------------------------------------------------------------------- /FaceRecognize/facerecognize.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import json 3 | import math 4 | 5 | def image_lbp(image): 6 | image=image.resize((int(image.size[0]/4),int(image.size[1]/4)),Image.ANTIALIAS) 7 | image=image.convert('L') 8 | result=Image.new('L',image.size,255) 9 | for x in range(1,image.size[0]-1): 10 | for y in range(1,image.size[1]-1): 11 | pixes=[] 12 | 
dealmap=[(0,0),(-1,-1),(-1,0),(-1,1),(0,1),(1,1),(1,0),(1,-1),(0,-1)] 13 | for item in dealmap: 14 | pixes.append(image.getpixel((x+item[0],y+item[1]))) 15 | LBPvalue=0 16 | for index in range(8): 17 | LBPvalue+=(pixes[index+1]>pixes[0])<<(8-index-1) 18 | result.putpixel((x,y),LBPvalue) 19 | 20 | ''' 21 | #等价模式 22 | temp=(LBPvalue<<1)|(LBPvalue>>7) 23 | temp=bin(temp&LBPvalue) 24 | if temp.count('1')<=2: 25 | result.putpixel((x,y),LBPvalue) 26 | else: 27 | result.putpixel((x,y),255) 28 | ''' 29 | 30 | ''' 31 | #旋转不变 32 | temps=[] 33 | for i in range(8): 34 | temp=(LBPvalue<>(8-i)) 35 | temps.append(temp) 36 | result.putpixel((x,y),min(temps)) 37 | ''' 38 | #等价 and 旋转不变 39 | temp=(LBPvalue<<1)|(LBPvalue>>7) 40 | temp=bin(temp&LBPvalue) 41 | if temp.count('1')<=2: 42 | LBP_temps=[] 43 | for i in range(8): 44 | LBP_temp=(LBPvalue<>(8-i)) 45 | LBP_temps.append(LBP_temp) 46 | result.putpixel((x,y),min(LBP_temps)) 47 | else: 48 | result.putpixel((x,y),0) 49 | 50 | LBP_histogram=[0 for i in range(256)] 51 | for x in range(1,result.size[0]): 52 | for y in range(1,result.size[1]): 53 | pix=result.getpixel((x,y)) 54 | LBP_histogram[pix]+=1 55 | return LBP_histogram 56 | 57 | 58 | class FaceRecognize(): 59 | def __init__(self): 60 | self.typedata={} 61 | 62 | def train(self,image,label): 63 | image_his=image_lbp(image) 64 | try: 65 | self.typedata[label].append(image_his) 66 | except: 67 | self.typedata[label]=[image_his] 68 | 69 | def save_trained_data(self,filename="trained_data.json"): 70 | try: 71 | f=open(filename,'w',encoding='utf-8') 72 | except: 73 | print("Open file failed!") 74 | return 75 | json.dump(self.typedata,f) 76 | f.close() 77 | 78 | def load_trained_data(self,filename="trained_data.json"): 79 | try: 80 | data=json.loads(open(filename,'r',encoding='utf-8').read()) 81 | except: 82 | print("Load trained data failed!") 83 | return 84 | self.typedata=data 85 | 86 | def chi_square_statistic(self,his_one,his_two): 87 | result=0 88 | for index in range(256): 89 | 
try: 90 | result+=(his_one[index]-his_two[index])**2/(his_one[index]+his_two[index]) 91 | except: 92 | continue 93 | return result 94 | 95 | def compare(self,image): 96 | his=image_lbp(image) 97 | compare_result={} 98 | for key in self.typedata: 99 | count=0 100 | ''' 101 | compare_value=0 102 | for img in self.typedata[key]: 103 | compare_value+=self.chi_square_statistic(his,img) 104 | count+=1 105 | compare_result[key]=compare_value/count 106 | ''' 107 | compare_value=200000 108 | for img in self.typedata[key]: 109 | value=self.chi_square_statistic(his,img) 110 | if value